diff options
author | Geir Storli <geirst@yahooinc.com> | 2023-09-21 17:26:22 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-21 17:26:22 +0200 |
commit | 30d66b745c033484184029fb8bf688b8a79f17d4 (patch) | |
tree | a17deca77a8d635f4ff6a0e293dd444f2aafe3a7 /searchlib/src | |
parent | 95daa49ef952798c71b096b28a9ccd3c6f124478 (diff) | |
parent | 9edf3caed8ecad63d1f1bb5b07510934690cc6d2 (diff) |
Merge pull request #28606 from vespa-engine/geirst/fuzzy-matching-algorithm-query-property
Add query property to control fuzzy matching algorithm.
Diffstat (limited to 'searchlib/src')
26 files changed, 134 insertions, 31 deletions
diff --git a/searchlib/src/tests/ranksetup/ranksetup_test.cpp b/searchlib/src/tests/ranksetup/ranksetup_test.cpp index f708df0a862..8d51eb56cc3 100644 --- a/searchlib/src/tests/ranksetup/ranksetup_test.cpp +++ b/searchlib/src/tests/ranksetup/ranksetup_test.cpp @@ -536,6 +536,7 @@ void RankSetupTest::testRankSetup() env.getProperties().add(matching::GlobalFilterLowerLimit::NAME, "0.3"); env.getProperties().add(matching::GlobalFilterUpperLimit::NAME, "0.7"); env.getProperties().add(matching::TargetHitsMaxAdjustmentFactor::NAME, "5.0"); + env.getProperties().add(matching::FuzzyAlgorithm::NAME, "dfa_implicit"); RankSetup rs(_factory, env); EXPECT_FALSE(rs.has_match_features()); @@ -577,6 +578,7 @@ void RankSetupTest::testRankSetup() EXPECT_EQUAL(rs.get_global_filter_lower_limit(), 0.3); EXPECT_EQUAL(rs.get_global_filter_upper_limit(), 0.7); EXPECT_EQUAL(rs.get_target_hits_max_adjustment_factor(), 5.0); + EXPECT_EQUAL(rs.get_fuzzy_matching_algorithm(), vespalib::FuzzyMatchingAlgorithm::DfaImplicit); } bool diff --git a/searchlib/src/vespa/searchcommon/attribute/search_context_params.h b/searchlib/src/vespa/searchcommon/attribute/search_context_params.h index 8ed7eadf919..1c3b32bd777 100644 --- a/searchlib/src/vespa/searchcommon/attribute/search_context_params.h +++ b/searchlib/src/vespa/searchcommon/attribute/search_context_params.h @@ -3,6 +3,8 @@ #pragma once #include "i_document_meta_store_context.h" +#include <vespa/searchlib/fef/indexproperties.h> +#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h> #include <cstddef> #include <limits> #include <cstdint> @@ -21,6 +23,8 @@ private: uint32_t _diversityCutoffGroups; bool _useBitVector; bool _diversityCutoffStrict; + vespalib::FuzzyMatchingAlgorithm _fuzzy_matching_algorithm; + public: SearchContextParams() @@ -28,13 +32,15 @@ public: _metaStoreReadGuard(nullptr), _diversityCutoffGroups(std::numeric_limits<uint32_t>::max()), _useBitVector(false), - _diversityCutoffStrict(false) + _diversityCutoffStrict(false), + 
_fuzzy_matching_algorithm(search::fef::indexproperties::matching::FuzzyAlgorithm::DEFAULT_VALUE) { } bool useBitVector() const { return _useBitVector; } const IAttributeVector * diversityAttribute() const { return _diversityAttribute; } uint32_t diversityCutoffGroups() const { return _diversityCutoffGroups; } bool diversityCutoffStrict() const { return _diversityCutoffStrict; } const IDocumentMetaStoreContext::IReadGuard::SP * metaStoreReadGuard() const { return _metaStoreReadGuard; } + vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm() const { return _fuzzy_matching_algorithm; } SearchContextParams &useBitVector(bool value) { _useBitVector = value; @@ -56,6 +62,10 @@ public: _metaStoreReadGuard = readGuard; return *this; } + SearchContextParams& fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm value) { + _fuzzy_matching_algorithm = value; + return *this; + } }; } diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 453b7b321b9..1519bb14554 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -713,6 +713,7 @@ public: template <class TermNode> void visitTerm(TermNode &n) { SearchContextParams scParams = createContextParams(_field.isFilter()); + scParams.fuzzy_matching_algorithm(getRequestContext().get_attribute_blueprint_params().fuzzy_matching_algorithm); const string stack = StackDumpCreator::create(n); setResult(std::make_unique<AttributeFieldBlueprint>(_field, _attr, stack, scParams)); } diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h index 64213235c23..1f9a3ebfa7e 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h 
@@ -3,6 +3,7 @@ #pragma once #include <vespa/searchlib/fef/indexproperties.h> +#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h> namespace search::attribute { @@ -14,20 +15,24 @@ struct AttributeBlueprintParams double global_filter_lower_limit; double global_filter_upper_limit; double target_hits_max_adjustment_factor; + vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm; AttributeBlueprintParams(double global_filter_lower_limit_in, double global_filter_upper_limit_in, - double target_hits_max_adjustment_factor_in) + double target_hits_max_adjustment_factor_in, + vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm_in) : global_filter_lower_limit(global_filter_lower_limit_in), global_filter_upper_limit(global_filter_upper_limit_in), - target_hits_max_adjustment_factor(target_hits_max_adjustment_factor_in) + target_hits_max_adjustment_factor(target_hits_max_adjustment_factor_in), + fuzzy_matching_algorithm(fuzzy_matching_algorithm_in) { } AttributeBlueprintParams() : AttributeBlueprintParams(fef::indexproperties::matching::GlobalFilterLowerLimit::DEFAULT_VALUE, fef::indexproperties::matching::GlobalFilterUpperLimit::DEFAULT_VALUE, - fef::indexproperties::matching::TargetHitsMaxAdjustmentFactor::DEFAULT_VALUE) + fef::indexproperties::matching::TargetHitsMaxAdjustmentFactor::DEFAULT_VALUE, + fef::indexproperties::matching::FuzzyAlgorithm::DEFAULT_VALUE) { } }; diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h index 3ae342be61b..f418e698585 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h +++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h @@ -4,6 +4,8 @@ #include "multi_string_enum_search_context.h" #include "enumhintsearchcontext.h" +#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h> + namespace search::attribute { @@ -17,7 +19,12 @@ class 
MultiStringEnumHintSearchContext : public MultiStringEnumSearchContext<M>, public EnumHintSearchContext { public: - MultiStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, MultiValueMappingReadView<M> mv_mapping_read_view, const EnumStoreT<const char*>& enum_store, uint32_t doc_id_limit, uint64_t num_values); + MultiStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, + vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm, + const AttributeVector& toBeSearched, + MultiValueMappingReadView<M> mv_mapping_read_view, + const EnumStoreT<const char*>& enum_store, + uint32_t doc_id_limit, uint64_t num_values); ~MultiStringEnumHintSearchContext() override; }; diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp index fc1f72c940f..f4b96a46e3d 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp @@ -6,8 +6,13 @@ namespace search::attribute { template <typename M> -MultiStringEnumHintSearchContext<M>::MultiStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, MultiValueMappingReadView<M> mv_mapping_read_view, const EnumStoreT<const char*>& enum_store, uint32_t doc_id_limit, uint64_t num_values) - : MultiStringEnumSearchContext<M>(std::move(qTerm), cased, toBeSearched, mv_mapping_read_view, enum_store), +MultiStringEnumHintSearchContext<M>::MultiStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, + vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm, + const AttributeVector& toBeSearched, + MultiValueMappingReadView<M> mv_mapping_read_view, + const EnumStoreT<const char*>& enum_store, + uint32_t doc_id_limit, uint64_t num_values) + : 
MultiStringEnumSearchContext<M>(std::move(qTerm), cased, fuzzy_matching_algorithm, toBeSearched, mv_mapping_read_view, enum_store), EnumHintSearchContext(enum_store.get_dictionary(), doc_id_limit, num_values) { diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h index 1787ea0086d..c9b8e8271b1 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h +++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h @@ -4,6 +4,7 @@ #include "multi_enum_search_context.h" #include "string_search_context.h" +#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h> namespace search::attribute { @@ -15,7 +16,11 @@ template <typename M> class MultiStringEnumSearchContext : public MultiEnumSearchContext<const char*, StringSearchContext, M> { public: - MultiStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, MultiValueMappingReadView<M> mv_mapping_read_view, const EnumStoreT<const char*>& enum_store); + MultiStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, + vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm, + const AttributeVector& toBeSearched, + MultiValueMappingReadView<M> mv_mapping_read_view, + const EnumStoreT<const char*>& enum_store); }; } diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp index 1d74db04373..48d1e8b6406 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp @@ -9,8 +9,12 @@ namespace search::attribute { template <typename M> -MultiStringEnumSearchContext<M>::MultiStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& 
toBeSearched, MultiValueMappingReadView<M> mv_mapping_read_view, const EnumStoreT<const char*>& enum_store) - : MultiEnumSearchContext<const char*, StringSearchContext, M>(StringMatcher(std::move(qTerm), cased), toBeSearched, mv_mapping_read_view, enum_store) +MultiStringEnumSearchContext<M>::MultiStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, + vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm, + const AttributeVector& toBeSearched, + MultiValueMappingReadView<M> mv_mapping_read_view, + const EnumStoreT<const char*>& enum_store) + : MultiEnumSearchContext<const char*, StringSearchContext, M>(StringMatcher(std::move(qTerm), cased, fuzzy_matching_algorithm), toBeSearched, mv_mapping_read_view, enum_store) { } diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp index 43bb1c5ebb0..53e5f0d2e12 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp @@ -46,11 +46,12 @@ MultiValueStringAttributeT<B, M>::freezeEnumDictionary() template <typename B, typename M> std::unique_ptr<attribute::SearchContext> MultiValueStringAttributeT<B, M>::getSearch(QueryTermSimpleUP qTerm, - const attribute::SearchContextParams &) const + const attribute::SearchContextParams &params) const { bool cased = this->get_match_is_cased(); auto doc_id_limit = this->getCommittedDocIdLimit(); - return std::make_unique<attribute::MultiStringEnumHintSearchContext<M>>(std::move(qTerm), cased, *this, this->_mvMapping.make_read_view(doc_id_limit), this->_enumStore, doc_id_limit, this->getStatus().getNumValues()); + return std::make_unique<attribute::MultiStringEnumHintSearchContext<M>>(std::move(qTerm), cased, params.fuzzy_matching_algorithm(), + *this, this->_mvMapping.make_read_view(doc_id_limit), this->_enumStore, doc_id_limit, this->getStatus().getNumValues()); } template <typename B, 
typename M> diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp index fe52b785fa7..3da6357bb53 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp @@ -99,7 +99,7 @@ MultiValueStringPostingAttributeT<B, T>::getSearch(QueryTermSimpleUP qTerm, using SC = attribute::StringPostingSearchContext<BaseSC, SelfType, int32_t>; bool cased = this->get_match_is_cased(); auto doc_id_limit = this->getCommittedDocIdLimit(); - BaseSC base_sc(std::move(qTerm), cased, *this, this->_mvMapping.make_read_view(doc_id_limit), this->_enumStore); + BaseSC base_sc(std::move(qTerm), cased, params.fuzzy_matching_algorithm(), *this, this->_mvMapping.make_read_view(doc_id_limit), this->_enumStore); return std::make_unique<SC>(std::move(base_sc), params.useBitVector(), *this); } diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp index 2d1748cefa5..95ba37d85be 100644 --- a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp +++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp @@ -5,8 +5,13 @@ namespace search::attribute { -SingleStringEnumHintSearchContext::SingleStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, EnumIndices enum_indices, const EnumStoreT<const char*>& enum_store, uint64_t num_values) - : SingleStringEnumSearchContext(std::move(qTerm), cased, toBeSearched, enum_indices, enum_store), +SingleStringEnumHintSearchContext::SingleStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, + vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm, + const AttributeVector& toBeSearched, + EnumIndices 
enum_indices, + const EnumStoreT<const char*>& enum_store, + uint64_t num_values) + : SingleStringEnumSearchContext(std::move(qTerm), cased, fuzzy_matching_algorithm, toBeSearched, enum_indices, enum_store), EnumHintSearchContext(enum_store.get_dictionary(), enum_indices.size(), num_values) { diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h index f157bf17a71..595d1ac8c57 100644 --- a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h +++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h @@ -4,6 +4,7 @@ #include "single_string_enum_search_context.h" #include "enumhintsearchcontext.h" +#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h> namespace search::attribute { @@ -16,7 +17,12 @@ class SingleStringEnumHintSearchContext : public SingleStringEnumSearchContext, public EnumHintSearchContext { public: - SingleStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, EnumIndices enum_indices, const EnumStoreT<const char*>& enum_store, uint64_t num_values); + SingleStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, + vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm, + const AttributeVector& toBeSearched, + EnumIndices enum_indices, + const EnumStoreT<const char*>& enum_store, + uint64_t num_values); ~SingleStringEnumHintSearchContext() override; }; diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp index 8d23eaf7af0..42aebe9f814 100644 --- a/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp +++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp @@ -6,8 +6,13 @@ namespace search::attribute { 
-SingleStringEnumSearchContext::SingleStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, EnumIndices enum_indices, const EnumStoreT<const char*>& enum_store) - : SingleEnumSearchContext<const char*, StringSearchContext>(StringMatcher(std::move(qTerm), cased), toBeSearched, enum_indices, enum_store) +SingleStringEnumSearchContext::SingleStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, + vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm, + const AttributeVector& toBeSearched, + EnumIndices enum_indices, + const EnumStoreT<const char*>& enum_store) + : SingleEnumSearchContext<const char*, StringSearchContext>(StringMatcher(std::move(qTerm), cased, fuzzy_matching_algorithm), + toBeSearched, enum_indices, enum_store) { } diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h index b8014b1b0e3..71c62af33aa 100644 --- a/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h +++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h @@ -4,6 +4,7 @@ #include "single_enum_search_context.h" #include "string_search_context.h" +#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h> namespace search::attribute { @@ -14,7 +15,11 @@ namespace search::attribute { class SingleStringEnumSearchContext : public SingleEnumSearchContext<const char*, StringSearchContext> { public: - SingleStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, EnumIndices enum_indices, const EnumStoreT<const char*>& enum_store); + SingleStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, + vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm, + const AttributeVector& toBeSearched, + EnumIndices enum_indices, + const EnumStoreT<const char*>& enum_store); 
SingleStringEnumSearchContext(SingleStringEnumSearchContext&&) noexcept; ~SingleStringEnumSearchContext() override; }; diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp index c3f5c295260..c4c6fc97053 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp @@ -43,11 +43,12 @@ SingleValueStringAttributeT<B>::freezeEnumDictionary() template <typename B> std::unique_ptr<attribute::SearchContext> SingleValueStringAttributeT<B>::getSearch(QueryTermSimpleUP qTerm, - const attribute::SearchContextParams &) const + const attribute::SearchContextParams& params) const { bool cased = this->get_match_is_cased(); auto docid_limit = this->getCommittedDocIdLimit(); - return std::make_unique<attribute::SingleStringEnumHintSearchContext>(std::move(qTerm), cased, *this, this->_enumIndices.make_read_view(docid_limit), this->_enumStore, this->getStatus().getNumValues()); + return std::make_unique<attribute::SingleStringEnumHintSearchContext>(std::move(qTerm), cased, params.fuzzy_matching_algorithm(), + *this, this->_enumIndices.make_read_view(docid_limit), this->_enumStore, this->getStatus().getNumValues()); } } diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp index 60847636baa..20d672411f8 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp @@ -146,7 +146,7 @@ SingleValueStringPostingAttributeT<B>::getSearch(QueryTermSimpleUP qTerm, using SC = attribute::StringPostingSearchContext<BaseSC, SelfType, vespalib::btree::BTreeNoLeafData>; bool cased = this->get_match_is_cased(); auto docid_limit = this->getCommittedDocIdLimit(); - BaseSC base_sc(std::move(qTerm), cased, *this, 
this->_enumIndices.make_read_view(docid_limit), this->_enumStore); + BaseSC base_sc(std::move(qTerm), cased, params.fuzzy_matching_algorithm(), *this, this->_enumIndices.make_read_view(docid_limit), this->_enumStore); return std::make_unique<SC>(std::move(base_sc), params.useBitVector(), *this); diff --git a/searchlib/src/vespa/searchlib/attribute/string_matcher.cpp b/searchlib/src/vespa/searchlib/attribute/string_matcher.cpp index bc3637e7215..8b755d5f3b1 100644 --- a/searchlib/src/vespa/searchlib/attribute/string_matcher.cpp +++ b/searchlib/src/vespa/searchlib/attribute/string_matcher.cpp @@ -5,9 +5,9 @@ namespace search::attribute { -StringMatcher::StringMatcher(std::unique_ptr<QueryTermSimple> query_term, bool cased) +StringMatcher::StringMatcher(std::unique_ptr<QueryTermSimple> query_term, bool cased, vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm) : _query_term(static_cast<QueryTermUCS4 *>(query_term.release())), - _helper(*_query_term, cased) + _helper(*_query_term, cased, fuzzy_matching_algorithm) { } diff --git a/searchlib/src/vespa/searchlib/attribute/string_matcher.h b/searchlib/src/vespa/searchlib/attribute/string_matcher.h index ea4debecc0d..05089e1251a 100644 --- a/searchlib/src/vespa/searchlib/attribute/string_matcher.h +++ b/searchlib/src/vespa/searchlib/attribute/string_matcher.h @@ -3,6 +3,7 @@ #pragma once #include "string_search_helper.h" +#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h> namespace search { class QueryTermSimple; } @@ -18,7 +19,7 @@ private: std::unique_ptr<QueryTermUCS4> _query_term; attribute::StringSearchHelper _helper; public: - StringMatcher(std::unique_ptr<QueryTermSimple> qTerm, bool cased); + StringMatcher(std::unique_ptr<QueryTermSimple> qTerm, bool cased, vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm); StringMatcher(StringMatcher&&) noexcept; ~StringMatcher(); protected: diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_context.cpp 
b/searchlib/src/vespa/searchlib/attribute/string_search_context.cpp index fadf7a3151d..119b4a60d0c 100644 --- a/searchlib/src/vespa/searchlib/attribute/string_search_context.cpp +++ b/searchlib/src/vespa/searchlib/attribute/string_search_context.cpp @@ -9,9 +9,10 @@ namespace search::attribute { -StringSearchContext::StringSearchContext(const AttributeVector& to_be_searched, std::unique_ptr<QueryTermSimple> query_term, bool cased) +StringSearchContext::StringSearchContext(const AttributeVector& to_be_searched, std::unique_ptr<QueryTermSimple> query_term, + bool cased, vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm) : SearchContext(to_be_searched), - StringMatcher(std::move(query_term), cased) + StringMatcher(std::move(query_term), cased, fuzzy_matching_algorithm) { } diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_context.h b/searchlib/src/vespa/searchlib/attribute/string_search_context.h index a0014379436..e459153d2b8 100644 --- a/searchlib/src/vespa/searchlib/attribute/string_search_context.h +++ b/searchlib/src/vespa/searchlib/attribute/string_search_context.h @@ -4,6 +4,7 @@ #include "search_context.h" #include "string_matcher.h" +#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h> namespace search { @@ -24,7 +25,8 @@ class StringSearchContext : public SearchContext, public StringMatcher protected: using MatcherType = StringMatcher; public: - StringSearchContext(const AttributeVector& to_be_searched, std::unique_ptr<QueryTermSimple> query_term, bool cased); + StringSearchContext(const AttributeVector& to_be_searched, std::unique_ptr<QueryTermSimple> query_term, + bool cased, vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm); StringSearchContext(const AttributeVector& to_be_searched, StringMatcher&& matcher); StringSearchContext(StringSearchContext &&) noexcept; ~StringSearchContext() override; diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp 
b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp index 60c00a043d0..1efe39667b8 100644 --- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp +++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp @@ -9,7 +9,7 @@ namespace search::attribute { -StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased) +StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased, vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm) : _regex(), _fuzzyMatcher(), _term(), @@ -24,6 +24,8 @@ StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased) ? vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::None) : vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::IgnoreCase); } else if (isFuzzy()) { + (void) fuzzy_matching_algorithm; + // TODO: Select implementation based on algorithm. _fuzzyMatcher = std::make_unique<vespalib::FuzzyMatcher>(term.getTerm(), term.getFuzzyMaxEditDistance(), term.getFuzzyPrefixLength(), diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.h b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h index 3db0d4dbb5f..0e7a116a874 100644 --- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.h +++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h @@ -2,6 +2,7 @@ #pragma once +#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h> #include <vespa/vespalib/regex/regex.h> namespace vespalib { class FuzzyMatcher; } @@ -16,7 +17,8 @@ namespace search::attribute { class StringSearchHelper { public: using FuzzyMatcher = vespalib::FuzzyMatcher; - StringSearchHelper(QueryTermUCS4 & qTerm, bool cased); + StringSearchHelper(QueryTermUCS4 & qTerm, bool cased, + vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm = vespalib::FuzzyMatchingAlgorithm::BruteForce); StringSearchHelper(StringSearchHelper&&) noexcept; StringSearchHelper(const StringSearchHelper &) = delete; 
StringSearchHelper & operator =(const StringSearchHelper &) = delete; diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp index 7871e66970e..b006aebbcdb 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp @@ -438,6 +438,22 @@ TargetHitsMaxAdjustmentFactor::lookup(const Properties& props, double defaultVal return lookupDouble(props, NAME, defaultValue); } +const vespalib::string FuzzyAlgorithm::NAME("vespa.matching.fuzzy.algorithm"); +const vespalib::FuzzyMatchingAlgorithm FuzzyAlgorithm::DEFAULT_VALUE(vespalib::FuzzyMatchingAlgorithm::BruteForce); + +vespalib::FuzzyMatchingAlgorithm +FuzzyAlgorithm::lookup(const Properties& props) +{ + return lookup(props, DEFAULT_VALUE); +} + +vespalib::FuzzyMatchingAlgorithm +FuzzyAlgorithm::lookup(const Properties& props, vespalib::FuzzyMatchingAlgorithm default_value) +{ + auto value = lookupString(props, NAME, vespalib::to_string(default_value)); + return vespalib::fuzzy_matching_algorithm_from_string(value, default_value); +} + } // namespace matching namespace softtimeout { diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h index 4f38a27d3fe..1f16d6b5f57 100644 --- a/searchlib/src/vespa/searchlib/fef/indexproperties.h +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h @@ -2,9 +2,10 @@ #pragma once +#include <vespa/searchlib/common/feature.h> +#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h> #include <vespa/vespalib/stllike/string.h> #include <vector> -#include <vespa/searchlib/common/feature.h> namespace search::fef { class Properties; } @@ -328,6 +329,16 @@ namespace matching { static double lookup(const Properties &props); static double lookup(const Properties &props, double defaultValue); }; + + /** + * Property to control the algorithm using for fuzzy matching. 
+ **/ + struct FuzzyAlgorithm { + static const vespalib::string NAME; + static const vespalib::FuzzyMatchingAlgorithm DEFAULT_VALUE; + static vespalib::FuzzyMatchingAlgorithm lookup(const Properties& props); + static vespalib::FuzzyMatchingAlgorithm lookup(const Properties& props, vespalib::FuzzyMatchingAlgorithm default_value); + }; } namespace softtimeout { diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp index 9d4e547feef..02b56701cdb 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp @@ -69,6 +69,7 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i _global_filter_lower_limit(0.0), _global_filter_upper_limit(1.0), _target_hits_max_adjustment_factor(20.0), + _fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm::BruteForce), _mutateOnMatch(), _mutateOnFirstPhase(), _mutateOnSecondPhase(), @@ -123,6 +124,7 @@ RankSetup::configure() set_global_filter_lower_limit(matching::GlobalFilterLowerLimit::lookup(_indexEnv.getProperties())); set_global_filter_upper_limit(matching::GlobalFilterUpperLimit::lookup(_indexEnv.getProperties())); set_target_hits_max_adjustment_factor(matching::TargetHitsMaxAdjustmentFactor::lookup(_indexEnv.getProperties())); + set_fuzzy_matching_algorithm(matching::FuzzyAlgorithm::lookup(_indexEnv.getProperties())); _mutateOnMatch._attribute = mutate::on_match::Attribute::lookup(_indexEnv.getProperties()); _mutateOnMatch._operation = mutate::on_match::Operation::lookup(_indexEnv.getProperties()); _mutateOnFirstPhase._attribute = mutate::on_first_phase::Attribute::lookup(_indexEnv.getProperties()); diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h index 72432c2ed8a..3170f965e58 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.h +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h @@ -8,6 +8,7 @@ #include "blueprintresolver.h" 
#include "rank_program.h" #include <vespa/searchlib/common/stringmap.h> +#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h> namespace search::fef { @@ -77,6 +78,7 @@ private: double _global_filter_lower_limit; double _global_filter_upper_limit; double _target_hits_max_adjustment_factor; + vespalib::FuzzyMatchingAlgorithm _fuzzy_matching_algorithm; MutateOperation _mutateOnMatch; MutateOperation _mutateOnFirstPhase; MutateOperation _mutateOnSecondPhase; @@ -396,6 +398,8 @@ public: double get_global_filter_upper_limit() const { return _global_filter_upper_limit; } void set_target_hits_max_adjustment_factor(double v) { _target_hits_max_adjustment_factor = v; } double get_target_hits_max_adjustment_factor() const { return _target_hits_max_adjustment_factor; } + void set_fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm v) { _fuzzy_matching_algorithm = v; } + vespalib::FuzzyMatchingAlgorithm get_fuzzy_matching_algorithm() const { return _fuzzy_matching_algorithm; } /** * This method may be used to indicate that certain features |