summary refs log tree commit diff stats
path: root/searchlib/src
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2023-09-21 17:26:22 +0200
committer GitHub <noreply@github.com> 2023-09-21 17:26:22 +0200
commit 30d66b745c033484184029fb8bf688b8a79f17d4 (patch)
tree a17deca77a8d635f4ff6a0e293dd444f2aafe3a7 /searchlib/src
parent 95daa49ef952798c71b096b28a9ccd3c6f124478 (diff)
parent 9edf3caed8ecad63d1f1bb5b07510934690cc6d2 (diff)
Merge pull request #28606 from vespa-engine/geirst/fuzzy-matching-algorithm-query-property
Add query property to control fuzzy matching algorithm.
Diffstat (limited to 'searchlib/src')
-rw-r--r-- searchlib/src/tests/ranksetup/ranksetup_test.cpp 2
-rw-r--r-- searchlib/src/vespa/searchcommon/attribute/search_context_params.h 12
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h 11
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h 9
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp 9
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h 7
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp 8
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp 5
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp 9
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h 8
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp 9
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h 7
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp 5
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_matcher.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_matcher.h 3
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_search_context.cpp 5
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_search_context.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_search_helper.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/fef/indexproperties.cpp 16
-rw-r--r-- searchlib/src/vespa/searchlib/fef/indexproperties.h 13
-rw-r--r-- searchlib/src/vespa/searchlib/fef/ranksetup.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/fef/ranksetup.h 4
26 files changed, 134 insertions, 31 deletions
diff --git a/searchlib/src/tests/ranksetup/ranksetup_test.cpp b/searchlib/src/tests/ranksetup/ranksetup_test.cpp
index f708df0a862..8d51eb56cc3 100644
--- a/searchlib/src/tests/ranksetup/ranksetup_test.cpp
+++ b/searchlib/src/tests/ranksetup/ranksetup_test.cpp
@@ -536,6 +536,7 @@ void RankSetupTest::testRankSetup()
env.getProperties().add(matching::GlobalFilterLowerLimit::NAME, "0.3");
env.getProperties().add(matching::GlobalFilterUpperLimit::NAME, "0.7");
env.getProperties().add(matching::TargetHitsMaxAdjustmentFactor::NAME, "5.0");
+ env.getProperties().add(matching::FuzzyAlgorithm::NAME, "dfa_implicit");
RankSetup rs(_factory, env);
EXPECT_FALSE(rs.has_match_features());
@@ -577,6 +578,7 @@ void RankSetupTest::testRankSetup()
EXPECT_EQUAL(rs.get_global_filter_lower_limit(), 0.3);
EXPECT_EQUAL(rs.get_global_filter_upper_limit(), 0.7);
EXPECT_EQUAL(rs.get_target_hits_max_adjustment_factor(), 5.0);
+ EXPECT_EQUAL(rs.get_fuzzy_matching_algorithm(), vespalib::FuzzyMatchingAlgorithm::DfaImplicit);
}
bool
diff --git a/searchlib/src/vespa/searchcommon/attribute/search_context_params.h b/searchlib/src/vespa/searchcommon/attribute/search_context_params.h
index 8ed7eadf919..1c3b32bd777 100644
--- a/searchlib/src/vespa/searchcommon/attribute/search_context_params.h
+++ b/searchlib/src/vespa/searchcommon/attribute/search_context_params.h
@@ -3,6 +3,8 @@
#pragma once
#include "i_document_meta_store_context.h"
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
#include <cstddef>
#include <limits>
#include <cstdint>
@@ -21,6 +23,8 @@ private:
uint32_t _diversityCutoffGroups;
bool _useBitVector;
bool _diversityCutoffStrict;
+ vespalib::FuzzyMatchingAlgorithm _fuzzy_matching_algorithm;
+
public:
SearchContextParams()
@@ -28,13 +32,15 @@ public:
_metaStoreReadGuard(nullptr),
_diversityCutoffGroups(std::numeric_limits<uint32_t>::max()),
_useBitVector(false),
- _diversityCutoffStrict(false)
+ _diversityCutoffStrict(false),
+ _fuzzy_matching_algorithm(search::fef::indexproperties::matching::FuzzyAlgorithm::DEFAULT_VALUE)
{ }
bool useBitVector() const { return _useBitVector; }
const IAttributeVector * diversityAttribute() const { return _diversityAttribute; }
uint32_t diversityCutoffGroups() const { return _diversityCutoffGroups; }
bool diversityCutoffStrict() const { return _diversityCutoffStrict; }
const IDocumentMetaStoreContext::IReadGuard::SP * metaStoreReadGuard() const { return _metaStoreReadGuard; }
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm() const { return _fuzzy_matching_algorithm; }
SearchContextParams &useBitVector(bool value) {
_useBitVector = value;
@@ -56,6 +62,10 @@ public:
_metaStoreReadGuard = readGuard;
return *this;
}
+ SearchContextParams& fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm value) {
+ _fuzzy_matching_algorithm = value;
+ return *this;
+ }
};
}
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index 453b7b321b9..1519bb14554 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -713,6 +713,7 @@ public:
template <class TermNode>
void visitTerm(TermNode &n) {
SearchContextParams scParams = createContextParams(_field.isFilter());
+ scParams.fuzzy_matching_algorithm(getRequestContext().get_attribute_blueprint_params().fuzzy_matching_algorithm);
const string stack = StackDumpCreator::create(n);
setResult(std::make_unique<AttributeFieldBlueprint>(_field, _attr, stack, scParams));
}
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
index 64213235c23..1f9a3ebfa7e 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
@@ -3,6 +3,7 @@
#pragma once
#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
namespace search::attribute {
@@ -14,20 +15,24 @@ struct AttributeBlueprintParams
double global_filter_lower_limit;
double global_filter_upper_limit;
double target_hits_max_adjustment_factor;
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm;
AttributeBlueprintParams(double global_filter_lower_limit_in,
double global_filter_upper_limit_in,
- double target_hits_max_adjustment_factor_in)
+ double target_hits_max_adjustment_factor_in,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm_in)
: global_filter_lower_limit(global_filter_lower_limit_in),
global_filter_upper_limit(global_filter_upper_limit_in),
- target_hits_max_adjustment_factor(target_hits_max_adjustment_factor_in)
+ target_hits_max_adjustment_factor(target_hits_max_adjustment_factor_in),
+ fuzzy_matching_algorithm(fuzzy_matching_algorithm_in)
{
}
AttributeBlueprintParams()
: AttributeBlueprintParams(fef::indexproperties::matching::GlobalFilterLowerLimit::DEFAULT_VALUE,
fef::indexproperties::matching::GlobalFilterUpperLimit::DEFAULT_VALUE,
- fef::indexproperties::matching::TargetHitsMaxAdjustmentFactor::DEFAULT_VALUE)
+ fef::indexproperties::matching::TargetHitsMaxAdjustmentFactor::DEFAULT_VALUE,
+ fef::indexproperties::matching::FuzzyAlgorithm::DEFAULT_VALUE)
{
}
};
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h
index 3ae342be61b..f418e698585 100644
--- a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h
+++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h
@@ -4,6 +4,8 @@
#include "multi_string_enum_search_context.h"
#include "enumhintsearchcontext.h"
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
+
namespace search::attribute {
@@ -17,7 +19,12 @@ class MultiStringEnumHintSearchContext : public MultiStringEnumSearchContext<M>,
public EnumHintSearchContext
{
public:
- MultiStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, MultiValueMappingReadView<M> mv_mapping_read_view, const EnumStoreT<const char*>& enum_store, uint32_t doc_id_limit, uint64_t num_values);
+ MultiStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm,
+ const AttributeVector& toBeSearched,
+ MultiValueMappingReadView<M> mv_mapping_read_view,
+ const EnumStoreT<const char*>& enum_store,
+ uint32_t doc_id_limit, uint64_t num_values);
~MultiStringEnumHintSearchContext() override;
};
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp
index fc1f72c940f..f4b96a46e3d 100644
--- a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp
@@ -6,8 +6,13 @@
namespace search::attribute {
template <typename M>
-MultiStringEnumHintSearchContext<M>::MultiStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, MultiValueMappingReadView<M> mv_mapping_read_view, const EnumStoreT<const char*>& enum_store, uint32_t doc_id_limit, uint64_t num_values)
- : MultiStringEnumSearchContext<M>(std::move(qTerm), cased, toBeSearched, mv_mapping_read_view, enum_store),
+MultiStringEnumHintSearchContext<M>::MultiStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm,
+ const AttributeVector& toBeSearched,
+ MultiValueMappingReadView<M> mv_mapping_read_view,
+ const EnumStoreT<const char*>& enum_store,
+ uint32_t doc_id_limit, uint64_t num_values)
+ : MultiStringEnumSearchContext<M>(std::move(qTerm), cased, fuzzy_matching_algorithm, toBeSearched, mv_mapping_read_view, enum_store),
EnumHintSearchContext(enum_store.get_dictionary(),
doc_id_limit, num_values)
{
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h
index 1787ea0086d..c9b8e8271b1 100644
--- a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h
+++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h
@@ -4,6 +4,7 @@
#include "multi_enum_search_context.h"
#include "string_search_context.h"
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
namespace search::attribute {
@@ -15,7 +16,11 @@ template <typename M>
class MultiStringEnumSearchContext : public MultiEnumSearchContext<const char*, StringSearchContext, M>
{
public:
- MultiStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, MultiValueMappingReadView<M> mv_mapping_read_view, const EnumStoreT<const char*>& enum_store);
+ MultiStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm,
+ const AttributeVector& toBeSearched,
+ MultiValueMappingReadView<M> mv_mapping_read_view,
+ const EnumStoreT<const char*>& enum_store);
};
}
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp
index 1d74db04373..48d1e8b6406 100644
--- a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp
@@ -9,8 +9,12 @@
namespace search::attribute {
template <typename M>
-MultiStringEnumSearchContext<M>::MultiStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, MultiValueMappingReadView<M> mv_mapping_read_view, const EnumStoreT<const char*>& enum_store)
- : MultiEnumSearchContext<const char*, StringSearchContext, M>(StringMatcher(std::move(qTerm), cased), toBeSearched, mv_mapping_read_view, enum_store)
+MultiStringEnumSearchContext<M>::MultiStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm,
+ const AttributeVector& toBeSearched,
+ MultiValueMappingReadView<M> mv_mapping_read_view,
+ const EnumStoreT<const char*>& enum_store)
+ : MultiEnumSearchContext<const char*, StringSearchContext, M>(StringMatcher(std::move(qTerm), cased, fuzzy_matching_algorithm), toBeSearched, mv_mapping_read_view, enum_store)
{
}
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
index 43bb1c5ebb0..53e5f0d2e12 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
@@ -46,11 +46,12 @@ MultiValueStringAttributeT<B, M>::freezeEnumDictionary()
template <typename B, typename M>
std::unique_ptr<attribute::SearchContext>
MultiValueStringAttributeT<B, M>::getSearch(QueryTermSimpleUP qTerm,
- const attribute::SearchContextParams &) const
+ const attribute::SearchContextParams &params) const
{
bool cased = this->get_match_is_cased();
auto doc_id_limit = this->getCommittedDocIdLimit();
- return std::make_unique<attribute::MultiStringEnumHintSearchContext<M>>(std::move(qTerm), cased, *this, this->_mvMapping.make_read_view(doc_id_limit), this->_enumStore, doc_id_limit, this->getStatus().getNumValues());
+ return std::make_unique<attribute::MultiStringEnumHintSearchContext<M>>(std::move(qTerm), cased, params.fuzzy_matching_algorithm(),
+ *this, this->_mvMapping.make_read_view(doc_id_limit), this->_enumStore, doc_id_limit, this->getStatus().getNumValues());
}
template <typename B, typename M>
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
index fe52b785fa7..3da6357bb53 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
@@ -99,7 +99,7 @@ MultiValueStringPostingAttributeT<B, T>::getSearch(QueryTermSimpleUP qTerm,
using SC = attribute::StringPostingSearchContext<BaseSC, SelfType, int32_t>;
bool cased = this->get_match_is_cased();
auto doc_id_limit = this->getCommittedDocIdLimit();
- BaseSC base_sc(std::move(qTerm), cased, *this, this->_mvMapping.make_read_view(doc_id_limit), this->_enumStore);
+ BaseSC base_sc(std::move(qTerm), cased, params.fuzzy_matching_algorithm(), *this, this->_mvMapping.make_read_view(doc_id_limit), this->_enumStore);
return std::make_unique<SC>(std::move(base_sc), params.useBitVector(), *this);
}
diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp
index 2d1748cefa5..95ba37d85be 100644
--- a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp
@@ -5,8 +5,13 @@
namespace search::attribute {
-SingleStringEnumHintSearchContext::SingleStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, EnumIndices enum_indices, const EnumStoreT<const char*>& enum_store, uint64_t num_values)
- : SingleStringEnumSearchContext(std::move(qTerm), cased, toBeSearched, enum_indices, enum_store),
+SingleStringEnumHintSearchContext::SingleStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm,
+ const AttributeVector& toBeSearched,
+ EnumIndices enum_indices,
+ const EnumStoreT<const char*>& enum_store,
+ uint64_t num_values)
+ : SingleStringEnumSearchContext(std::move(qTerm), cased, fuzzy_matching_algorithm, toBeSearched, enum_indices, enum_store),
EnumHintSearchContext(enum_store.get_dictionary(),
enum_indices.size(), num_values)
{
diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h
index f157bf17a71..595d1ac8c57 100644
--- a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h
+++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h
@@ -4,6 +4,7 @@
#include "single_string_enum_search_context.h"
#include "enumhintsearchcontext.h"
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
namespace search::attribute {
@@ -16,7 +17,12 @@ class SingleStringEnumHintSearchContext : public SingleStringEnumSearchContext,
public EnumHintSearchContext
{
public:
- SingleStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, EnumIndices enum_indices, const EnumStoreT<const char*>& enum_store, uint64_t num_values);
+ SingleStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm,
+ const AttributeVector& toBeSearched,
+ EnumIndices enum_indices,
+ const EnumStoreT<const char*>& enum_store,
+ uint64_t num_values);
~SingleStringEnumHintSearchContext() override;
};
diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp
index 8d23eaf7af0..42aebe9f814 100644
--- a/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp
@@ -6,8 +6,13 @@
namespace search::attribute {
-SingleStringEnumSearchContext::SingleStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, EnumIndices enum_indices, const EnumStoreT<const char*>& enum_store)
- : SingleEnumSearchContext<const char*, StringSearchContext>(StringMatcher(std::move(qTerm), cased), toBeSearched, enum_indices, enum_store)
+SingleStringEnumSearchContext::SingleStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm,
+ const AttributeVector& toBeSearched,
+ EnumIndices enum_indices,
+ const EnumStoreT<const char*>& enum_store)
+ : SingleEnumSearchContext<const char*, StringSearchContext>(StringMatcher(std::move(qTerm), cased, fuzzy_matching_algorithm),
+ toBeSearched, enum_indices, enum_store)
{
}
diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h
index b8014b1b0e3..71c62af33aa 100644
--- a/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h
+++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h
@@ -4,6 +4,7 @@
#include "single_enum_search_context.h"
#include "string_search_context.h"
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
namespace search::attribute {
@@ -14,7 +15,11 @@ namespace search::attribute {
class SingleStringEnumSearchContext : public SingleEnumSearchContext<const char*, StringSearchContext>
{
public:
- SingleStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, EnumIndices enum_indices, const EnumStoreT<const char*>& enum_store);
+ SingleStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm,
+ const AttributeVector& toBeSearched,
+ EnumIndices enum_indices,
+ const EnumStoreT<const char*>& enum_store);
SingleStringEnumSearchContext(SingleStringEnumSearchContext&&) noexcept;
~SingleStringEnumSearchContext() override;
};
diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
index c3f5c295260..c4c6fc97053 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
@@ -43,11 +43,12 @@ SingleValueStringAttributeT<B>::freezeEnumDictionary()
template <typename B>
std::unique_ptr<attribute::SearchContext>
SingleValueStringAttributeT<B>::getSearch(QueryTermSimpleUP qTerm,
- const attribute::SearchContextParams &) const
+ const attribute::SearchContextParams& params) const
{
bool cased = this->get_match_is_cased();
auto docid_limit = this->getCommittedDocIdLimit();
- return std::make_unique<attribute::SingleStringEnumHintSearchContext>(std::move(qTerm), cased, *this, this->_enumIndices.make_read_view(docid_limit), this->_enumStore, this->getStatus().getNumValues());
+ return std::make_unique<attribute::SingleStringEnumHintSearchContext>(std::move(qTerm), cased, params.fuzzy_matching_algorithm(),
+ *this, this->_enumIndices.make_read_view(docid_limit), this->_enumStore, this->getStatus().getNumValues());
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp
index 60847636baa..20d672411f8 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp
@@ -146,7 +146,7 @@ SingleValueStringPostingAttributeT<B>::getSearch(QueryTermSimpleUP qTerm,
using SC = attribute::StringPostingSearchContext<BaseSC, SelfType, vespalib::btree::BTreeNoLeafData>;
bool cased = this->get_match_is_cased();
auto docid_limit = this->getCommittedDocIdLimit();
- BaseSC base_sc(std::move(qTerm), cased, *this, this->_enumIndices.make_read_view(docid_limit), this->_enumStore);
+ BaseSC base_sc(std::move(qTerm), cased, params.fuzzy_matching_algorithm(), *this, this->_enumIndices.make_read_view(docid_limit), this->_enumStore);
return std::make_unique<SC>(std::move(base_sc),
params.useBitVector(),
*this);
diff --git a/searchlib/src/vespa/searchlib/attribute/string_matcher.cpp b/searchlib/src/vespa/searchlib/attribute/string_matcher.cpp
index bc3637e7215..8b755d5f3b1 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_matcher.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/string_matcher.cpp
@@ -5,9 +5,9 @@
namespace search::attribute {
-StringMatcher::StringMatcher(std::unique_ptr<QueryTermSimple> query_term, bool cased)
+StringMatcher::StringMatcher(std::unique_ptr<QueryTermSimple> query_term, bool cased, vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm)
: _query_term(static_cast<QueryTermUCS4 *>(query_term.release())),
- _helper(*_query_term, cased)
+ _helper(*_query_term, cased, fuzzy_matching_algorithm)
{
}
diff --git a/searchlib/src/vespa/searchlib/attribute/string_matcher.h b/searchlib/src/vespa/searchlib/attribute/string_matcher.h
index ea4debecc0d..05089e1251a 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_matcher.h
+++ b/searchlib/src/vespa/searchlib/attribute/string_matcher.h
@@ -3,6 +3,7 @@
#pragma once
#include "string_search_helper.h"
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
namespace search { class QueryTermSimple; }
@@ -18,7 +19,7 @@ private:
std::unique_ptr<QueryTermUCS4> _query_term;
attribute::StringSearchHelper _helper;
public:
- StringMatcher(std::unique_ptr<QueryTermSimple> qTerm, bool cased);
+ StringMatcher(std::unique_ptr<QueryTermSimple> qTerm, bool cased, vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm);
StringMatcher(StringMatcher&&) noexcept;
~StringMatcher();
protected:
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/string_search_context.cpp
index fadf7a3151d..119b4a60d0c 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_context.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_context.cpp
@@ -9,9 +9,10 @@
namespace search::attribute {
-StringSearchContext::StringSearchContext(const AttributeVector& to_be_searched, std::unique_ptr<QueryTermSimple> query_term, bool cased)
+StringSearchContext::StringSearchContext(const AttributeVector& to_be_searched, std::unique_ptr<QueryTermSimple> query_term,
+ bool cased, vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm)
: SearchContext(to_be_searched),
- StringMatcher(std::move(query_term), cased)
+ StringMatcher(std::move(query_term), cased, fuzzy_matching_algorithm)
{
}
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_context.h b/searchlib/src/vespa/searchlib/attribute/string_search_context.h
index a0014379436..e459153d2b8 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_context.h
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_context.h
@@ -4,6 +4,7 @@
#include "search_context.h"
#include "string_matcher.h"
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
namespace search {
@@ -24,7 +25,8 @@ class StringSearchContext : public SearchContext, public StringMatcher
protected:
using MatcherType = StringMatcher;
public:
- StringSearchContext(const AttributeVector& to_be_searched, std::unique_ptr<QueryTermSimple> query_term, bool cased);
+ StringSearchContext(const AttributeVector& to_be_searched, std::unique_ptr<QueryTermSimple> query_term,
+ bool cased, vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm);
StringSearchContext(const AttributeVector& to_be_searched, StringMatcher&& matcher);
StringSearchContext(StringSearchContext &&) noexcept;
~StringSearchContext() override;
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
index 60c00a043d0..1efe39667b8 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
@@ -9,7 +9,7 @@
namespace search::attribute {
-StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased)
+StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased, vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm)
: _regex(),
_fuzzyMatcher(),
_term(),
@@ -24,6 +24,8 @@ StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased)
? vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::None)
: vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::IgnoreCase);
} else if (isFuzzy()) {
+ (void) fuzzy_matching_algorithm;
+ // TODO: Select implementation based on algorithm.
_fuzzyMatcher = std::make_unique<vespalib::FuzzyMatcher>(term.getTerm(),
term.getFuzzyMaxEditDistance(),
term.getFuzzyPrefixLength(),
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.h b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h
index 3db0d4dbb5f..0e7a116a874 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.h
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h
@@ -2,6 +2,7 @@
#pragma once
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
#include <vespa/vespalib/regex/regex.h>
namespace vespalib { class FuzzyMatcher; }
@@ -16,7 +17,8 @@ namespace search::attribute {
class StringSearchHelper {
public:
using FuzzyMatcher = vespalib::FuzzyMatcher;
- StringSearchHelper(QueryTermUCS4 & qTerm, bool cased);
+ StringSearchHelper(QueryTermUCS4 & qTerm, bool cased,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm = vespalib::FuzzyMatchingAlgorithm::BruteForce);
StringSearchHelper(StringSearchHelper&&) noexcept;
StringSearchHelper(const StringSearchHelper &) = delete;
StringSearchHelper & operator =(const StringSearchHelper &) = delete;
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
index 7871e66970e..b006aebbcdb 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
@@ -438,6 +438,22 @@ TargetHitsMaxAdjustmentFactor::lookup(const Properties& props, double defaultVal
return lookupDouble(props, NAME, defaultValue);
}
+const vespalib::string FuzzyAlgorithm::NAME("vespa.matching.fuzzy.algorithm");
+const vespalib::FuzzyMatchingAlgorithm FuzzyAlgorithm::DEFAULT_VALUE(vespalib::FuzzyMatchingAlgorithm::BruteForce);
+
+vespalib::FuzzyMatchingAlgorithm
+FuzzyAlgorithm::lookup(const Properties& props)
+{
+ return lookup(props, DEFAULT_VALUE);
+}
+
+vespalib::FuzzyMatchingAlgorithm
+FuzzyAlgorithm::lookup(const Properties& props, vespalib::FuzzyMatchingAlgorithm default_value)
+{
+ auto value = lookupString(props, NAME, vespalib::to_string(default_value));
+ return vespalib::fuzzy_matching_algorithm_from_string(value, default_value);
+}
+
} // namespace matching
namespace softtimeout {
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h
index 4f38a27d3fe..1f16d6b5f57 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.h
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h
@@ -2,9 +2,10 @@
#pragma once
+#include <vespa/searchlib/common/feature.h>
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
#include <vespa/vespalib/stllike/string.h>
#include <vector>
-#include <vespa/searchlib/common/feature.h>
namespace search::fef { class Properties; }
@@ -328,6 +329,16 @@ namespace matching {
static double lookup(const Properties &props);
static double lookup(const Properties &props, double defaultValue);
};
+
+ /**
+ * Property to control the algorithm used for fuzzy matching.
+ **/
+ struct FuzzyAlgorithm {
+ static const vespalib::string NAME;
+ static const vespalib::FuzzyMatchingAlgorithm DEFAULT_VALUE;
+ static vespalib::FuzzyMatchingAlgorithm lookup(const Properties& props);
+ static vespalib::FuzzyMatchingAlgorithm lookup(const Properties& props, vespalib::FuzzyMatchingAlgorithm default_value);
+ };
}
namespace softtimeout {
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
index 9d4e547feef..02b56701cdb 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
@@ -69,6 +69,7 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i
_global_filter_lower_limit(0.0),
_global_filter_upper_limit(1.0),
_target_hits_max_adjustment_factor(20.0),
+ _fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm::BruteForce),
_mutateOnMatch(),
_mutateOnFirstPhase(),
_mutateOnSecondPhase(),
@@ -123,6 +124,7 @@ RankSetup::configure()
set_global_filter_lower_limit(matching::GlobalFilterLowerLimit::lookup(_indexEnv.getProperties()));
set_global_filter_upper_limit(matching::GlobalFilterUpperLimit::lookup(_indexEnv.getProperties()));
set_target_hits_max_adjustment_factor(matching::TargetHitsMaxAdjustmentFactor::lookup(_indexEnv.getProperties()));
+ set_fuzzy_matching_algorithm(matching::FuzzyAlgorithm::lookup(_indexEnv.getProperties()));
_mutateOnMatch._attribute = mutate::on_match::Attribute::lookup(_indexEnv.getProperties());
_mutateOnMatch._operation = mutate::on_match::Operation::lookup(_indexEnv.getProperties());
_mutateOnFirstPhase._attribute = mutate::on_first_phase::Attribute::lookup(_indexEnv.getProperties());
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h
index 72432c2ed8a..3170f965e58 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.h
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h
@@ -8,6 +8,7 @@
#include "blueprintresolver.h"
#include "rank_program.h"
#include <vespa/searchlib/common/stringmap.h>
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
namespace search::fef {
@@ -77,6 +78,7 @@ private:
double _global_filter_lower_limit;
double _global_filter_upper_limit;
double _target_hits_max_adjustment_factor;
+ vespalib::FuzzyMatchingAlgorithm _fuzzy_matching_algorithm;
MutateOperation _mutateOnMatch;
MutateOperation _mutateOnFirstPhase;
MutateOperation _mutateOnSecondPhase;
@@ -396,6 +398,8 @@ public:
double get_global_filter_upper_limit() const { return _global_filter_upper_limit; }
void set_target_hits_max_adjustment_factor(double v) { _target_hits_max_adjustment_factor = v; }
double get_target_hits_max_adjustment_factor() const { return _target_hits_max_adjustment_factor; }
+ void set_fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm v) { _fuzzy_matching_algorithm = v; }
+ vespalib::FuzzyMatchingAlgorithm get_fuzzy_matching_algorithm() const { return _fuzzy_matching_algorithm; }
/**
* This method may be used to indicate that certain features