summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2023-09-21 13:32:47 +0000
committer Geir Storli <geirst@yahooinc.com> 2023-09-21 13:38:52 +0000
commit 9edf3caed8ecad63d1f1bb5b07510934690cc6d2 (patch)
tree 209187088bfa38a085798d605ddf7bd2c68b2bd5
parent 92d656cb14e33c4aea1677241aa687bdc70d5bc1 (diff)
Add query property to control fuzzy matching algorithm.
-rw-r--r-- searchcore/src/tests/proton/matching/matching_test.cpp 20
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp 4
-rw-r--r-- searchlib/src/tests/ranksetup/ranksetup_test.cpp 2
-rw-r--r-- searchlib/src/vespa/searchcommon/attribute/search_context_params.h 12
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h 11
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h 9
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp 9
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h 7
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp 8
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp 5
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp 9
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h 8
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp 9
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h 7
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp 5
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_matcher.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_matcher.h 3
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_search_context.cpp 5
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_search_context.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_search_helper.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/fef/indexproperties.cpp 16
-rw-r--r-- searchlib/src/vespa/searchlib/fef/indexproperties.h 13
-rw-r--r-- searchlib/src/vespa/searchlib/fef/ranksetup.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/fef/ranksetup.h 4
-rw-r--r-- vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt 1
-rw-r--r-- vespalib/src/vespa/vespalib/fuzzy/fuzzy_matching_algorithm.cpp 51
-rw-r--r-- vespalib/src/vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h 26
31 files changed, 229 insertions, 38 deletions
diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp
index 6ef462f80c4..ec549ee6f71 100644
--- a/searchcore/src/tests/proton/matching/matching_test.cpp
+++ b/searchcore/src/tests/proton/matching/matching_test.cpp
@@ -1135,12 +1135,15 @@ TEST("require that docsum matcher can extract matching elements from single attr
EXPECT_EQUAL(list[1], 3u);
}
+using FMA = vespalib::FuzzyMatchingAlgorithm;
+
struct AttributeBlueprintParamsFixture {
BlueprintFactory factory;
search::fef::test::IndexEnvironment index_env;
RankSetup rank_setup;
Properties rank_properties;
- AttributeBlueprintParamsFixture(double lower_limit, double upper_limit, double target_hits_max_adjustment_factor)
+ AttributeBlueprintParamsFixture(double lower_limit, double upper_limit, double target_hits_max_adjustment_factor,
+ FMA fuzzy_matching_algorithm)
: factory(),
index_env(),
rank_setup(factory, index_env),
@@ -1149,36 +1152,41 @@ struct AttributeBlueprintParamsFixture {
rank_setup.set_global_filter_lower_limit(lower_limit);
rank_setup.set_global_filter_upper_limit(upper_limit);
rank_setup.set_target_hits_max_adjustment_factor(target_hits_max_adjustment_factor);
+ rank_setup.set_fuzzy_matching_algorithm(fuzzy_matching_algorithm);
}
void set_query_properties(vespalib::stringref lower_limit, vespalib::stringref upper_limit,
- vespalib::stringref target_hits_max_adjustment_factor) {
+ vespalib::stringref target_hits_max_adjustment_factor,
+ const vespalib::string fuzzy_matching_algorithm) {
rank_properties.add(GlobalFilterLowerLimit::NAME, lower_limit);
rank_properties.add(GlobalFilterUpperLimit::NAME, upper_limit);
rank_properties.add(TargetHitsMaxAdjustmentFactor::NAME, target_hits_max_adjustment_factor);
+ rank_properties.add(FuzzyAlgorithm::NAME, fuzzy_matching_algorithm);
}
AttributeBlueprintParams extract(uint32_t active_docids = 9, uint32_t docid_limit = 10) const {
return MatchToolsFactory::extract_attribute_blueprint_params(rank_setup, rank_properties, active_docids, docid_limit);
}
};
-TEST_F("attribute blueprint params are extracted from rank profile", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0))
+TEST_F("attribute blueprint params are extracted from rank profile", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0, FMA::BruteForce))
{
auto params = f.extract();
EXPECT_EQUAL(0.2, params.global_filter_lower_limit);
EXPECT_EQUAL(0.8, params.global_filter_upper_limit);
EXPECT_EQUAL(5.0, params.target_hits_max_adjustment_factor);
+ EXPECT_EQUAL(FMA::BruteForce, params.fuzzy_matching_algorithm);
}
-TEST_F("attribute blueprint params are extracted from query", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0))
+TEST_F("attribute blueprint params are extracted from query", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0, FMA::BruteForce))
{
- f.set_query_properties("0.15", "0.75", "3.0");
+ f.set_query_properties("0.15", "0.75", "3.0", "dfa_explicit");
auto params = f.extract();
EXPECT_EQUAL(0.15, params.global_filter_lower_limit);
EXPECT_EQUAL(0.75, params.global_filter_upper_limit);
EXPECT_EQUAL(3.0, params.target_hits_max_adjustment_factor);
+ EXPECT_EQUAL(FMA::DfaExplicit, params.fuzzy_matching_algorithm);
}
-TEST_F("global filter params are scaled with active hit ratio", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0))
+TEST_F("global filter params are scaled with active hit ratio", AttributeBlueprintParamsFixture(0.2, 0.8, 5.0, FMA::BruteForce))
{
auto params = f.extract(5, 10);
EXPECT_EQUAL(0.12, params.global_filter_lower_limit);
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
index f62f4c60a6c..5ae671b88cb 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
@@ -331,6 +331,7 @@ MatchToolsFactory::extract_attribute_blueprint_params(const RankSetup& rank_setu
double lower_limit = GlobalFilterLowerLimit::lookup(rank_properties, rank_setup.get_global_filter_lower_limit());
double upper_limit = GlobalFilterUpperLimit::lookup(rank_properties, rank_setup.get_global_filter_upper_limit());
double target_hits_max_adjustment_factor = TargetHitsMaxAdjustmentFactor::lookup(rank_properties, rank_setup.get_target_hits_max_adjustment_factor());
+ auto fuzzy_matching_algorithm = FuzzyAlgorithm::lookup(rank_properties, rank_setup.get_fuzzy_matching_algorithm());
// Note that we count the reserved docid 0 as active.
// This ensures that when searchable-copies=1, the ratio is 1.0.
@@ -338,7 +339,8 @@ MatchToolsFactory::extract_attribute_blueprint_params(const RankSetup& rank_setu
return {lower_limit * active_hit_ratio,
upper_limit * active_hit_ratio,
- target_hits_max_adjustment_factor};
+ target_hits_max_adjustment_factor,
+ fuzzy_matching_algorithm};
}
AttributeOperationTask::AttributeOperationTask(const RequestContext & requestContext,
diff --git a/searchlib/src/tests/ranksetup/ranksetup_test.cpp b/searchlib/src/tests/ranksetup/ranksetup_test.cpp
index f708df0a862..8d51eb56cc3 100644
--- a/searchlib/src/tests/ranksetup/ranksetup_test.cpp
+++ b/searchlib/src/tests/ranksetup/ranksetup_test.cpp
@@ -536,6 +536,7 @@ void RankSetupTest::testRankSetup()
env.getProperties().add(matching::GlobalFilterLowerLimit::NAME, "0.3");
env.getProperties().add(matching::GlobalFilterUpperLimit::NAME, "0.7");
env.getProperties().add(matching::TargetHitsMaxAdjustmentFactor::NAME, "5.0");
+ env.getProperties().add(matching::FuzzyAlgorithm::NAME, "dfa_implicit");
RankSetup rs(_factory, env);
EXPECT_FALSE(rs.has_match_features());
@@ -577,6 +578,7 @@ void RankSetupTest::testRankSetup()
EXPECT_EQUAL(rs.get_global_filter_lower_limit(), 0.3);
EXPECT_EQUAL(rs.get_global_filter_upper_limit(), 0.7);
EXPECT_EQUAL(rs.get_target_hits_max_adjustment_factor(), 5.0);
+ EXPECT_EQUAL(rs.get_fuzzy_matching_algorithm(), vespalib::FuzzyMatchingAlgorithm::DfaImplicit);
}
bool
diff --git a/searchlib/src/vespa/searchcommon/attribute/search_context_params.h b/searchlib/src/vespa/searchcommon/attribute/search_context_params.h
index 8ed7eadf919..1c3b32bd777 100644
--- a/searchlib/src/vespa/searchcommon/attribute/search_context_params.h
+++ b/searchlib/src/vespa/searchcommon/attribute/search_context_params.h
@@ -3,6 +3,8 @@
#pragma once
#include "i_document_meta_store_context.h"
+#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
#include <cstddef>
#include <limits>
#include <cstdint>
@@ -21,6 +23,8 @@ private:
uint32_t _diversityCutoffGroups;
bool _useBitVector;
bool _diversityCutoffStrict;
+ vespalib::FuzzyMatchingAlgorithm _fuzzy_matching_algorithm;
+
public:
SearchContextParams()
@@ -28,13 +32,15 @@ public:
_metaStoreReadGuard(nullptr),
_diversityCutoffGroups(std::numeric_limits<uint32_t>::max()),
_useBitVector(false),
- _diversityCutoffStrict(false)
+ _diversityCutoffStrict(false),
+ _fuzzy_matching_algorithm(search::fef::indexproperties::matching::FuzzyAlgorithm::DEFAULT_VALUE)
{ }
bool useBitVector() const { return _useBitVector; }
const IAttributeVector * diversityAttribute() const { return _diversityAttribute; }
uint32_t diversityCutoffGroups() const { return _diversityCutoffGroups; }
bool diversityCutoffStrict() const { return _diversityCutoffStrict; }
const IDocumentMetaStoreContext::IReadGuard::SP * metaStoreReadGuard() const { return _metaStoreReadGuard; }
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm() const { return _fuzzy_matching_algorithm; }
SearchContextParams &useBitVector(bool value) {
_useBitVector = value;
@@ -56,6 +62,10 @@ public:
_metaStoreReadGuard = readGuard;
return *this;
}
+ SearchContextParams& fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm value) {
+ _fuzzy_matching_algorithm = value;
+ return *this;
+ }
};
}
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index 453b7b321b9..1519bb14554 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -713,6 +713,7 @@ public:
template <class TermNode>
void visitTerm(TermNode &n) {
SearchContextParams scParams = createContextParams(_field.isFilter());
+ scParams.fuzzy_matching_algorithm(getRequestContext().get_attribute_blueprint_params().fuzzy_matching_algorithm);
const string stack = StackDumpCreator::create(n);
setResult(std::make_unique<AttributeFieldBlueprint>(_field, _attr, stack, scParams));
}
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
index 64213235c23..1f9a3ebfa7e 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_params.h
@@ -3,6 +3,7 @@
#pragma once
#include <vespa/searchlib/fef/indexproperties.h>
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
namespace search::attribute {
@@ -14,20 +15,24 @@ struct AttributeBlueprintParams
double global_filter_lower_limit;
double global_filter_upper_limit;
double target_hits_max_adjustment_factor;
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm;
AttributeBlueprintParams(double global_filter_lower_limit_in,
double global_filter_upper_limit_in,
- double target_hits_max_adjustment_factor_in)
+ double target_hits_max_adjustment_factor_in,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm_in)
: global_filter_lower_limit(global_filter_lower_limit_in),
global_filter_upper_limit(global_filter_upper_limit_in),
- target_hits_max_adjustment_factor(target_hits_max_adjustment_factor_in)
+ target_hits_max_adjustment_factor(target_hits_max_adjustment_factor_in),
+ fuzzy_matching_algorithm(fuzzy_matching_algorithm_in)
{
}
AttributeBlueprintParams()
: AttributeBlueprintParams(fef::indexproperties::matching::GlobalFilterLowerLimit::DEFAULT_VALUE,
fef::indexproperties::matching::GlobalFilterUpperLimit::DEFAULT_VALUE,
- fef::indexproperties::matching::TargetHitsMaxAdjustmentFactor::DEFAULT_VALUE)
+ fef::indexproperties::matching::TargetHitsMaxAdjustmentFactor::DEFAULT_VALUE,
+ fef::indexproperties::matching::FuzzyAlgorithm::DEFAULT_VALUE)
{
}
};
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h
index 3ae342be61b..f418e698585 100644
--- a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h
+++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h
@@ -4,6 +4,8 @@
#include "multi_string_enum_search_context.h"
#include "enumhintsearchcontext.h"
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
+
namespace search::attribute {
@@ -17,7 +19,12 @@ class MultiStringEnumHintSearchContext : public MultiStringEnumSearchContext<M>,
public EnumHintSearchContext
{
public:
- MultiStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, MultiValueMappingReadView<M> mv_mapping_read_view, const EnumStoreT<const char*>& enum_store, uint32_t doc_id_limit, uint64_t num_values);
+ MultiStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm,
+ const AttributeVector& toBeSearched,
+ MultiValueMappingReadView<M> mv_mapping_read_view,
+ const EnumStoreT<const char*>& enum_store,
+ uint32_t doc_id_limit, uint64_t num_values);
~MultiStringEnumHintSearchContext() override;
};
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp
index fc1f72c940f..f4b96a46e3d 100644
--- a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp
@@ -6,8 +6,13 @@
namespace search::attribute {
template <typename M>
-MultiStringEnumHintSearchContext<M>::MultiStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, MultiValueMappingReadView<M> mv_mapping_read_view, const EnumStoreT<const char*>& enum_store, uint32_t doc_id_limit, uint64_t num_values)
- : MultiStringEnumSearchContext<M>(std::move(qTerm), cased, toBeSearched, mv_mapping_read_view, enum_store),
+MultiStringEnumHintSearchContext<M>::MultiStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm,
+ const AttributeVector& toBeSearched,
+ MultiValueMappingReadView<M> mv_mapping_read_view,
+ const EnumStoreT<const char*>& enum_store,
+ uint32_t doc_id_limit, uint64_t num_values)
+ : MultiStringEnumSearchContext<M>(std::move(qTerm), cased, fuzzy_matching_algorithm, toBeSearched, mv_mapping_read_view, enum_store),
EnumHintSearchContext(enum_store.get_dictionary(),
doc_id_limit, num_values)
{
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h
index 1787ea0086d..c9b8e8271b1 100644
--- a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h
+++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h
@@ -4,6 +4,7 @@
#include "multi_enum_search_context.h"
#include "string_search_context.h"
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
namespace search::attribute {
@@ -15,7 +16,11 @@ template <typename M>
class MultiStringEnumSearchContext : public MultiEnumSearchContext<const char*, StringSearchContext, M>
{
public:
- MultiStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, MultiValueMappingReadView<M> mv_mapping_read_view, const EnumStoreT<const char*>& enum_store);
+ MultiStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm,
+ const AttributeVector& toBeSearched,
+ MultiValueMappingReadView<M> mv_mapping_read_view,
+ const EnumStoreT<const char*>& enum_store);
};
}
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp
index 1d74db04373..48d1e8b6406 100644
--- a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp
@@ -9,8 +9,12 @@
namespace search::attribute {
template <typename M>
-MultiStringEnumSearchContext<M>::MultiStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, MultiValueMappingReadView<M> mv_mapping_read_view, const EnumStoreT<const char*>& enum_store)
- : MultiEnumSearchContext<const char*, StringSearchContext, M>(StringMatcher(std::move(qTerm), cased), toBeSearched, mv_mapping_read_view, enum_store)
+MultiStringEnumSearchContext<M>::MultiStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm,
+ const AttributeVector& toBeSearched,
+ MultiValueMappingReadView<M> mv_mapping_read_view,
+ const EnumStoreT<const char*>& enum_store)
+ : MultiEnumSearchContext<const char*, StringSearchContext, M>(StringMatcher(std::move(qTerm), cased, fuzzy_matching_algorithm), toBeSearched, mv_mapping_read_view, enum_store)
{
}
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
index 43bb1c5ebb0..53e5f0d2e12 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
@@ -46,11 +46,12 @@ MultiValueStringAttributeT<B, M>::freezeEnumDictionary()
template <typename B, typename M>
std::unique_ptr<attribute::SearchContext>
MultiValueStringAttributeT<B, M>::getSearch(QueryTermSimpleUP qTerm,
- const attribute::SearchContextParams &) const
+ const attribute::SearchContextParams &params) const
{
bool cased = this->get_match_is_cased();
auto doc_id_limit = this->getCommittedDocIdLimit();
- return std::make_unique<attribute::MultiStringEnumHintSearchContext<M>>(std::move(qTerm), cased, *this, this->_mvMapping.make_read_view(doc_id_limit), this->_enumStore, doc_id_limit, this->getStatus().getNumValues());
+ return std::make_unique<attribute::MultiStringEnumHintSearchContext<M>>(std::move(qTerm), cased, params.fuzzy_matching_algorithm(),
+ *this, this->_mvMapping.make_read_view(doc_id_limit), this->_enumStore, doc_id_limit, this->getStatus().getNumValues());
}
template <typename B, typename M>
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
index fe52b785fa7..3da6357bb53 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
@@ -99,7 +99,7 @@ MultiValueStringPostingAttributeT<B, T>::getSearch(QueryTermSimpleUP qTerm,
using SC = attribute::StringPostingSearchContext<BaseSC, SelfType, int32_t>;
bool cased = this->get_match_is_cased();
auto doc_id_limit = this->getCommittedDocIdLimit();
- BaseSC base_sc(std::move(qTerm), cased, *this, this->_mvMapping.make_read_view(doc_id_limit), this->_enumStore);
+ BaseSC base_sc(std::move(qTerm), cased, params.fuzzy_matching_algorithm(), *this, this->_mvMapping.make_read_view(doc_id_limit), this->_enumStore);
return std::make_unique<SC>(std::move(base_sc), params.useBitVector(), *this);
}
diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp
index 2d1748cefa5..95ba37d85be 100644
--- a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp
@@ -5,8 +5,13 @@
namespace search::attribute {
-SingleStringEnumHintSearchContext::SingleStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, EnumIndices enum_indices, const EnumStoreT<const char*>& enum_store, uint64_t num_values)
- : SingleStringEnumSearchContext(std::move(qTerm), cased, toBeSearched, enum_indices, enum_store),
+SingleStringEnumHintSearchContext::SingleStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm,
+ const AttributeVector& toBeSearched,
+ EnumIndices enum_indices,
+ const EnumStoreT<const char*>& enum_store,
+ uint64_t num_values)
+ : SingleStringEnumSearchContext(std::move(qTerm), cased, fuzzy_matching_algorithm, toBeSearched, enum_indices, enum_store),
EnumHintSearchContext(enum_store.get_dictionary(),
enum_indices.size(), num_values)
{
diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h
index f157bf17a71..595d1ac8c57 100644
--- a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h
+++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h
@@ -4,6 +4,7 @@
#include "single_string_enum_search_context.h"
#include "enumhintsearchcontext.h"
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
namespace search::attribute {
@@ -16,7 +17,12 @@ class SingleStringEnumHintSearchContext : public SingleStringEnumSearchContext,
public EnumHintSearchContext
{
public:
- SingleStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, EnumIndices enum_indices, const EnumStoreT<const char*>& enum_store, uint64_t num_values);
+ SingleStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm,
+ const AttributeVector& toBeSearched,
+ EnumIndices enum_indices,
+ const EnumStoreT<const char*>& enum_store,
+ uint64_t num_values);
~SingleStringEnumHintSearchContext() override;
};
diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp
index 8d23eaf7af0..42aebe9f814 100644
--- a/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp
@@ -6,8 +6,13 @@
namespace search::attribute {
-SingleStringEnumSearchContext::SingleStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, EnumIndices enum_indices, const EnumStoreT<const char*>& enum_store)
- : SingleEnumSearchContext<const char*, StringSearchContext>(StringMatcher(std::move(qTerm), cased), toBeSearched, enum_indices, enum_store)
+SingleStringEnumSearchContext::SingleStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm,
+ const AttributeVector& toBeSearched,
+ EnumIndices enum_indices,
+ const EnumStoreT<const char*>& enum_store)
+ : SingleEnumSearchContext<const char*, StringSearchContext>(StringMatcher(std::move(qTerm), cased, fuzzy_matching_algorithm),
+ toBeSearched, enum_indices, enum_store)
{
}
diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h
index b8014b1b0e3..71c62af33aa 100644
--- a/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h
+++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h
@@ -4,6 +4,7 @@
#include "single_enum_search_context.h"
#include "string_search_context.h"
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
namespace search::attribute {
@@ -14,7 +15,11 @@ namespace search::attribute {
class SingleStringEnumSearchContext : public SingleEnumSearchContext<const char*, StringSearchContext>
{
public:
- SingleStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, EnumIndices enum_indices, const EnumStoreT<const char*>& enum_store);
+ SingleStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm,
+ const AttributeVector& toBeSearched,
+ EnumIndices enum_indices,
+ const EnumStoreT<const char*>& enum_store);
SingleStringEnumSearchContext(SingleStringEnumSearchContext&&) noexcept;
~SingleStringEnumSearchContext() override;
};
diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
index c3f5c295260..c4c6fc97053 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
@@ -43,11 +43,12 @@ SingleValueStringAttributeT<B>::freezeEnumDictionary()
template <typename B>
std::unique_ptr<attribute::SearchContext>
SingleValueStringAttributeT<B>::getSearch(QueryTermSimpleUP qTerm,
- const attribute::SearchContextParams &) const
+ const attribute::SearchContextParams& params) const
{
bool cased = this->get_match_is_cased();
auto docid_limit = this->getCommittedDocIdLimit();
- return std::make_unique<attribute::SingleStringEnumHintSearchContext>(std::move(qTerm), cased, *this, this->_enumIndices.make_read_view(docid_limit), this->_enumStore, this->getStatus().getNumValues());
+ return std::make_unique<attribute::SingleStringEnumHintSearchContext>(std::move(qTerm), cased, params.fuzzy_matching_algorithm(),
+ *this, this->_enumIndices.make_read_view(docid_limit), this->_enumStore, this->getStatus().getNumValues());
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp
index 60847636baa..20d672411f8 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp
@@ -146,7 +146,7 @@ SingleValueStringPostingAttributeT<B>::getSearch(QueryTermSimpleUP qTerm,
using SC = attribute::StringPostingSearchContext<BaseSC, SelfType, vespalib::btree::BTreeNoLeafData>;
bool cased = this->get_match_is_cased();
auto docid_limit = this->getCommittedDocIdLimit();
- BaseSC base_sc(std::move(qTerm), cased, *this, this->_enumIndices.make_read_view(docid_limit), this->_enumStore);
+ BaseSC base_sc(std::move(qTerm), cased, params.fuzzy_matching_algorithm(), *this, this->_enumIndices.make_read_view(docid_limit), this->_enumStore);
return std::make_unique<SC>(std::move(base_sc),
params.useBitVector(),
*this);
diff --git a/searchlib/src/vespa/searchlib/attribute/string_matcher.cpp b/searchlib/src/vespa/searchlib/attribute/string_matcher.cpp
index bc3637e7215..8b755d5f3b1 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_matcher.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/string_matcher.cpp
@@ -5,9 +5,9 @@
namespace search::attribute {
-StringMatcher::StringMatcher(std::unique_ptr<QueryTermSimple> query_term, bool cased)
+StringMatcher::StringMatcher(std::unique_ptr<QueryTermSimple> query_term, bool cased, vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm)
: _query_term(static_cast<QueryTermUCS4 *>(query_term.release())),
- _helper(*_query_term, cased)
+ _helper(*_query_term, cased, fuzzy_matching_algorithm)
{
}
diff --git a/searchlib/src/vespa/searchlib/attribute/string_matcher.h b/searchlib/src/vespa/searchlib/attribute/string_matcher.h
index ea4debecc0d..05089e1251a 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_matcher.h
+++ b/searchlib/src/vespa/searchlib/attribute/string_matcher.h
@@ -3,6 +3,7 @@
#pragma once
#include "string_search_helper.h"
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
namespace search { class QueryTermSimple; }
@@ -18,7 +19,7 @@ private:
std::unique_ptr<QueryTermUCS4> _query_term;
attribute::StringSearchHelper _helper;
public:
- StringMatcher(std::unique_ptr<QueryTermSimple> qTerm, bool cased);
+ StringMatcher(std::unique_ptr<QueryTermSimple> qTerm, bool cased, vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm);
StringMatcher(StringMatcher&&) noexcept;
~StringMatcher();
protected:
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/string_search_context.cpp
index fadf7a3151d..119b4a60d0c 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_context.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_context.cpp
@@ -9,9 +9,10 @@
namespace search::attribute {
-StringSearchContext::StringSearchContext(const AttributeVector& to_be_searched, std::unique_ptr<QueryTermSimple> query_term, bool cased)
+StringSearchContext::StringSearchContext(const AttributeVector& to_be_searched, std::unique_ptr<QueryTermSimple> query_term,
+ bool cased, vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm)
: SearchContext(to_be_searched),
- StringMatcher(std::move(query_term), cased)
+ StringMatcher(std::move(query_term), cased, fuzzy_matching_algorithm)
{
}
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_context.h b/searchlib/src/vespa/searchlib/attribute/string_search_context.h
index a0014379436..e459153d2b8 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_context.h
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_context.h
@@ -4,6 +4,7 @@
#include "search_context.h"
#include "string_matcher.h"
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
namespace search {
@@ -24,7 +25,8 @@ class StringSearchContext : public SearchContext, public StringMatcher
protected:
using MatcherType = StringMatcher;
public:
- StringSearchContext(const AttributeVector& to_be_searched, std::unique_ptr<QueryTermSimple> query_term, bool cased);
+ StringSearchContext(const AttributeVector& to_be_searched, std::unique_ptr<QueryTermSimple> query_term,
+ bool cased, vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm);
StringSearchContext(const AttributeVector& to_be_searched, StringMatcher&& matcher);
StringSearchContext(StringSearchContext &&) noexcept;
~StringSearchContext() override;
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
index 60c00a043d0..1efe39667b8 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
@@ -9,7 +9,7 @@
namespace search::attribute {
-StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased)
+StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased, vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm)
: _regex(),
_fuzzyMatcher(),
_term(),
@@ -24,6 +24,8 @@ StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased)
? vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::None)
: vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::IgnoreCase);
} else if (isFuzzy()) {
+ (void) fuzzy_matching_algorithm;
+ // TODO: Select implementation based on algorithm.
_fuzzyMatcher = std::make_unique<vespalib::FuzzyMatcher>(term.getTerm(),
term.getFuzzyMaxEditDistance(),
term.getFuzzyPrefixLength(),
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.h b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h
index 3db0d4dbb5f..0e7a116a874 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.h
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h
@@ -2,6 +2,7 @@
#pragma once
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
#include <vespa/vespalib/regex/regex.h>
namespace vespalib { class FuzzyMatcher; }
@@ -16,7 +17,8 @@ namespace search::attribute {
class StringSearchHelper {
public:
using FuzzyMatcher = vespalib::FuzzyMatcher;
- StringSearchHelper(QueryTermUCS4 & qTerm, bool cased);
+ StringSearchHelper(QueryTermUCS4 & qTerm, bool cased,
+ vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm = vespalib::FuzzyMatchingAlgorithm::BruteForce);
StringSearchHelper(StringSearchHelper&&) noexcept;
StringSearchHelper(const StringSearchHelper &) = delete;
StringSearchHelper & operator =(const StringSearchHelper &) = delete;
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
index 7871e66970e..b006aebbcdb 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
@@ -438,6 +438,22 @@ TargetHitsMaxAdjustmentFactor::lookup(const Properties& props, double defaultVal
return lookupDouble(props, NAME, defaultValue);
}
+// NAME is the property key read by FuzzyAlgorithm::lookup(); DEFAULT_VALUE is the
+// algorithm the single-argument lookup() passes on as its default.
+const vespalib::string FuzzyAlgorithm::NAME("vespa.matching.fuzzy.algorithm");
+const vespalib::FuzzyMatchingAlgorithm FuzzyAlgorithm::DEFAULT_VALUE(vespalib::FuzzyMatchingAlgorithm::BruteForce);
+
+// Convenience overload: resolves the algorithm from props with DEFAULT_VALUE as the default.
+vespalib::FuzzyMatchingAlgorithm
+FuzzyAlgorithm::lookup(const Properties& props)
+{
+    const vespalib::FuzzyMatchingAlgorithm fallback = DEFAULT_VALUE;
+    return FuzzyAlgorithm::lookup(props, fallback);
+}
+
+// Reads the NAME property as text (the textual form of default_value is handed to
+// lookupString() as its default) and converts that text to an algorithm, with
+// default_value as the result for text that is not a recognised algorithm name.
+vespalib::FuzzyMatchingAlgorithm
+FuzzyAlgorithm::lookup(const Properties& props, vespalib::FuzzyMatchingAlgorithm default_value)
+{
+    return vespalib::fuzzy_matching_algorithm_from_string(
+            lookupString(props, NAME, vespalib::to_string(default_value)),
+            default_value);
+}
+
} // namespace matching
namespace softtimeout {
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h
index 4f38a27d3fe..1f16d6b5f57 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.h
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h
@@ -2,9 +2,10 @@
#pragma once
+#include <vespa/searchlib/common/feature.h>
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
#include <vespa/vespalib/stllike/string.h>
#include <vector>
-#include <vespa/searchlib/common/feature.h>
namespace search::fef { class Properties; }
@@ -328,6 +329,16 @@ namespace matching {
static double lookup(const Properties &props);
static double lookup(const Properties &props, double defaultValue);
};
+
+ /**
+ * Property to control the algorithm used for fuzzy matching.
+ **/
+ struct FuzzyAlgorithm {
+ // Name of the property.
+ static const vespalib::string NAME;
+ // Algorithm that the single-argument lookup() passes on as its default.
+ static const vespalib::FuzzyMatchingAlgorithm DEFAULT_VALUE;
+ // Resolves the algorithm from props, with DEFAULT_VALUE as the default.
+ static vespalib::FuzzyMatchingAlgorithm lookup(const Properties& props);
+ // Resolves the algorithm from props; property text that is not a recognised
+ // algorithm name is converted to default_value.
+ static vespalib::FuzzyMatchingAlgorithm lookup(const Properties& props, vespalib::FuzzyMatchingAlgorithm default_value);
+ };
}
namespace softtimeout {
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
index 9d4e547feef..02b56701cdb 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
@@ -69,6 +69,7 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i
_global_filter_lower_limit(0.0),
_global_filter_upper_limit(1.0),
_target_hits_max_adjustment_factor(20.0),
+ _fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm::BruteForce),
_mutateOnMatch(),
_mutateOnFirstPhase(),
_mutateOnSecondPhase(),
@@ -123,6 +124,7 @@ RankSetup::configure()
set_global_filter_lower_limit(matching::GlobalFilterLowerLimit::lookup(_indexEnv.getProperties()));
set_global_filter_upper_limit(matching::GlobalFilterUpperLimit::lookup(_indexEnv.getProperties()));
set_target_hits_max_adjustment_factor(matching::TargetHitsMaxAdjustmentFactor::lookup(_indexEnv.getProperties()));
+ set_fuzzy_matching_algorithm(matching::FuzzyAlgorithm::lookup(_indexEnv.getProperties()));
_mutateOnMatch._attribute = mutate::on_match::Attribute::lookup(_indexEnv.getProperties());
_mutateOnMatch._operation = mutate::on_match::Operation::lookup(_indexEnv.getProperties());
_mutateOnFirstPhase._attribute = mutate::on_first_phase::Attribute::lookup(_indexEnv.getProperties());
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h
index 72432c2ed8a..3170f965e58 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.h
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h
@@ -8,6 +8,7 @@
#include "blueprintresolver.h"
#include "rank_program.h"
#include <vespa/searchlib/common/stringmap.h>
+#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
namespace search::fef {
@@ -77,6 +78,7 @@ private:
double _global_filter_lower_limit;
double _global_filter_upper_limit;
double _target_hits_max_adjustment_factor;
+ vespalib::FuzzyMatchingAlgorithm _fuzzy_matching_algorithm;
MutateOperation _mutateOnMatch;
MutateOperation _mutateOnFirstPhase;
MutateOperation _mutateOnSecondPhase;
@@ -396,6 +398,8 @@ public:
double get_global_filter_upper_limit() const { return _global_filter_upper_limit; }
void set_target_hits_max_adjustment_factor(double v) { _target_hits_max_adjustment_factor = v; }
double get_target_hits_max_adjustment_factor() const { return _target_hits_max_adjustment_factor; }
+ // Accessors for the fuzzy matching algorithm. The constructor initialises the value to
+ // BruteForce and configure() overwrites it with the matching::FuzzyAlgorithm property lookup.
+ void set_fuzzy_matching_algorithm(vespalib::FuzzyMatchingAlgorithm v) { _fuzzy_matching_algorithm = v; }
+ vespalib::FuzzyMatchingAlgorithm get_fuzzy_matching_algorithm() const { return _fuzzy_matching_algorithm; }
/**
* This method may be used to indicate that certain features
diff --git a/vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt b/vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt
index bdbb03bcfee..5e8d29980cd 100644
--- a/vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt
+++ b/vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt
@@ -3,6 +3,7 @@ vespa_add_library(vespalib_vespalib_fuzzy OBJECT
SOURCES
explicit_levenshtein_dfa.cpp
fuzzy_matcher.cpp
+ fuzzy_matching_algorithm.cpp
implicit_levenshtein_dfa.cpp
levenshtein_dfa.cpp
levenshtein_distance.cpp
diff --git a/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matching_algorithm.cpp b/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matching_algorithm.cpp
new file mode 100644
index 00000000000..826b0beffd6
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matching_algorithm.cpp
@@ -0,0 +1,51 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "fuzzy_matching_algorithm.h"
+
+namespace vespalib {
+
+namespace {
+
+// Textual names of the algorithms. The same constants are used by to_string() and
+// fuzzy_matching_algorithm_from_string(), so both directions of the conversion agree.
+const vespalib::string brute_force = "brute_force";
+const vespalib::string dfa_implicit = "dfa_implicit";
+const vespalib::string dfa_explicit = "dfa_explicit";
+
+}
+
+// Maps an algorithm to its textual name. A value that matches none of the known
+// enumerators yields an empty string.
+vespalib::string
+to_string(FuzzyMatchingAlgorithm algo)
+{
+    using Algo = FuzzyMatchingAlgorithm;
+    switch (algo) {
+        case Algo::BruteForce:  return brute_force;
+        case Algo::DfaImplicit: return dfa_implicit;
+        case Algo::DfaExplicit: return dfa_explicit;
+    }
+    // Only reached for a value outside the enumerators handled above.
+    return "";
+}
+
+// Converts a textual algorithm name to its enum value. Text that equals none of the
+// known names results in default_algo.
+FuzzyMatchingAlgorithm
+fuzzy_matching_algorithm_from_string(const vespalib::string& algo,
+                                     FuzzyMatchingAlgorithm default_algo)
+{
+    FuzzyMatchingAlgorithm parsed = default_algo;
+    if (algo == brute_force) {
+        parsed = FuzzyMatchingAlgorithm::BruteForce;
+    } else if (algo == dfa_implicit) {
+        parsed = FuzzyMatchingAlgorithm::DfaImplicit;
+    } else if (algo == dfa_explicit) {
+        parsed = FuzzyMatchingAlgorithm::DfaExplicit;
+    }
+    return parsed;
+}
+
+// Writes the textual name produced by to_string() for algo to the stream.
+std::ostream&
+operator<<(std::ostream& out, FuzzyMatchingAlgorithm algo)
+{
+    const vespalib::string name = to_string(algo);
+    out << name;
+    return out;
+}
+
+}
diff --git a/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h b/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h
new file mode 100644
index 00000000000..83cb121fe5f
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h
@@ -0,0 +1,26 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <ostream>
+
+namespace vespalib {
+
+/**
+ * Algorithms that are supported for fuzzy matching.
+ *
+ * The textual names used by to_string() and fuzzy_matching_algorithm_from_string()
+ * are "brute_force", "dfa_implicit" and "dfa_explicit", in enumerator order.
+ */
+enum class FuzzyMatchingAlgorithm {
+ BruteForce,
+ DfaImplicit,
+ DfaExplicit
+};
+
+/**
+ * Returns the textual name of the given algorithm ("brute_force", "dfa_implicit"
+ * or "dfa_explicit"), or an empty string for any other value.
+ */
+vespalib::string to_string(FuzzyMatchingAlgorithm algo);
+
+/**
+ * Converts a textual algorithm name ("brute_force", "dfa_implicit" or "dfa_explicit")
+ * to the corresponding enum value. Returns default_algo when the text matches none of them.
+ */
+FuzzyMatchingAlgorithm fuzzy_matching_algorithm_from_string(const vespalib::string& algo,
+ FuzzyMatchingAlgorithm default_algo);
+
+/**
+ * Writes the textual name of the algorithm, as produced by to_string(), to the stream.
+ */
+std::ostream& operator<<(std::ostream& out, FuzzyMatchingAlgorithm algo);
+
+}