diff options
Diffstat (limited to 'vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp')
-rw-r--r-- | vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp | 24 |
1 files changed, 15 insertions, 9 deletions
diff --git a/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp index 55dd459ff26..11a822d2d7e 100644 --- a/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp +++ b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp @@ -22,16 +22,20 @@ struct ExplicitDfaMatcher { const std::span<const DfaNodeType> _nodes; const bool _is_cased; + const bool _is_prefix; - ExplicitDfaMatcher(const std::span<const DfaNodeType> nodes, bool is_cased) noexcept + ExplicitDfaMatcher(const std::span<const DfaNodeType> nodes, bool is_cased, bool is_prefix) noexcept : _nodes(nodes), - _is_cased(is_cased) + _is_cased(is_cased), + _is_prefix(is_prefix) {} static constexpr uint8_t max_edits() noexcept { return MaxEdits; } bool is_cased() const noexcept { return _is_cased; } + bool is_prefix() const noexcept { return _is_prefix; } + StateType start() const noexcept { return &_nodes[0]; } @@ -99,21 +103,21 @@ ExplicitLevenshteinDfaImpl<MaxEdits>::~ExplicitLevenshteinDfaImpl() = default; template <uint8_t MaxEdits> LevenshteinDfa::MatchResult ExplicitLevenshteinDfaImpl<MaxEdits>::match(std::string_view u8str) const { - ExplicitDfaMatcher<MaxEdits> matcher(_nodes, _is_cased); + ExplicitDfaMatcher<MaxEdits> matcher(_nodes, _is_cased, _is_prefix); return MatchAlgorithm<MaxEdits>::match(matcher, u8str); } template <uint8_t MaxEdits> LevenshteinDfa::MatchResult ExplicitLevenshteinDfaImpl<MaxEdits>::match(std::string_view u8str, std::string& successor_out) const { - ExplicitDfaMatcher<MaxEdits> matcher(_nodes, _is_cased); + ExplicitDfaMatcher<MaxEdits> matcher(_nodes, _is_cased, _is_prefix); return MatchAlgorithm<MaxEdits>::match(matcher, u8str, successor_out); } template <uint8_t MaxEdits> LevenshteinDfa::MatchResult ExplicitLevenshteinDfaImpl<MaxEdits>::match(std::string_view u8str, std::vector<uint32_t>& successor_out) const { - ExplicitDfaMatcher<MaxEdits> matcher(_nodes, _is_cased); + ExplicitDfaMatcher<MaxEdits> matcher(_nodes, _is_cased, _is_prefix); return MatchAlgorithm<MaxEdits>::match(matcher, u8str, successor_out); } @@ -199,10 +203,12 @@ class ExplicitLevenshteinDfaBuilderImpl : public DfaSteppingBase<Traits> { using Base::transitions; const bool _is_cased; + const bool _is_prefix; public: - ExplicitLevenshteinDfaBuilderImpl(std::span<const uint32_t> str, bool is_cased) noexcept + ExplicitLevenshteinDfaBuilderImpl(std::span<const uint32_t> str, bool is_cased, bool is_prefix) noexcept : DfaSteppingBase<Traits>(str), - _is_cased(is_cased) + _is_cased(is_cased), + _is_prefix(is_prefix) { assert(str.size() < UINT32_MAX / max_out_edges_per_node()); } @@ -217,7 +223,7 @@ public: template <typename Traits> LevenshteinDfa ExplicitLevenshteinDfaBuilderImpl<Traits>::build_dfa() const { - auto dfa = std::make_unique<ExplicitLevenshteinDfaImpl<max_edits()>>(_is_cased); + auto dfa = std::make_unique<ExplicitLevenshteinDfaImpl<max_edits()>>(_is_cased, _is_prefix); ExploreState<StateType> exp; // Use BFS instead of DFS to ensure most node edges point to nodes that are allocated _after_ // the parent node, which means the CPU can skip ahead instead of ping-ponging back and forth. @@ -257,7 +263,7 @@ LevenshteinDfa ExplicitLevenshteinDfaBuilderImpl<Traits>::build_dfa() const { template <typename Traits> LevenshteinDfa ExplicitLevenshteinDfaBuilder<Traits>::build_dfa() const { - ExplicitLevenshteinDfaBuilderImpl<Traits> builder(_u32_str_buf, _is_cased); + ExplicitLevenshteinDfaBuilderImpl<Traits> builder(_u32_str_buf, _is_cased, _is_prefix); return builder.build_dfa(); } |