diff options
Diffstat (limited to 'vespalib/src')
18 files changed, 2663 insertions, 3 deletions
diff --git a/vespalib/src/tests/fuzzy/CMakeLists.txt b/vespalib/src/tests/fuzzy/CMakeLists.txt index bc48e775711..00a89d0a604 100644 --- a/vespalib/src/tests/fuzzy/CMakeLists.txt +++ b/vespalib/src/tests/fuzzy/CMakeLists.txt @@ -16,3 +16,12 @@ vespa_add_executable(vespalib_levenshtein_distance_test_app TEST GTest::GTest ) vespa_add_test(NAME vespalib_levenshtein_distance_test_app COMMAND vespalib_levenshtein_distance_test_app) + +vespa_add_executable(vespalib_levenshtein_dfa_test_app TEST + SOURCES + levenshtein_dfa_test.cpp + DEPENDS + vespalib + GTest::GTest +) +vespa_add_test(NAME vespalib_levenshtein_dfa_test_app COMMAND vespalib_levenshtein_dfa_test_app) diff --git a/vespalib/src/tests/fuzzy/levenshtein_dfa_test.cpp b/vespalib/src/tests/fuzzy/levenshtein_dfa_test.cpp new file mode 100644 index 00000000000..6966fd0b703 --- /dev/null +++ b/vespalib/src/tests/fuzzy/levenshtein_dfa_test.cpp @@ -0,0 +1,507 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/vespalib/fuzzy/levenshtein_dfa.h> +#include <vespa/vespalib/fuzzy/dfa_stepping_base.h> +#include <vespa/vespalib/fuzzy/unicode_utils.h> +#include <vespa/vespalib/fuzzy/levenshtein_distance.h> // For benchmarking purposes +#include <vespa/vespalib/util/benchmark_timer.h> +#include <charconv> +#include <concepts> +#include <filesystem> +#include <fstream> +#include <string> +#include <string_view> +#include <gtest/gtest.h> + +using namespace ::testing; +using namespace vespalib::fuzzy; +namespace fs = std::filesystem; + +static std::string benchmark_dictionary; + +struct LevenshteinDfaTest : TestWithParam<LevenshteinDfa::DfaType> { + + static LevenshteinDfa::DfaType dfa_type() noexcept { return GetParam(); } + + static std::optional<uint32_t> calculate(std::string_view left, std::string_view right, uint32_t threshold) { + auto dfa_lhs = LevenshteinDfa::build(left, threshold, dfa_type()); + auto maybe_match_lhs = dfa_lhs.match(right, nullptr); + + auto dfa_rhs = LevenshteinDfa::build(right, threshold, dfa_type()); + auto maybe_match_rhs = dfa_rhs.match(left, nullptr); + + EXPECT_EQ(maybe_match_lhs.matches(), maybe_match_rhs.matches()); + if (maybe_match_lhs.matches()) { + EXPECT_EQ(maybe_match_lhs.edits(), maybe_match_rhs.edits()); + return {maybe_match_lhs.edits()}; + } + return std::nullopt; + } + + static std::optional<uint32_t> calculate(std::u8string_view left, std::u8string_view right, uint32_t threshold) { + std::string_view lhs_ch(reinterpret_cast<const char*>(left.data()), left.size()); + std::string_view rhs_ch(reinterpret_cast<const char*>(right.data()), right.size()); + return calculate(lhs_ch, rhs_ch, threshold); + } + +}; + +INSTANTIATE_TEST_SUITE_P(AllDfaTypes, + LevenshteinDfaTest, + Values(LevenshteinDfa::DfaType::Explicit, + LevenshteinDfa::DfaType::Implicit), + PrintToStringParamName()); + +// Same as existing non-DFA Levenshtein tests, but with some added instantiations +// for smaller max distances. 
+TEST_P(LevenshteinDfaTest, edge_cases_have_correct_edit_distance) { + EXPECT_EQ(calculate("abc", "abc", 2), std::optional{0}); + for (auto max : {1, 2}) { + EXPECT_EQ(calculate("abc", "ab1", max), std::optional{1}) << max; + EXPECT_EQ(calculate("abc", "1bc", max), std::optional{1}) << max; + EXPECT_EQ(calculate("abc", "a1c", max), std::optional{1}) << max; + EXPECT_EQ(calculate("abc", "ab", max), std::optional{1}) << max; + EXPECT_EQ(calculate("abc", "abcd", max), std::optional{1}) << max; + EXPECT_EQ(calculate("a", "", max), std::optional{1}) << max; + } + EXPECT_EQ(calculate("bc", "abcd", 2), std::optional{2}); + EXPECT_EQ(calculate("ab", "abcd", 2), std::optional{2}); + EXPECT_EQ(calculate("cd", "abcd", 2), std::optional{2}); + EXPECT_EQ(calculate("ad", "abcd", 2), std::optional{2}); + EXPECT_EQ(calculate("abc", "a12", 2), std::optional{2}); + EXPECT_EQ(calculate("abc", "123", 2), std::nullopt); + EXPECT_EQ(calculate("ab", "", 1), std::nullopt); + EXPECT_EQ(calculate("ab", "", 2), std::optional{2}); + EXPECT_EQ(calculate("abc", "", 2), std::nullopt); + EXPECT_EQ(calculate("abc", "123", 2), std::nullopt); +} + +TEST_P(LevenshteinDfaTest, distance_is_in_utf32_code_point_space) { + // Each hiragana/katakana/kanji corresponds to multiple (3) UTF-8 chars but a single UTF-32 code point. 
+ EXPECT_EQ(calculate(u8"猫", u8"猫", 2), std::optional{0}); + EXPECT_EQ(calculate(u8"猫", u8"犬", 2), std::optional{1}); + EXPECT_EQ(calculate(u8"猫と犬", u8"犬と猫", 2), std::optional{2}); + EXPECT_EQ(calculate(u8"猫は好き", u8"犬が好き", 2), std::optional{2}); + EXPECT_EQ(calculate(u8"カラオケ", u8"カラオケ", 2), std::optional{0}); + EXPECT_EQ(calculate(u8"カラオケ", u8"カラoケ", 2), std::optional{1}); + EXPECT_EQ(calculate(u8"カラオケ", u8"カraオケ", 2), std::optional{2}); + EXPECT_EQ(calculate(u8"kaラオケ", u8"カラオケ", 2), std::optional{2}); + EXPECT_EQ(calculate(u8"カラオケ", u8"カラoke", 2), std::nullopt); +} + +void test_dfa_successor(const LevenshteinDfa& dfa, std::string_view source, std::string_view expected_successor) { + std::string successor; + auto m = dfa.match(source, &successor); + if (m.matches()) { + FAIL() << "Expected '" << source << "' to emit a successor, but it " + << "matched with " << static_cast<uint32_t>(m.edits()) + << " edits (of max " << static_cast<uint32_t>(m.max_edits()) << " edits)"; + } + EXPECT_EQ(successor, expected_successor); + EXPECT_TRUE(dfa.match(successor, nullptr).matches()); +} + +TEST_P(LevenshteinDfaTest, can_generate_successors_to_mismatching_source_strings) { + auto dfa = LevenshteinDfa::build("food", 1, dfa_type()); + + test_dfa_successor(dfa, "", "\x01""food"); + test_dfa_successor(dfa, "faa", "faod"); + test_dfa_successor(dfa, "fooooo", "foop"); + test_dfa_successor(dfa, "ooof", "pfood"); + test_dfa_successor(dfa, "fo", "fo\x01""d"); + test_dfa_successor(dfa, "oo", "ood"); + test_dfa_successor(dfa, "ooo", "oood"); + test_dfa_successor(dfa, "foh", "fohd"); + test_dfa_successor(dfa, "foho", "fohod"); + test_dfa_successor(dfa, "foxx", "foyd"); + test_dfa_successor(dfa, "xfa", "xfood"); + test_dfa_successor(dfa, "gg", "good"); + test_dfa_successor(dfa, "gp", "hfood"); + test_dfa_successor(dfa, "ep", "f\x01""od"); + test_dfa_successor(dfa, "hfoodz", "hood"); + test_dfa_successor(dfa, "aooodz", "bfood"); + + // Also works with Unicode + // 2 chars + 
test_dfa_successor(dfa, "\xc3\x86""x", // "Æx" + "\xc3\x87""food"); // "Çfood" + // 3 chars + test_dfa_successor(dfa, "\xe7\x8c\xab""\xe3\x81\xaf", // "猫は" + "\xe7\x8c\xac""food"); // "猬food" + // 4 chars + test_dfa_successor(dfa, "\xf0\x9f\xa4\xa9""abc", // <starry eyed emoji>abc + "\xf0\x9f\xa4\xa9""food"); // <starry eyed emoji>food + + // Note that as a general rule, emojis are fickle beasts to deal with since a single + // emoji often takes up multiple code points, which we consider separate characters + // but a user sees as a single actual rendered glyph. + // Multi-code point character edit distance support is left as an exercise for the reader :D +} + +TEST_P(LevenshteinDfaTest, successor_is_well_defined_for_max_unicode_code_point_input) { + auto dfa = LevenshteinDfa::build("food", 1, dfa_type()); + // The successor string must be lexicographically larger than the input string. + // In the presence of a wildcard output edge we handle this by increasing the input + // character by 1 and encoding it back as UTF-8. + // It is possible (though arguably very unlikely) that the input character is + // U+10FFFF, which is the maximum valid Unicode character. We have to ensure that + // we can encode U+10FFFF + 1, even though it's technically outside the valid range. + // Luckily, UTF-8 can technically (there's that word again) encode up to U+1FFFFF, + // so the resulting string is byte-wise greater, and that's what matters since we + // don't guarantee that the successor string is _valid_ UTF-8. + // This problem does not happen with the target string, as it's an invalid character + // and will be replaced with the Unicode replacement char before we ever see it. 
+ test_dfa_successor(dfa, "\xf4\x8f\xbf\xbf""xyz", // U+10FFFF + "\xf4\x90\x80\x80""food");// U+10FFFF+1 +} + +TEST_P(LevenshteinDfaTest, successor_is_well_defined_for_empty_target) { + auto dfa = LevenshteinDfa::build("", 1, dfa_type()); + test_dfa_successor(dfa, "aa", "b"); + test_dfa_successor(dfa, "b\x01", "c"); + test_dfa_successor(dfa, "vespa", "w"); +} + +// We should normally be able to rely on higher-level components to ensure we +// only receive valid UTF-8, but make sure we don't choke on it if we do get it. +TEST_P(LevenshteinDfaTest, malformed_utf8_is_replaced_with_placeholder_char) { + // 0xff is not a valid encoding and is implicitly converted to U+FFFD, + // which is the standard Unicode replacement character. + EXPECT_EQ(calculate("\xff", "a", 2), std::optional{1}); + EXPECT_EQ(calculate("\xff\xff", "a", 2), std::optional{2}); + EXPECT_EQ(calculate("a", "\xff", 2), std::optional{1}); + EXPECT_EQ(calculate("a", "\xff\xff\xff", 2), std::nullopt); + EXPECT_EQ(calculate("\xff", "\xef\xbf\xbd"/*U+FFFD*/, 2), std::optional{0}); +} + +TEST_P(LevenshteinDfaTest, unsupported_max_edits_value_throws) { + EXPECT_THROW((void)LevenshteinDfa::build("abc", 0, dfa_type()), std::invalid_argument); + EXPECT_THROW((void)LevenshteinDfa::build("abc", 3, dfa_type()), std::invalid_argument); +} + +// Turn integer v into its bitwise string representation with the MSB as the leftmost character. +template <std::unsigned_integral T> +std::string bits_to_str(T v) { + constexpr const uint8_t n_bits = sizeof(T) * 8; + std::string ret(n_bits, '0'); + for (uint8_t bit = 0; bit < n_bits; ++bit) { + if (v & (1 << bit)) { + ret[n_bits - bit - 1] = '1'; + } + } + return ret; +} + +using DfaTypeAndMaxEdits = std::tuple<LevenshteinDfa::DfaType, uint32_t>; + +struct LevenshteinDfaSuccessorTest : TestWithParam<DfaTypeAndMaxEdits> { + // Print test suffix as e.g. "/Explicit_1" instead of just a GTest-chosen number. 
+ static std::string stringify_params(const TestParamInfo<ParamType>& info) { + std::ostringstream ss; + ss << std::get<0>(info.param) << '_' << std::get<1>(info.param); + return ss.str(); + } +}; + +INSTANTIATE_TEST_SUITE_P(SupportedMaxEdits, + LevenshteinDfaSuccessorTest, + Combine(Values(LevenshteinDfa::DfaType::Explicit, + LevenshteinDfa::DfaType::Implicit), + Values(1, 2)), + LevenshteinDfaSuccessorTest::stringify_params); + +/** + * Exhaustively test successor generation by matching all target and source strings + * in {0,1}^8 against each other. Since we generate bit strings identical to the + * bit patterns of the underlying counter(s), any string at index `i+1` will compare + * lexicographically greater than the one at `i`. We use this to test that we never + * miss a valid match that comes between a mismatch and its generated successor. + * + * For each mismatch we note the successor it emitted. Verify that each subsequent + * match() invocation for a source string < the successor results in a mismatch. + * + * We test this for both max edit distance 1 and 2. Despite being an exhaustive test, + * this completes in a few dozen milliseconds even with ASan instrumentation. + * + * Inspired by approach used by Lucene DFA exhaustive testing. 
+ */ +TEST_P(LevenshteinDfaSuccessorTest, exhaustive_successor_test) { + const auto [dfa_type, max_edits] = GetParam(); + for (uint32_t i = 0; i < 256; ++i) { + const auto target = bits_to_str(static_cast<uint8_t>(i)); + auto target_dfa = LevenshteinDfa::build(target, max_edits, dfa_type); + std::string skip_to, successor; + for (uint32_t j = 0; j < 256; ++j) { + const auto source = bits_to_str(static_cast<uint8_t>(j)); + auto maybe_match = target_dfa.match(source, &successor); + if (maybe_match.matches() && !skip_to.empty()) { + ASSERT_GE(source, skip_to); + } else if (!maybe_match.matches()) { + ASSERT_FALSE(successor.empty()) << source; + ASSERT_GE(successor, skip_to) << source; + ASSERT_GT(successor, source) << source; + skip_to = successor; + } + } + } +} + +namespace { + +template <uint8_t MaxEdits> +void explore(const DfaSteppingBase<FixedMaxEditDistanceTraits<MaxEdits>>& stepper, + const typename DfaSteppingBase<FixedMaxEditDistanceTraits<MaxEdits>>::StateType& in_state) +{ + ASSERT_EQ(stepper.can_match(stepper.step(in_state, WILDCARD)), + stepper.can_wildcard_step(in_state)); + if (!stepper.can_match(in_state)) { + return; // reached the end of the line + } + // DFS-explore all matching transitions, as well as one non-matching transition + auto t = stepper.transitions(in_state); + for (uint32_t c: t.u32_chars()) { + ASSERT_NO_FATAL_FAILURE(explore(stepper, stepper.step(in_state, c))); + } + ASSERT_NO_FATAL_FAILURE(explore(stepper, stepper.step(in_state, WILDCARD))); +} + +} // anon ns + +using StateStepperTypes = Types< + DfaSteppingBase<FixedMaxEditDistanceTraits<1>>, + DfaSteppingBase<FixedMaxEditDistanceTraits<2>> +>; + +template <typename SteppingBase> +struct LevenshteinSparseStateTest : Test {}; + +TYPED_TEST_SUITE(LevenshteinSparseStateTest, StateStepperTypes); + +// "Meta-test" for checking that the `can_wildcard_step` predicate function is +// functionally equivalent to evaluating `can_match(stepper.step(in_state, WILDCARD))` 
+TYPED_TEST(LevenshteinSparseStateTest, wildcard_step_predcate_is_equivalent_to_step_with_can_match) { + for (const char* target : {"", "a", "ab", "abc", "abcdef", "aaaaa"}) { + auto u32_target = utf8_string_to_utf32(target); + TypeParam stepper(u32_target); + ASSERT_NO_FATAL_FAILURE(explore(stepper, stepper.start())); + } +} + +template <typename T> +void do_not_optimize_away(T&& t) noexcept { + asm volatile("" : : "m"(t) : "memory"); // Clobber the value to avoid losing it to compiler optimizations +} + +enum class BenchmarkType { + DfaExplicit, + DfaImplicit, + Legacy +}; + +const char* to_s(BenchmarkType t) noexcept { + // Note: need underscores since this is used as part of GTest-generated test instance names + switch (t) { + case BenchmarkType::DfaExplicit: return "DFA_explicit"; + case BenchmarkType::DfaImplicit: return "DFA_implicit"; + case BenchmarkType::Legacy: return "legacy"; + } + abort(); +} + +[[nodiscard]] bool benchmarking_enabled() noexcept { + return !benchmark_dictionary.empty(); +} + +[[nodiscard]] std::vector<uint32_t> string_lengths() { + return {2, 8, 16, 64, 256, 1024, 1024*16, 1024*64}; +} + +struct LevenshteinBenchmarkTest : TestWithParam<BenchmarkType> { + + static std::string stringify_params(const TestParamInfo<ParamType>& info) { + return to_s(info.param); + } + + void SetUp() override { + if (!benchmarking_enabled()) { + GTEST_SKIP() << "benchmarking not enabled"; + } + } + + static BenchmarkType benchmark_type() noexcept { return GetParam(); } + + static const std::vector<std::string>& load_dictionary_once() { + static auto sorted_lines = read_and_sort_all_lines(fs::path(benchmark_dictionary)); + return sorted_lines; + } + + static std::vector<std::string> read_and_sort_all_lines(const fs::path& file_path) { + std::ifstream ifs(file_path); + if (!ifs.is_open()) { + throw std::invalid_argument("File does not exist"); + } + std::vector<std::string> lines; + std::string line; + while (std::getline(ifs, line)) { + 
lines.emplace_back(line); + } + std::sort(lines.begin(), lines.end()); + return lines; + } +}; + +INSTANTIATE_TEST_SUITE_P(AllDfaTypes, + LevenshteinBenchmarkTest, + Values(BenchmarkType::DfaExplicit, + BenchmarkType::DfaImplicit, + BenchmarkType::Legacy), + LevenshteinBenchmarkTest::stringify_params); + +// ("abc", 1) => "a" +// ("abc", 3) => "abc" +// ("abc", 7) => "abcabca" +// ... and so on. +std::string repeated_string(std::string_view str, uint32_t sz) { + uint32_t chunks = sz / str.size(); + std::string ret; + ret.reserve(sz); + for (uint32_t i = 0; i < chunks; ++i) { + ret += str; + } + uint32_t rem = sz % str.size(); + ret += str.substr(0, rem); + return ret; +} + +TEST_P(LevenshteinBenchmarkTest, benchmark_worst_case_matching_excluding_setup_time) { + using vespalib::BenchmarkTimer; + const auto type = benchmark_type(); + fprintf(stderr, "------ %s ------\n", to_s(type)); + for (uint8_t k : {1, 2}) { + for (uint32_t sz : string_lengths()) { + // Use same string as both source and target. This is the worst case in that the entire + // string must be matched and any sparse representation is always maximally filled since + // we never expend any edits via mismatches. + // Also ensure that we have multiple out-edges per node (i.e. don't just repeat "AAA" etc.). + std::string str = repeated_string("abcde", sz); + double min_time_s; + if (type == BenchmarkType::DfaExplicit || type == BenchmarkType::DfaImplicit) { + auto dfa_type = (type == BenchmarkType::DfaExplicit) ? 
LevenshteinDfa::DfaType::Explicit + : LevenshteinDfa::DfaType::Implicit; + auto dfa = LevenshteinDfa::build(str, k, dfa_type); + min_time_s = BenchmarkTimer::benchmark([&] { + auto res = dfa.match(str, nullptr); // not benchmarking successor generation + do_not_optimize_away(res); + }, 1.0); + } else { + min_time_s = BenchmarkTimer::benchmark([&] { + auto str_u32 = utf8_string_to_utf32(str); // Must be done per term, so included in benchmark body + auto res = vespalib::LevenshteinDistance::calculate(str_u32, str_u32, k); + do_not_optimize_away(res); + }, 1.0); + } + fprintf(stderr, "k=%u, sz=%u: \t%g us\n", k, sz, min_time_s * 1000000.0); + } + } +} + +TEST(LevenshteinExplicitDfaBenchmarkTest, benchmark_explicit_dfa_construction) { + if (!benchmarking_enabled()) { + GTEST_SKIP() << "benchmarking not enabled"; + } + using vespalib::BenchmarkTimer; + for (uint8_t k : {1, 2}) { + for (uint32_t sz : string_lengths()) { + std::string str = repeated_string("abcde", sz); + double min_time_s = BenchmarkTimer::benchmark([&] { + auto dfa = LevenshteinDfa::build(str, k, LevenshteinDfa::DfaType::Explicit); + do_not_optimize_away(dfa); + }, 2.0); + auto dfa = LevenshteinDfa::build(str, k, LevenshteinDfa::DfaType::Explicit); + size_t mem_usage = dfa.memory_usage(); + fprintf(stderr, "k=%u, sz=%u: \t%g us \t%zu bytes\n", k, sz, min_time_s * 1000000.0, mem_usage); + } + } +} + +TEST_P(LevenshteinBenchmarkTest, benchmark_brute_force_dictionary_scan) { + using vespalib::BenchmarkTimer; + const auto type = benchmark_type(); + const auto dict = load_dictionary_once(); + std::vector target_lengths = {1, 2, 4, 8, 12, 16, 24, 32, 64}; + fprintf(stderr, "------ %s ------\n", to_s(type)); + for (uint8_t k : {1, 2}) { + for (uint32_t sz : target_lengths) { + std::string str = repeated_string("abcde", sz); + double min_time_s; + if (type == BenchmarkType::DfaExplicit || type == BenchmarkType::DfaImplicit) { + auto dfa_type = (type == BenchmarkType::DfaExplicit) ? 
LevenshteinDfa::DfaType::Explicit + : LevenshteinDfa::DfaType::Implicit; + auto dfa = LevenshteinDfa::build(str, k, dfa_type); + min_time_s = BenchmarkTimer::benchmark([&] { + for (const auto& line : dict) { + auto res = dfa.match(line, nullptr); + do_not_optimize_away(res); + } + }, 2.0); + } else { + min_time_s = BenchmarkTimer::benchmark([&] { + auto target_u32 = utf8_string_to_utf32(str); + for (const auto& line : dict) { + auto line_u32 = utf8_string_to_utf32(line); + auto res = vespalib::LevenshteinDistance::calculate(line_u32, target_u32, k); + do_not_optimize_away(res); + } + }, 2.0); + } + fprintf(stderr, "k=%u, sz=%u: \t%g us\n", k, sz, min_time_s * 1000000.0); + } + } +} + +TEST_P(LevenshteinBenchmarkTest, benchmark_skipping_dictionary_scan) { + const auto type = benchmark_type(); + if (type == BenchmarkType::Legacy) { + GTEST_SKIP() << "Skipping not supported for legacy implementation"; + } + using vespalib::BenchmarkTimer; + const auto dict = load_dictionary_once(); + std::vector target_lengths = {1, 2, 4, 8, 12, 16, 24, 32, 64}; + fprintf(stderr, "------ %s ------\n", to_s(type)); + for (uint8_t k : {1, 2}) { + for (uint32_t sz : target_lengths) { + std::string str = repeated_string("abcde", sz); + auto dfa_type = (type == BenchmarkType::DfaExplicit) ? 
LevenshteinDfa::DfaType::Explicit + : LevenshteinDfa::DfaType::Implicit; + auto dfa = LevenshteinDfa::build(str, k, dfa_type); + double min_time_s = BenchmarkTimer::benchmark([&] { + auto iter = dict.cbegin(); + auto end = dict.cend(); + std::string successor; + while (iter != end) { + auto maybe_match = dfa.match(*iter, &successor); + if (maybe_match.matches()) { + ++iter; + } else { + iter = std::lower_bound(iter, end, successor); + } + } + }, 2.0); + fprintf(stderr, "k=%u, sz=%u: \t%g us\n", k, sz, min_time_s * 1000000.0); + } + } +} + +// TODO: +// - explicit successor generation benchmark + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + if (argc > 1) { + benchmark_dictionary = argv[1]; + if (!fs::exists(fs::path(benchmark_dictionary))) { + fprintf(stderr, "Benchmark dictionary file '%s' does not exist\n", benchmark_dictionary.c_str()); + return 1; + } + } + return RUN_ALL_TESTS(); +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt b/vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt index 1d770163e06..bdbb03bcfee 100644 --- a/vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt +++ b/vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt @@ -1,8 +1,12 @@ # Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_library(vespalib_vespalib_fuzzy OBJECT - SOURCES + SOURCES + explicit_levenshtein_dfa.cpp fuzzy_matcher.cpp + implicit_levenshtein_dfa.cpp + levenshtein_dfa.cpp levenshtein_distance.cpp - DEPENDS - ) + unicode_utils.cpp + DEPENDS +) diff --git a/vespalib/src/vespa/vespalib/fuzzy/dfa_matcher.h b/vespalib/src/vespa/vespalib/fuzzy/dfa_matcher.h new file mode 100644 index 00000000000..c445c60cc01 --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/dfa_matcher.h @@ -0,0 +1,70 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include <concepts> +#include <cstdint> + +namespace vespalib::fuzzy { + +// Concept that all DFA matcher implementations must satisfy +template <typename T> +concept DfaMatcher = requires(T a) { + typename T::StateType; + typename T::StateParamType; + typename T::EdgeType; + + // Initial (starting) state of the DFA + { a.start() } -> std::same_as<typename T::StateType>; + + // Whether a given state constitutes a string match within the maximum number of edits + { a.is_match(typename T::StateType{}) } -> std::same_as<bool>; + + // Whether a given state _may_ result in a match, either in the given state or in the + // future if the remaining string input is within the max edit distance + { a.can_match(typename T::StateType{}) } -> std::same_as<bool>; + + // Whether the given state is a valid state. Used for invariant checking. + { a.valid_state(typename T::StateType{}) } -> std::same_as<bool>; + + // Iff the given state represents a terminal matching state, returns the number of + // edits required to reach the state. Otherwise, returns max edits + 1. + { a.match_edit_distance(typename T::StateType{}) } -> std::same_as<uint8_t>; + + // Returns the state that is the result of matching the single logical Levenshtein + // matrix row represented by the given state with the input u32 character value. + { a.match_input(typename T::StateType{}, uint32_t{}) } -> std::same_as<typename T::StateType>; + + // Returns the state that is the result of matching the single logical Levenshtein + // matrix row represented by the given state with a sentinel character that cannot + // match any character in the target string (i.e. is always a mismatch). + { a.match_wildcard(typename T::StateType{}) } -> std::same_as<typename T::StateType>; + + // Whether there exists an out edge from the given state that can accept a + // _higher_ UTF-32 code point value (character) than the input u32 value. Such an + // edge _may_ be a wildcard edge, which accepts any character. 
+ { a.has_higher_out_edge(typename T::StateType{}, uint32_t{}) } -> std::same_as<bool>; + + // Whether there exists an out edge from the given state whose u32 character value + // _exactly_ matches the input u32 value. + { a.has_exact_explicit_out_edge(typename T::StateType{}, uint32_t{}) } -> std::same_as<bool>; + + // Returns the out edge `e` from the given state that satisfies _both_: + // 1. higher than the given u32 value + // 2. no other out edges are lower than `e` + // Only called in a context where the caller already knows that such an edge must exist. + { a.lowest_higher_explicit_out_edge(typename T::StateType{}, uint32_t{}) } -> std::same_as<typename T::EdgeType>; + + // Returns the out edge from the given state that has the lowest character value + { a.smallest_explicit_out_edge(typename T::StateType{}) } -> std::same_as<typename T::EdgeType>; + + // Whether the given edge is a valid edge. Used for invariant checking. + { a.valid_edge(typename T::EdgeType{}) } -> std::same_as<bool>; + + // For a given edge, returns the UTF-32 code point value the edge represents + { a.edge_to_u32char(typename T::EdgeType{}) } -> std::same_as<uint32_t>; + + // Returns the state that is the result of following the given edge from the given state. + { a.edge_to_state(typename T::StateType{}, typename T::EdgeType{}) } -> std::same_as<typename T::StateType>; +}; + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/dfa_stepping_base.h b/vespalib/src/vespa/vespalib/fuzzy/dfa_stepping_base.h new file mode 100644 index 00000000000..7e7881c5a14 --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/dfa_stepping_base.h @@ -0,0 +1,299 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include "sparse_state.h" +#include <span> + +namespace vespalib::fuzzy { + +template <typename Traits> +struct DfaSteppingBase { + using StateType = Traits::StateType; + using TransitionsType = Traits::TransitionsType; + + std::span<const uint32_t> _u32_str; // TODO std::u32string_view + + DfaSteppingBase(std::span<const uint32_t> str) noexcept + : _u32_str(str) + { + } + + [[nodiscard]] static constexpr uint8_t max_edits() noexcept { + return Traits::max_edits(); + } + + /** + * Returns the initial state of the DFA. This represents the first row in the + * Levenshtein matrix. + */ + [[nodiscard]] StateType start() const { + StateType ret; + const auto j = std::min(static_cast<uint32_t>(max_edits()), + static_cast<uint32_t>(_u32_str.size())); // e.g. the empty string as target + for (uint32_t i = 0; i <= j; ++i) { + ret.append(i, i); + } + return ret; + } + + /** + * DFA stepping function that takes an input (sparse) state and a 32-bit character value + * (does not have to be valid UTF-32, but usually is) and generates a resulting state + * that represents applying the Levenshtein algorithm on a particular matrix row using + * the provided source string character. + * + * The returned state only includes elements where the edit distance (cost) is within + * the maximum number of edits. All other elements are implicitly beyond the max + * edit distance. It doesn't matter _how_ far beyond they are, since we have a fixed + * maximum to consider. + * + * Stepping a non-matching state S (can_match(S) == false) results in another non- + * matching state. + * + * As an example, this is a visualization of stepping through all source characters of + * the string "fxod" when matching the target string "food" with max edits k=1. + * Note: the actual internal representation is logical <column#, cost> tuples, but + * rendering as a matrix makes things easier to understand. Elements _not_ part of the + * state are rendered as '-'. 
+ * + * f o o d + * start(): [0 1 - - -] + * 'f': [1 0 1 - -] + * 'x': [- 1 1 - -] + * 'o': [- - 1 1 -] + * 'd': [- - - - 1] + * + * In this case, the resulting edit distance is 1, with one substitution 'x' -> 'o'. + * + * If we pull out our trusty pen & paper and do the full matrix calculations, we see + * that the above is equivalent to the full matrix with all costs > k pruned away: + * + * f o o d + * [0 1 2 3 4] + * f [1 0 1 2 3] + * x [2 1 1 2 3] + * o [3 2 1 1 2] + * d [4 3 2 2 1] + * + * Since we're working on sparse states, stepping requires a bit of manual edge case + * handling when compared to a dense representation. + * + * We first have to handle the case where our state includes the 0th matrix column. + * In an explicit Levenshtein matrix of target string length n, source string length m, + * the first column is always the values [0, m], increasing with 1 per row (the first + * _row_ is handled by start()). + * + * To mirror this, if our sparse state includes column 0 we have to increment it by 1, + * unless doing so would bring the cost beyond our max number of edits, in which case + * we don't bother including the column in the new state at all. These correspond to + * the start() -> 'f' -> 'x' transitions in the example above. + * + * What remains is then to do the actual Levenshtein insert/delete/substitute formula + * for matching positions in the matrix. Let d represent the logical (full) Levenshtein + * distance matrix and cell d[i, j] be the minimum number of edits between source string + * character at i+1 and target string character at j+1: + * + * Insertion cost: d[i, j-1] + 1 + * Deletion cost: d[i-1, j] + 1 + * Substitution cost: d[i-1, j-1] + (s[i-1] == t[j-1] ? 
0 : 1) + * + * d[i, j] = min(Insertion cost, Deletion cost, Substitution cost) + * + * We have to turn this slightly on the head, as instead of going through a matrix row + * and "pulling" values from the previous row, we have to go through a state representing + * the previous row and "push" new values instead (iff these values are within max edits). + * This also means we compute costs for indexes offset by 1 from the source state index + * (can be visualized as the element one down diagonally to the right). + * + * Insertion considers the current row only, i.e. the state being generated. We always + * work left to right in column order, so we can check if the last element (if any) + * in our _new_ sparse state is equal to the index of our source state element. If not, + * we know that it was beyond max edits. Max edits + 1 is inherently beyond max edits + * and need not be included. + * + * Deletion considers the cell directly above our own, which is part of the input state + * if it exists. Since we're computing the costs of cells at index + 1, we know that the + * only way for this cell to be present in the state is if the _next_ element of our + * input state exists and has an index equal to index + 1. If so, the deletion cost is + * the cost recorded for this element + 1. + * + * Substitution considers the cell diagonally up to the left. This very conveniently + * happens to be the input state cell we're currently working from, so it's therefore + * always present. + * + * Example stepping with c='x', max edits k=1: + * + * ====== Initially ====== + * + * f o o d + * state_in: [1 0 1 - -] (0:1, 1:0, 2:1) + * out: [] () + * + * We have a 0th column in state_in, but incrementing it results in 2 > k, so not + * appended to out. 
+ * + * ====== State (0:1), computing for index 1 ====== + * + * Insertion: out state is empty (no cell to our left), so implicit insertion cost + * is > k + * Deletion: state_in[1] is (1:0), which means it represents the cell just above + * index 1. Deletion cost is therefore 0+1 = 1 + * Substitution: (t[0] = 'f') != (c = 'x'), so substitution cost is 1+1 = 2 + * + * Min cost is 1, which is <= k. Appending to output. + * + * out: [- 1] (1:1) + * + * ====== State (1:0), computing for index 2 ====== + * + * Insertion: last element in out has index 1 (cell to our immediate left) with cost + * 1, so insertion cost is 1+1 = 2 + * Deletion: state_in[2] is (2:1), which means it represents the cell just above + * index 2. Deletion cost is therefore 1+1 = 2 + * Substitution: (t[1] = 'o') != (c = 'x'), so substitution cost is 0+1 = 1 + * + * Min cost is 1, which is <= k. Appending to output. + * + * out: [- 1 1] (1:1, 2:1) + * + * ====== State (2:1), computing for index 3 ====== + * + * Insertion: last element in out has index 2 (cell to our immediate left) with cost + * 1, so insertion cost is 1+1 = 2 + * Deletion: state_in[3] does not exist, so implicit deletion cost is > k + * Substitution: (t[2] = 'o') != (c = 'x'), so substitution cost is 1+1 = 2 + * + * Min cost is 2, which is > k. Not appending to output. 
 *
 * Resulting output state (right-padded for clarity):
 *
 *   [- 1 1 - -] (1:1, 2:1)
 *
 */
[[nodiscard]] StateType step(const StateType& state_in, uint32_t c) const {
    if (state_in.empty()) {
        return state_in; // A non-matching state can only step to another equally non-matching state
    }
    StateType new_state;
    // Column 0 consumes no target character, so its cost grows by exactly 1 per
    // stepped source character; keep it only while it remains within max edits.
    if ((state_in.index(0) == 0) && (state_in.cost(0) < max_edits())) {
        new_state.append(0, state_in.cost(0) + 1);
    }
    for (uint32_t i = 0; i < state_in.size(); ++i) {
        const auto idx = state_in.index(i);
        if (idx == _u32_str.size()) [[unlikely]] {
            break; // Can't process beyond matrix width
        }
        const uint8_t sub_cost = (_u32_str[idx] == c) ? 0 : 1;
        // For our Levenshtein insert/delete/sub ops, we know that if a particular index is _not_
        // in the sparse state, its implicit distance is beyond the max edits, and need not be
        // considered.
        auto dist = state_in.cost(i) + sub_cost; // (Substitution)
        if (!new_state.empty() && (new_state.last_index() == idx)) { // (Insertion) anything to our immediate left?
            dist = std::min(dist, new_state.last_cost() + 1);
        }
        if ((i < state_in.size() - 1) && (state_in.index(i + 1) == idx + 1)) { // (Deletion) anything immediately above?
            dist = std::min(dist, state_in.cost(i + 1) + 1);
        }
        if (dist <= max_edits()) {
            // Costs are computed for indexes offset by 1 from the source element (diagonal down-right).
            new_state.append(idx + 1, dist);
        }
    }
    return new_state;
}

/**
 * Simplified version of step() which does not assemble a new state, but only checks
 * whether _any_ mismatching character can be substituted in and still result in a
 * potentially matching state. This is the case if the resulting state would contain
 * _at least one_ entry (recalling that we only retain entries that are within the
 * max number of edits).
 *
 * Consider using this directly instead of `can_match(step(state, WILDCARD))`,
 * which has the exact same semantics, but requires computing the full (sparse)
 * state before checking if it has any element at all.
can_wildcard_step() just
 * jumps straight to the last part.
 */
[[nodiscard]] bool can_wildcard_step(const StateType& state_in) const noexcept {
    if (state_in.empty()) {
        return false; // by definition
    }
    // Column 0: surviving it (cost < max edits) is enough for the stepped state to be non-empty.
    if ((state_in.index(0) == 0) && (state_in.cost(0) < max_edits())) {
        return true;
    }
    for (uint32_t i = 0; i < state_in.size(); ++i) {
        const auto idx = state_in.index(i);
        if (idx == _u32_str.size()) [[unlikely]] {
            break;
        }
        const uint8_t sub_cost = 1; // by definition
        auto dist = state_in.cost(i) + sub_cost; // (Substitution) with a guaranteed mismatching char
        // Insertion only looks at the entries already computed in the current row
        // and always increases the cost by 1. Since we always bail out immediately if
        // there would have been at least one entry within max edits, we transitively
        // know that since we have not bailed out yet there is no way we can get here
        // and have insertion actually yield a match. So skip computing it entirely.
        if ((i < state_in.size() - 1) && (state_in.index(i + 1) == idx + 1)) { // (Deletion) anything immediately above?
            dist = std::min(dist, state_in.cost(i + 1) + 1);
        }
        if (dist <= max_edits()) {
            return true; // At least one entry would survive; stepped state cannot be empty.
        }
    }
    return false;
}

/**
 * Checks if the given state represents a terminal state within the max number of edits
 */
[[nodiscard]] bool is_match(const StateType& state) const noexcept {
    // If the last index is equal to the string's length, it means we were able to match
    // the entire string and still be within the max edit distance.
    return (!state.empty() && state.last_index() == static_cast<uint32_t>(_u32_str.size()));
}

/**
 * Iff the input state represents a terminal matching state, returns the number of
 * edits required to reach the state. Otherwise, returns max edits + 1.
 */
[[nodiscard]] uint8_t match_edit_distance(const StateType& state) const noexcept {
    if (!is_match(state)) {
        return max_edits() + 1; // Sentinel; strictly greater than any possible matching distance
    }
    // The cost of the last (full-length) column is the edit distance of the whole string.
    return state.last_cost();
}

/**
 * Returns whether the given state _may_ end up matching the target string,
 * depending on the remaining source string characters.
 *
 * Note: is_match(s) => can_match(s) is true, but
 *       can_match(s) => is_match(s) is false
 */
[[nodiscard]] bool can_match(const StateType& state) const noexcept {
    // The presence of any entries at all indicates that we may still potentially match
    // the target string if the remaining input is within the maximum number of edits.
    return !state.empty();
}

/**
 * All valid character transitions from this state are those that are reachable
 * within the max edit distance.
 */
TransitionsType transitions(const StateType& state) const {
    TransitionsType t;
    for (size_t i = 0; i < state.size(); ++i) {
        const auto idx = state.index(i);
        if (idx < _u32_str.size()) [[likely]] {
            t.add_char(_u32_str[idx]); // Exact match of this target char is always a valid transition
        }
    }
    // We must ensure transitions are in increasing character order, so that the
    // lowest possible higher char than any candidate char can be found with a
    // simple "first-fit" linear scan.
+ t.sort(); + return t; + } + +}; + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/examples/food_dfa.svg b/vespalib/src/vespa/vespalib/fuzzy/examples/food_dfa.svg new file mode 100644 index 00000000000..0974e1d161f --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/examples/food_dfa.svg @@ -0,0 +1,286 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><!-- Generated by graphviz version 2.40.1 (20161225.0304) + --><!-- Title: levenshtein_dfa Pages: 1 --><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="333pt" height="488pt" viewBox="0.00 0.00 333.00 488.00"> +<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 484)"> +<title>levenshtein_dfa</title> +<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-484 329,-484 329,4 -4,4"/> +<!-- 0 --> +<g id="node1" class="node"> +<title>0</title> +<ellipse fill="none" stroke="#000000" cx="211" cy="-462" rx="18" ry="18"/> +<text text-anchor="middle" x="211" y="-457.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">0</text> +</g> +<!-- 1 --> +<g id="node2" class="node"> +<title>1</title> +<ellipse fill="none" stroke="#000000" cx="157" cy="-373.2" rx="18" ry="18"/> +<text text-anchor="middle" x="157" y="-369" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">1</text> +</g> +<!-- 0->1 --> +<g id="edge1" class="edge"> +<title>0->1</title> +<path fill="none" stroke="#000000" d="M201.5939,-446.5322C193.3592,-432.9906 181.2571,-413.0894 171.7374,-397.4348"/> +<polygon fill="#000000" stroke="#000000" points="174.6658,-395.5142 166.4795,-388.7885 168.6849,-399.1513 174.6658,-395.5142"/> +<text text-anchor="middle" x="189.9453" y="-413.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">f</text> +</g> +<!-- 2 --> +<g id="node3" class="node"> +<title>2</title> +<ellipse fill="none" 
stroke="#000000" cx="211" cy="-373.2" rx="18" ry="18"/> +<text text-anchor="middle" x="211" y="-369" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">2</text> +</g> +<!-- 0->2 --> +<g id="edge2" class="edge"> +<title>0->2</title> +<path fill="none" stroke="#000000" d="M211,-443.6006C211,-431.4949 211,-415.4076 211,-401.6674"/> +<polygon fill="#000000" stroke="#000000" points="214.5001,-401.272 211,-391.272 207.5001,-401.2721 214.5001,-401.272"/> +<text text-anchor="middle" x="214.8913" y="-413.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 3 --> +<g id="node4" class="node"> +<title>3</title> +<ellipse fill="none" stroke="#000000" cx="265" cy="-373.2" rx="18" ry="18"/> +<text text-anchor="middle" x="265" y="-369" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">3</text> +</g> +<!-- 0->3 --> +<g id="edge3" class="edge"> +<title>0->3</title> +<path fill="none" stroke="#000000" d="M220.4061,-446.5322C228.6408,-432.9906 240.7429,-413.0894 250.2626,-397.4348"/> +<polygon fill="#000000" stroke="#000000" points="253.3151,-399.1513 255.5205,-388.7885 247.3342,-395.5142 253.3151,-399.1513"/> +<text text-anchor="middle" x="244.7223" y="-413.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">*</text> +</g> +<!-- 4 --> +<g id="node5" class="node"> +<title>4</title> +<ellipse fill="none" stroke="#000000" cx="157" cy="-284.4" rx="18" ry="18"/> +<text text-anchor="middle" x="157" y="-280.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">4</text> +</g> +<!-- 1->4 --> +<g id="edge4" class="edge"> +<title>1->4</title> +<path fill="none" stroke="#000000" d="M157,-354.8006C157,-342.6949 157,-326.6076 157,-312.8674"/> +<polygon fill="#000000" stroke="#000000" points="160.5001,-312.472 157,-302.472 153.5001,-312.4721 160.5001,-312.472"/> +<text text-anchor="middle" x="158.9453" y="-324.6" font-family="Helvetica,Arial,sans-serif" 
font-size="14.00" fill="#000000">f</text> +</g> +<!-- 1->4 --> +<g id="edge6" class="edge"> +<title>1->4</title> +<path fill="none" stroke="#000000" d="M156.1231,-354.8902C155.8898,-349.221 155.6708,-342.9535 155.5554,-337.2 155.4057,-329.7348 155.4057,-327.8652 155.5554,-320.4 155.6041,-317.9728 155.6712,-315.454 155.7501,-312.9273"/> +<polygon fill="#000000" stroke="#000000" points="159.2558,-312.8309 156.1231,-302.7098 152.2605,-312.5755 159.2558,-312.8309"/> +<text text-anchor="middle" x="158.7223" y="-324.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">*</text> +</g> +<!-- 5 --> +<g id="node6" class="node"> +<title>5</title> +<ellipse fill="none" stroke="#000000" cx="103" cy="-284.4" rx="18" ry="18"/> +<text text-anchor="middle" x="103" y="-280.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">5</text> +</g> +<!-- 1->5 --> +<g id="edge5" class="edge"> +<title>1->5</title> +<path fill="none" stroke="#000000" d="M147.5939,-357.7322C139.3592,-344.1906 127.2571,-324.2894 117.7374,-308.6348"/> +<polygon fill="#000000" stroke="#000000" points="120.6658,-306.7142 112.4795,-299.9885 114.6849,-310.3513 120.6658,-306.7142"/> +<text text-anchor="middle" x="138.8913" y="-324.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 6 --> +<g id="node7" class="node"> +<title>6</title> +<ellipse fill="none" stroke="#000000" cx="307" cy="-284.4" rx="18" ry="18"/> +<text text-anchor="middle" x="307" y="-280.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">6</text> +</g> +<!-- 2->6 --> +<g id="edge7" class="edge"> +<title>2->6</title> +<path fill="none" stroke="#000000" d="M224.3484,-360.8527C240.3968,-346.0079 267.511,-320.9274 286.2858,-303.5607"/> +<polygon fill="#000000" stroke="#000000" points="288.8006,-306.0022 293.765,-296.6424 284.0473,-300.8635 288.8006,-306.0022"/> +<text text-anchor="middle" x="268.9453" y="-324.6" 
font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">f</text> +</g> +<!-- 7 --> +<g id="node8" class="node"> +<title>7</title> +<ellipse fill="none" stroke="#000000" cx="190" cy="-195.6" rx="18" ry="18"/> +<text text-anchor="middle" x="190" y="-191.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">7</text> +</g> +<!-- 2->7 --> +<g id="edge8" class="edge"> +<title>2->7</title> +<path fill="none" stroke="#000000" d="M208.8708,-355.1934C205.2075,-324.212 197.6865,-260.606 193.3267,-223.7346"/> +<polygon fill="#000000" stroke="#000000" points="196.7849,-223.1741 192.1348,-213.6543 189.8333,-223.9961 196.7849,-223.1741"/> +<text text-anchor="middle" x="205.8913" y="-280.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 3->6 --> +<g id="edge9" class="edge"> +<title>3->6</title> +<path fill="none" stroke="#000000" d="M272.7034,-356.9127C278.881,-343.8515 287.6665,-325.2765 294.7989,-310.1966"/> +<polygon fill="#000000" stroke="#000000" points="298.0914,-311.4211 299.2032,-300.8848 291.7635,-308.4281 298.0914,-311.4211"/> +<text text-anchor="middle" x="290.9453" y="-324.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">f</text> +</g> +<!-- 8 --> +<g id="node9" class="node"> +<title>8</title> +<ellipse fill="none" stroke="#000000" cx="263" cy="-195.6" rx="18" ry="18"/> +<text text-anchor="middle" x="263" y="-191.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">8</text> +</g> +<!-- 3->8 --> +<g id="edge10" class="edge"> +<title>3->8</title> +<path fill="none" stroke="#000000" d="M264.7972,-355.1934C264.4483,-324.212 263.732,-260.606 263.3168,-223.7346"/> +<polygon fill="#000000" stroke="#000000" points="266.8158,-223.6142 263.2033,-213.6543 259.8162,-223.6931 266.8158,-223.6142"/> +<text text-anchor="middle" x="267.8913" y="-280.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 
4->7 --> +<g id="edge11" class="edge"> +<title>4->7</title> +<path fill="none" stroke="#000000" d="M163.3627,-267.2785C168.1106,-254.5023 174.6869,-236.8062 180.1171,-222.194"/> +<polygon fill="#000000" stroke="#000000" points="183.4541,-223.2619 183.6568,-212.669 176.8925,-220.8234 183.4541,-223.2619"/> +<text text-anchor="middle" x="180.8913" y="-235.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 5->7 --> +<g id="edge14" class="edge"> +<title>5->7</title> +<path fill="none" stroke="#000000" d="M115.8371,-271.2973C130.1833,-256.6543 153.5826,-232.7709 170.2601,-215.7483"/> +<polygon fill="#000000" stroke="#000000" points="172.8948,-218.0603 177.393,-208.4678 167.8946,-213.1615 172.8948,-218.0603"/> +<text text-anchor="middle" x="157.7223" y="-235.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">*</text> +</g> +<!-- 9 --> +<g id="node10" class="node"> +<title>9</title> +<ellipse fill="#d3d3d3" stroke="#000000" cx="60" cy="-195.6" rx="18" ry="18"/> +<text text-anchor="middle" x="60" y="-191.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">9(1)</text> +</g> +<!-- 5->9 --> +<g id="edge12" class="edge"> +<title>5->9</title> +<path fill="none" stroke="#000000" d="M95.1131,-268.1127C88.7885,-255.0515 79.7938,-236.4765 72.4916,-221.3966"/> +<polygon fill="#000000" stroke="#000000" points="75.4909,-219.5597 67.9825,-212.0848 69.1907,-222.6105 75.4909,-219.5597"/> +<text text-anchor="middle" x="89.8913" y="-235.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 10 --> +<g id="node11" class="node"> +<title>10</title> +<ellipse fill="#d3d3d3" stroke="#000000" cx="126" cy="-195.6" rx="18" ry="18"/> +<text text-anchor="middle" x="126" y="-191.4" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">10(1)</text> +</g> +<!-- 5->10 --> +<g id="edge13" class="edge"> +<title>5->10</title> +<path fill="none" 
stroke="#000000" d="M107.5441,-266.856C110.7887,-254.3287 115.2168,-237.2326 118.9222,-222.9264"/> +<polygon fill="#000000" stroke="#000000" points="122.3462,-223.6657 121.4654,-213.1076 115.5698,-221.9105 122.3462,-223.6657"/> +<text text-anchor="middle" x="120.8913" y="-235.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text> +</g> +<!-- 6->8 --> +<g id="edge15" class="edge"> +<title>6->8</title> +<path fill="none" stroke="#000000" d="M298.9297,-268.1127C292.4172,-254.9694 283.1382,-236.2426 275.6413,-221.1125"/> +<polygon fill="#000000" stroke="#000000" points="278.5951,-219.1904 271.0191,-211.784 272.3229,-222.2984 278.5951,-219.1904"/> +<text text-anchor="middle" x="291.8913" y="-235.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 11 --> +<g id="node12" class="node"> +<title>11</title> +<ellipse fill="none" stroke="#000000" cx="170" cy="-106.8" rx="18" ry="18"/> +<text text-anchor="middle" x="170" y="-102.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">11</text> +</g> +<!-- 7->11 --> +<g id="edge16" class="edge"> +<title>7->11</title> +<path fill="none" stroke="#000000" d="M185.9527,-177.63C183.1694,-165.2722 179.4189,-148.6197 176.2537,-134.5662"/> +<polygon fill="#000000" stroke="#000000" points="179.5848,-133.4268 173.973,-124.4402 172.7558,-134.9649 179.5848,-133.4268"/> +<text text-anchor="middle" x="184.8913" y="-147" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 12 --> +<g id="node13" class="node"> +<title>12</title> +<ellipse fill="#d3d3d3" stroke="#000000" cx="113" cy="-18" rx="18" ry="18"/> +<text text-anchor="middle" x="113" y="-13.8" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">12(1)</text> +</g> +<!-- 7->12 --> +<g id="edge17" class="edge"> +<title>7->12</title> +<path fill="none" stroke="#000000" d="M195.7305,-178.1814C201.8942,-156.377 209.2729,-118.3097 
197,-88.8 185.9488,-62.2279 158.7426,-42.3831 138.2647,-30.5667"/> +<polygon fill="#000000" stroke="#000000" points="139.8619,-27.4509 129.4116,-25.7072 136.4936,-33.5872 139.8619,-27.4509"/> +<text text-anchor="middle" x="206.8913" y="-102.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text> +</g> +<!-- 8->11 --> +<g id="edge18" class="edge"> +<title>8->11</title> +<path fill="none" stroke="#000000" d="M249.6754,-182.8771C234.3441,-168.2382 208.9734,-144.0133 190.9836,-126.8359"/> +<polygon fill="#000000" stroke="#000000" points="192.9495,-123.8738 183.2999,-119.4993 188.1154,-128.9366 192.9495,-123.8738"/> +<text text-anchor="middle" x="227.8913" y="-147" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 13 --> +<g id="node14" class="node"> +<title>13</title> +<ellipse fill="#d3d3d3" stroke="#000000" cx="18" cy="-106.8" rx="18" ry="18"/> +<text text-anchor="middle" x="18" y="-102.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">13(1)</text> +</g> +<!-- 9->13 --> +<g id="edge19" class="edge"> +<title>9->13</title> +<path fill="none" stroke="#000000" d="M44.2356,-186.804C34.7839,-180.5699 23.5878,-171.2526 18.2174,-159.6 14.7103,-151.9903 13.6799,-143.0834 13.8048,-134.7757"/> +<polygon fill="#000000" stroke="#000000" points="17.3023,-134.9283 14.4812,-124.716 10.3181,-134.4586 17.3023,-134.9283"/> +<text text-anchor="middle" x="22.8913" y="-147" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 9->13 --> +<g id="edge21" class="edge"> +<title>9->13</title> +<path fill="none" stroke="#000000" d="M52.2966,-179.3127C46.119,-166.2515 37.3335,-147.6765 30.2011,-132.5966"/> +<polygon fill="#000000" stroke="#000000" points="33.2365,-130.8281 25.7968,-123.2848 26.9086,-133.8211 33.2365,-130.8281"/> +<text text-anchor="middle" x="45.7223" y="-147" font-family="Helvetica,Arial,sans-serif" font-size="14.00" 
fill="#000000">*</text> +</g> +<!-- 14 --> +<g id="node15" class="node"> +<title>14</title> +<ellipse fill="#d3d3d3" stroke="#000000" cx="72" cy="-106.8" rx="18" ry="18"/> +<text text-anchor="middle" x="72" y="-102.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">14(0)</text> +</g> +<!-- 9->14 --> +<g id="edge20" class="edge"> +<title>9->14</title> +<path fill="none" stroke="#000000" d="M62.4284,-177.63C64.0874,-165.353 66.3193,-148.8372 68.2105,-134.8421"/> +<polygon fill="#000000" stroke="#000000" points="71.7044,-135.1219 69.5752,-124.7433 64.7675,-134.1845 71.7044,-135.1219"/> +<text text-anchor="middle" x="71.8913" y="-147" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text> +</g> +<!-- 10->11 --> +<g id="edge22" class="edge"> +<title>10->11</title> +<path fill="none" stroke="#000000" d="M134.0703,-179.3127C140.5828,-166.1694 149.8618,-147.4426 157.3587,-132.3125"/> +<polygon fill="#000000" stroke="#000000" points="160.6771,-133.4984 161.9809,-122.984 154.4049,-130.3904 160.6771,-133.4984"/> +<text text-anchor="middle" x="155.8913" y="-147" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">o</text> +</g> +<!-- 10->12 --> +<g id="edge23" class="edge"> +<title>10->12</title> +<path fill="none" stroke="#000000" d="M124.6819,-177.5934C122.4142,-146.612 117.7583,-83.006 115.0594,-46.1346"/> +<polygon fill="#000000" stroke="#000000" points="118.5423,-45.772 114.3215,-36.0543 111.561,-46.2831 118.5423,-45.772"/> +<text text-anchor="middle" x="124.8913" y="-102.6" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text> +</g> +<!-- 11->12 --> +<g id="edge24" class="edge"> +<title>11->12</title> +<path fill="none" stroke="#000000" d="M161.5849,-90.7772C155.8346,-80.1448 147.859,-65.9928 140,-54 137.0775,-49.5402 133.7942,-44.8997 130.5502,-40.4917"/> +<polygon fill="#000000" stroke="#000000" points="133.2767,-38.294 124.4657,-32.4105 127.6846,-42.5045 
133.2767,-38.294"/> +<text text-anchor="middle" x="153.8913" y="-58.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text> +</g> +<!-- 13->12 --> +<g id="edge25" class="edge"> +<title>13->12</title> +<path fill="none" stroke="#000000" d="M29.0516,-92.0851C37.7254,-81.0086 50.4346,-65.7741 63.2174,-54 71.136,-46.7062 80.5495,-39.5675 89.0481,-33.5954"/> +<polygon fill="#000000" stroke="#000000" points="91.192,-36.3695 97.4722,-27.8367 87.2416,-30.5907 91.192,-36.3695"/> +<text text-anchor="middle" x="67.8913" y="-58.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text> +</g> +<!-- 14->12 --> +<g id="edge26" class="edge"> +<title>14->12</title> +<path fill="none" stroke="#000000" d="M79.7118,-90.0974C85.7248,-77.0741 94.1818,-58.7574 101.0669,-43.8455"/> +<polygon fill="#000000" stroke="#000000" points="104.3085,-45.1739 105.3228,-34.6277 97.9532,-42.2395 104.3085,-45.1739"/> +<text text-anchor="middle" x="100.8913" y="-58.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">d</text> +</g> +<!-- 14->12 --> +<g id="edge27" class="edge"> +<title>14->12</title> +<path fill="none" stroke="#000000" d="M71.9481,-88.4225C72.5604,-77.8966 74.4493,-64.6966 79.5554,-54 82.4978,-47.8362 86.8709,-42.0067 91.4921,-36.9052"/> +<polygon fill="#000000" stroke="#000000" points="94.2341,-39.1107 98.7881,-29.5445 89.2625,-34.1828 94.2341,-39.1107"/> +<text text-anchor="middle" x="82.7223" y="-58.2" font-family="Helvetica,Arial,sans-serif" font-size="14.00" fill="#000000">*</text> +</g> +</g> +</svg>
\ No newline at end of file diff --git a/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.cpp b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.cpp new file mode 100644 index 00000000000..f78de5cc082 --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.cpp @@ -0,0 +1,11 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "explicit_levenshtein_dfa.hpp"

namespace vespalib::fuzzy {

// Explicit template instantiations for the supported max edit distances (1 and 2),
// so the templated implementations in the .hpp are compiled once into this
// translation unit instead of in every includer.
template class ExplicitLevenshteinDfaImpl<1>;
template class ExplicitLevenshteinDfaImpl<2>;
template class ExplicitLevenshteinDfaBuilder<FixedMaxEditDistanceTraits<1>>;
template class ExplicitLevenshteinDfaBuilder<FixedMaxEditDistanceTraits<2>>;

}
diff --git a/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.h b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.h new file mode 100644 index 00000000000..49baad21530 --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.h @@ -0,0 +1,147 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once + +#include "dfa_stepping_base.h" +#include "levenshtein_dfa.h" +#include "sparse_state.h" +#include "unicode_utils.h" +#include <vector> + +namespace vespalib::fuzzy { + +// A doomed state is one that cannot possibly match the target string +constexpr const uint32_t DOOMED = UINT32_MAX; + +template <uint8_t MaxEdits> +struct DfaNode { + static constexpr uint8_t MaxCharOutEdges = diag(MaxEdits); // Not counting wildcard edge + + struct Edge { + uint32_t u32ch; + uint32_t node; + }; + + std::array<Edge, MaxCharOutEdges> match_out_edges_buf; + uint32_t wildcard_edge_to = DOOMED; + uint8_t num_match_out_edges = 0; + uint8_t edits = UINT8_MAX; + + [[nodiscard]] bool has_wildcard_edge() const noexcept { + return wildcard_edge_to != DOOMED; + } + + [[nodiscard]] uint32_t wildcard_edge_to_or_doomed() const noexcept { + return wildcard_edge_to; + } + + [[nodiscard]] std::span<const Edge> match_out_edges() const noexcept { + return std::span(match_out_edges_buf.begin(), num_match_out_edges); + } + + [[nodiscard]] uint32_t match_or_doomed(uint32_t ch) const noexcept { + // Always prefer the exact matching edges + for (const auto& e : match_out_edges()) { + if (e.u32ch == ch) { + return e.node; + } + } + // Fallback to wildcard edge if possible (could be doomed) + return wildcard_edge_to; + } + + [[nodiscard]] bool has_exact_match(uint32_t ch) const noexcept { + for (const auto& e : match_out_edges()) { + if (e.u32ch == ch) { + return true; + } + } + return false; + } + + [[nodiscard]] size_t has_higher_out_edge(uint32_t ch) const noexcept { + if (has_wildcard_edge()) { + return true; // implicitly possible to substitute a higher out edge char + } + return lowest_higher_explicit_out_edge(ch) != nullptr; + } + + [[nodiscard]] const Edge* lowest_higher_explicit_out_edge(uint32_t ch) const noexcept { + // Important: these _must_ be sorted in increasing code point order + for (const auto& e : match_out_edges()) { + if (e.u32ch > ch) { + return &e; + } + } + return 
nullptr; + } + + void add_match_out_edge(uint32_t out_char, uint32_t out_node) noexcept { + assert(num_match_out_edges < MaxCharOutEdges); + match_out_edges_buf[num_match_out_edges] = Edge(out_char, out_node); + ++num_match_out_edges; + } + + void set_wildcard_out_edge(uint32_t out_node) noexcept { + assert(wildcard_edge_to == DOOMED); + wildcard_edge_to = out_node; + } +}; + +template <uint8_t MaxEdits> +class ExplicitLevenshteinDfaImpl final : public LevenshteinDfa::Impl { +public: + static_assert(MaxEdits > 0 && MaxEdits <= UINT8_MAX/2); + + using DfaNodeType = DfaNode<MaxEdits>; + using MatchResult = LevenshteinDfa::MatchResult; +private: + std::vector<DfaNodeType> _nodes; +public: + ExplicitLevenshteinDfaImpl() noexcept = default; + ~ExplicitLevenshteinDfaImpl() override = default; + + static constexpr uint8_t max_edits() noexcept { return MaxEdits; } + + void ensure_node_array_large_enough_for_index(uint32_t node_index) { + if (node_index >= _nodes.size()) { + _nodes.resize(node_index + 1); + } + } + + void set_node_edit_distance(uint32_t node_index, uint8_t edits) { + _nodes[node_index].edits = edits; + } + + void add_outgoing_edge(uint32_t from_node_idx, uint32_t to_node_idx, uint32_t out_char) { + _nodes[from_node_idx].add_match_out_edge(out_char, to_node_idx); + } + + void set_wildcard_edge(uint32_t from_node_idx, uint32_t to_node_idx) { + _nodes[from_node_idx].set_wildcard_out_edge(to_node_idx); + } + + [[nodiscard]] MatchResult match(std::string_view u8str, std::string* successor_out) const override; + + [[nodiscard]] size_t memory_usage() const noexcept override { + return sizeof(DfaNodeType) * _nodes.size(); + } + + void dump_as_graphviz(std::ostream& os) const override; +}; + +template <typename Traits> +class ExplicitLevenshteinDfaBuilder { + std::vector<uint32_t> _u32_str_buf; // TODO std::u32string +public: + explicit ExplicitLevenshteinDfaBuilder(std::string_view str) + : ExplicitLevenshteinDfaBuilder(utf8_string_to_utf32(str)) + {} + + explicit 
ExplicitLevenshteinDfaBuilder(std::vector<uint32_t> str) noexcept
        : _u32_str_buf(std::move(str))
    {}

    [[nodiscard]] LevenshteinDfa build_dfa() const;
};

}
diff --git a/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp new file mode 100644 index 00000000000..0960219aff3 --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/explicit_levenshtein_dfa.hpp @@ -0,0 +1,228 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include "explicit_levenshtein_dfa.h"
#include "match_algorithm.hpp"
#include <vespa/vespalib/stllike/hash_map.h>
#include <vespa/vespalib/stllike/hash_map.hpp>
#include <iostream>
#include <span>
#include <queue>

namespace vespalib::fuzzy {

// DfaMatcher adapter for explicit DFA implementation.
// States are plain pointers into the DFA's node vector; a nullptr state means
// the matcher stepped to a doomed (can-never-match) state.
template <uint8_t MaxEdits>
struct ExplicitDfaMatcher {
    using DfaNodeType = typename ExplicitLevenshteinDfaImpl<MaxEdits>::DfaNodeType;
    using StateType = const DfaNodeType*;
    using EdgeType = const DfaNodeType::Edge*;

    using StateParamType = const DfaNodeType*;

    const std::span<const DfaNodeType> _nodes; // non-owning view of the DFA's nodes

    explicit ExplicitDfaMatcher(const std::span<const DfaNodeType> nodes) noexcept
        : _nodes(nodes)
    {}

    static constexpr uint8_t max_edits() noexcept { return MaxEdits; }

    StateType start() const noexcept {
        return &_nodes[0]; // node 0 is always the start state
    }
    bool has_higher_out_edge(StateType node, uint32_t mch) const noexcept {
        return node->has_higher_out_edge(mch);
    }
    StateType match_input(StateType node, uint32_t mch) const noexcept {
        auto maybe_node_idx = node->match_or_doomed(mch);
        return ((maybe_node_idx != DOOMED) ?
&_nodes[maybe_node_idx] : nullptr);
    }
    bool is_match(StateType node) const noexcept {
        // Non-accepting nodes have edits == UINT8_MAX, which is > max_edits()
        return node->edits <= max_edits();
    }
    bool can_match(StateType node) const noexcept {
        // nullptr means we previously stepped to a doomed state
        return node != nullptr;
    }
    uint8_t match_edit_distance(StateType node) const noexcept {
        return node->edits;
    }
    bool valid_state(StateType node) const noexcept {
        return node != nullptr;
    }
    StateType match_wildcard(StateType node) const noexcept {
        auto edge_to = node->wildcard_edge_to_or_doomed();
        return ((edge_to != DOOMED) ? &_nodes[edge_to] : nullptr);
    }
    bool has_exact_explicit_out_edge(StateType node, uint32_t ch) const noexcept {
        return node->has_exact_match(ch);
    }
    EdgeType lowest_higher_explicit_out_edge(StateType node, uint32_t ch) const noexcept {
        return node->lowest_higher_explicit_out_edge(ch);
    }
    EdgeType smallest_explicit_out_edge(StateType node) const noexcept {
        // Out-edges are pre-ordered in increasing code point order, so the first
        // element is always the smallest possible matching character.
assert(!node->match_out_edges().empty());
        return &node->match_out_edges().front();
    }
    bool valid_edge(EdgeType edge) const noexcept {
        return edge != nullptr;
    }
    uint32_t edge_to_u32char(EdgeType edge) const noexcept {
        return edge->u32ch;
    }
    StateType edge_to_state([[maybe_unused]] StateType node, EdgeType edge) const noexcept {
        return &_nodes[edge->node];
    }
};

template <uint8_t MaxEdits>
LevenshteinDfa::MatchResult
ExplicitLevenshteinDfaImpl<MaxEdits>::match(std::string_view u8str, std::string* successor_out) const {
    // Delegate to the shared matching algorithm via the adapter above.
    ExplicitDfaMatcher<MaxEdits> matcher(_nodes);
    return MatchAlgorithm<MaxEdits>::match(matcher, u8str, successor_out);
}

// Emits the DFA in Graphviz DOT format. Accepting nodes are rendered filled,
// labeled "index(edits)"; wildcard edges are labeled "*".
template <uint8_t MaxEdits>
void ExplicitLevenshteinDfaImpl<MaxEdits>::dump_as_graphviz(std::ostream& os) const {
    os << std::dec << "digraph levenshtein_dfa {\n";
    os << "    fontname=\"Helvetica,Arial,sans-serif\"\n";
    os << "    node [shape=circle, fontname=\"Helvetica,Arial,sans-serif\", fixedsize=true];\n";
    os << "    edge [fontname=\"Helvetica,Arial,sans-serif\"];\n";
    for (size_t i = 0; i < _nodes.size(); ++i) {
        const auto& node = _nodes[i];
        if (node.edits <= max_edits()) {
            os << "    " << i << " [label=\"" << i << "(" << static_cast<int>(node.edits) << ")\", style=\"filled\"];\n";
        }
        for (const auto& edge : node.match_out_edges()) {
            std::string as_utf8;
            append_utf32_char_as_utf8(as_utf8, edge.u32ch);
            os << "    " << i << " -> " << edge.node << " [label=\"" << as_utf8 << "\"];\n";
        }
        if (node.wildcard_edge_to != DOOMED) {
            os << "    " << i << " -> " << node.wildcard_edge_to << " [label=\"*\"];\n";
        }
    }
    os << "}\n";
}

namespace {

// Bookkeeping for the BFS state exploration in build_dfa(): maps each sparse
// Levenshtein state to a (node id, explored?) pair.
template <typename StateType>
struct ExploreState {
    using NodeIdAndExplored = std::pair<uint32_t, bool>;
    using SparseExploredStates = vespalib::hash_map<StateType, NodeIdAndExplored, typename StateType::hash>;

    uint32_t state_counter;          // next node id to hand out
    SparseExploredStates explored_states;

    ExploreState();
    ~ExploreState();

    [[nodiscard]]
SparseExploredStates::iterator node_of(const StateType& state) { + auto maybe_explored = explored_states.find(state); + if (maybe_explored != explored_states.end()) { + return maybe_explored; + } + uint32_t this_node = state_counter; + assert(state_counter < UINT32_MAX); + ++state_counter; + return explored_states.insert(std::make_pair(state, std::make_pair(this_node, false))).first; // not yet explored; + } + + [[nodiscard]] bool already_explored(const SparseExploredStates::iterator& node) const noexcept { + return node->second.second; + } + + void tag_as_explored(SparseExploredStates::iterator& node) noexcept { + node->second.second = true; + } +}; + +template <typename StateType> +ExploreState<StateType>::ExploreState() + : state_counter(0), + explored_states() +{} + +template <typename StateType> +ExploreState<StateType>::~ExploreState() = default; + +template <typename Traits> +class ExplicitLevenshteinDfaBuilderImpl : public DfaSteppingBase<Traits> { + using Base = DfaSteppingBase<Traits>; + + using StateType = typename Base::StateType; + using TransitionsType = typename Base::TransitionsType; + + using Base::_u32_str; + using Base::max_edits; + using Base::start; + using Base::match_edit_distance; + using Base::step; + using Base::is_match; + using Base::can_match; + using Base::transitions; +public: + explicit ExplicitLevenshteinDfaBuilderImpl(std::span<const uint32_t> str) noexcept + : DfaSteppingBase<Traits>(str) + { + assert(str.size() < UINT32_MAX / max_out_edges_per_node()); + } + + [[nodiscard]] static constexpr uint8_t max_out_edges_per_node() noexcept { + // Max possible out transition characters (2k+1) + one wildcard edge. 
+ return diag(max_edits()) + 1; + } + + [[nodiscard]] LevenshteinDfa build_dfa() const; +}; + +template <typename Traits> +LevenshteinDfa ExplicitLevenshteinDfaBuilderImpl<Traits>::build_dfa() const { + auto dfa = std::make_unique<ExplicitLevenshteinDfaImpl<max_edits()>>(); + ExploreState<StateType> exp; + // Use BFS instead of DFS to ensure most node edges point to nodes that are allocated _after_ + // the parent node, which means the CPU can skip ahead instead of ping-ponging back and forth. + // This does _not_ always hold, such as if you have A->B and A->C->B (i.e. both parent and + // grandparent have a transition to the same state), in which case B may be allocated before C. + std::queue<StateType> to_explore; + to_explore.push(start()); + while (!to_explore.empty()) { + auto state = std::move(to_explore.front()); + to_explore.pop(); + auto this_node = exp.node_of(state); // note: invalidated by subsequent calls to node_of + if (exp.already_explored(this_node)) { + continue; + } + exp.tag_as_explored(this_node); + const auto this_node_idx = this_node->second.first; + dfa->ensure_node_array_large_enough_for_index(this_node_idx); + dfa->set_node_edit_distance(this_node_idx, match_edit_distance(state)); + auto t = transitions(state); + for (uint32_t out_c : t.u32_chars()) { + auto new_state = step(state, out_c); + auto out_node = exp.node_of(new_state); + dfa->add_outgoing_edge(this_node_idx, out_node->second.first, out_c); + to_explore.push(std::move(new_state)); + } + auto wildcard_state = step(state, WILDCARD); + if (can_match(wildcard_state)) { + auto out_node = exp.node_of(wildcard_state); + dfa->set_wildcard_edge(this_node_idx, out_node->second.first); + to_explore.push(std::move(wildcard_state)); + } // else: don't bother + } + return LevenshteinDfa(std::move(dfa)); +} + +} // anon ns + +template <typename Traits> +LevenshteinDfa ExplicitLevenshteinDfaBuilder<Traits>::build_dfa() const { + ExplicitLevenshteinDfaBuilderImpl<Traits> builder(_u32_str_buf); + 
return builder.build_dfa(); +} + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.cpp b/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.cpp new file mode 100644 index 00000000000..8b9d2eddcac --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.cpp @@ -0,0 +1,9 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "implicit_levenshtein_dfa.hpp" + +namespace vespalib::fuzzy { + +template class ImplicitLevenshteinDfa<FixedMaxEditDistanceTraits<1>>; +template class ImplicitLevenshteinDfa<FixedMaxEditDistanceTraits<2>>; + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.h b/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.h new file mode 100644 index 00000000000..0846b95d135 --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.h @@ -0,0 +1,35 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once
+
+#include "levenshtein_dfa.h"
+#include "unicode_utils.h"
+#include <vector>
+
+namespace vespalib::fuzzy {
+
+/**
+ * LevenshteinDfa::Impl where the DFA is never materialized as an explicit graph;
+ * states are computed on the fly during matching (see the accompanying .hpp),
+ * driven by a UTF-32 copy of the target string. Memory usage is therefore just
+ * the converted target string itself.
+ */
+template <typename Traits>
+class ImplicitLevenshteinDfa final : public LevenshteinDfa::Impl {
+    std::vector<uint32_t> _u32_str_buf; // Target string as UTF-32 code points. TODO std::u32string
+public:
+    using MatchResult = LevenshteinDfa::MatchResult;
+
+    // Converts the UTF-8 target string to UTF-32 and delegates to the UTF-32 constructor.
+    explicit ImplicitLevenshteinDfa(std::string_view str)
+        : ImplicitLevenshteinDfa(utf8_string_to_utf32(str))
+    {}
+
+    explicit ImplicitLevenshteinDfa(std::vector<uint32_t> str) noexcept
+        : _u32_str_buf(std::move(str))
+    {}
+
+    ~ImplicitLevenshteinDfa() override = default;
+
+    [[nodiscard]] MatchResult match(std::string_view u8str, std::string* successor_out) const override;
+
+    // Reports the logical size of the UTF-32 buffer (size(), not allocated capacity).
+    [[nodiscard]] size_t memory_usage() const noexcept override {
+        return _u32_str_buf.size() * sizeof(uint32_t);
+    }
+
+    // Not supported for the implicit representation (there is no materialized graph
+    // to dump); the implementation throws std::runtime_error.
+    void dump_as_graphviz(std::ostream& os) const override;
+};
+
+}
diff --git a/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.hpp b/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.hpp
new file mode 100644
index 00000000000..4ee468e424b
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/implicit_levenshtein_dfa.hpp
@@ -0,0 +1,121 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once + +#include "dfa_stepping_base.h" +#include "implicit_levenshtein_dfa.h" +#include "match_algorithm.hpp" +#include "sparse_state.h" +#include <cassert> +#include <stdexcept> + +namespace vespalib::fuzzy { + +// DfaMatcher adapter for implicit DFA implementation +template <typename Traits> +struct ImplicitDfaMatcher : public DfaSteppingBase<Traits> { + using Base = DfaSteppingBase<Traits>; + + using StateType = typename Base::StateType; + using EdgeType = uint32_t; // Just the raw u32 character value + + using StateParamType = const StateType&; + + using Base::_u32_str; + using Base::max_edits; + using Base::start; + using Base::match_edit_distance; + using Base::step; + using Base::can_wildcard_step; + using Base::is_match; + using Base::can_match; + + explicit ImplicitDfaMatcher(std::span<const uint32_t> u32_str) noexcept + : Base(u32_str) + {} + + // start, is_match, can_match, match_edit_distance are all provided by base type + + template <typename F> + bool has_any_char_matching(const StateType& state, F&& f) const noexcept(noexcept(f(uint32_t{}))) { + for (uint32_t i = 0; i < state.size(); ++i) { + const auto idx = state.index(i); + if ((idx < _u32_str.size()) && f(_u32_str[idx])) { + return true; + } + } + return false; + } + + template <typename F> + void for_each_char(const StateType& state, F&& f) const noexcept(noexcept(f(uint32_t{}))) { + for (uint32_t i = 0; i < state.size(); ++i) { + const auto idx = state.index(i); + if ((idx < _u32_str.size())) [[likely]] { + f(_u32_str[idx]); + } + } + } + + bool has_explicit_higher_out_edge(const StateType& state, uint32_t ch) const noexcept { + return has_any_char_matching(state, [ch](uint32_t state_ch) noexcept { + return state_ch > ch; + }); + } + + bool has_higher_out_edge(const StateType& state, uint32_t mch) const noexcept { + return (has_explicit_higher_out_edge(state, mch) || can_wildcard_step(state)); + } + StateType match_input(const StateType& state, uint32_t mch) const noexcept { + return 
step(state, mch); + } + bool valid_state(const StateType& state) const noexcept { + return !state.empty(); + } + StateType match_wildcard(const StateType& state) const noexcept { + return step(state, WILDCARD); + } + bool has_exact_explicit_out_edge(const StateType& state, uint32_t ch) const noexcept { + return has_any_char_matching(state, [ch](uint32_t state_ch) noexcept { + return state_ch == ch; + }); + } + EdgeType lowest_higher_explicit_out_edge(const StateType& state, uint32_t ch) const noexcept { + uint32_t min_ch = UINT32_MAX; + for_each_char(state, [ch, &min_ch](uint32_t state_ch) noexcept { + if ((state_ch > ch) && (state_ch < min_ch)) { + min_ch = state_ch; + } + }); + return min_ch; + } + EdgeType smallest_explicit_out_edge(const StateType& state) const noexcept { + uint32_t min_ch = UINT32_MAX; + for_each_char(state, [&min_ch](uint32_t state_ch) noexcept { + min_ch = std::min(min_ch, state_ch); + }); + return min_ch; + } + bool valid_edge(EdgeType edge) const noexcept { + return edge != UINT32_MAX; + } + uint32_t edge_to_u32char(EdgeType edge) const noexcept { + return edge; + } + StateType edge_to_state(const StateType& state, EdgeType edge) const noexcept { + return step(state, edge); + } +}; + +template <typename Traits> +LevenshteinDfa::MatchResult +ImplicitLevenshteinDfa<Traits>::match(std::string_view u8str, std::string* successor_out) const { + ImplicitDfaMatcher<Traits> matcher(_u32_str_buf); + return MatchAlgorithm<Traits::max_edits()>::match(matcher, u8str, successor_out); +} + +template <typename Traits> +void ImplicitLevenshteinDfa<Traits>::dump_as_graphviz(std::ostream&) const { + throw std::runtime_error("Graphviz output not available for implicit Levenshtein DFA"); +} + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.cpp b/vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.cpp new file mode 100644 index 00000000000..e75ef8365bf --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.cpp @@ -0,0 +1,83 @@ +// 
Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include "explicit_levenshtein_dfa.h" +#include "implicit_levenshtein_dfa.h" +#include "levenshtein_dfa.h" +#include <vespa/vespalib/util/stringfmt.h> +#include <memory> + +namespace vespalib::fuzzy { + +LevenshteinDfa::LevenshteinDfa(std::unique_ptr<Impl> impl) noexcept + : _impl(std::move(impl)) +{} + +LevenshteinDfa::LevenshteinDfa(LevenshteinDfa&&) noexcept = default; +LevenshteinDfa& LevenshteinDfa::operator=(LevenshteinDfa&&) noexcept = default; + +LevenshteinDfa::~LevenshteinDfa() = default; + +LevenshteinDfa::MatchResult +LevenshteinDfa::match(std::string_view u8str, std::string* successor_out) const { + return _impl->match(u8str, successor_out); +} + +size_t LevenshteinDfa::memory_usage() const noexcept { + return _impl->memory_usage(); +} + +void LevenshteinDfa::dump_as_graphviz(std::ostream& out) const { + _impl->dump_as_graphviz(out); +} + +LevenshteinDfa LevenshteinDfa::build(std::string_view target_string, uint8_t max_edits, DfaType dfa_type) { + if (max_edits != 1 && max_edits != 2) { + throw std::invalid_argument(make_string("Levenshtein DFA max_edits must be in {1, 2}, was %u", max_edits)); + } + if (dfa_type == DfaType::Implicit) { + if (max_edits == 1) { + return LevenshteinDfa(std::make_unique<ImplicitLevenshteinDfa<FixedMaxEditDistanceTraits<1>>>(target_string)); + } else { // max_edits == 2 + return LevenshteinDfa(std::make_unique<ImplicitLevenshteinDfa<FixedMaxEditDistanceTraits<2>>>(target_string)); + } + } else { // DfaType::Explicit + if (max_edits == 1) { + return ExplicitLevenshteinDfaBuilder<FixedMaxEditDistanceTraits<1>>(target_string).build_dfa(); + } else { // max_edits == 2 + return ExplicitLevenshteinDfaBuilder<FixedMaxEditDistanceTraits<2>>(target_string).build_dfa(); + } + } + +} + +LevenshteinDfa LevenshteinDfa::build(std::string_view target_string, uint8_t max_edits) { + // TODO automatically select implementation based on 
target length/max edits? + // Suggestion: + // - Explicit DFA iff (k == 1 && |target| <= 256) || (k == 2 && |target| <= 64). + // - Implicit DFA otherwise. + // This keeps memory overhead < 64k and DFA construction time < 300 usec (measured on + // an M1 Pro; your mileage may vary etc). + // Ideally the implicit DFA would always be the fastest (or at least approximately as + // fast as the explicit DFA), but this is not yet the case. + return build(target_string, max_edits, DfaType::Implicit); +} + +std::ostream& operator<<(std::ostream& os, const LevenshteinDfa::MatchResult& mos) { + if (mos.matches()) { + os << "match(" << static_cast<int>(mos.edits()) << " edits)"; + } else { + os << "mismatch"; + } + return os; +} + +std::ostream& operator<<(std::ostream& os, const LevenshteinDfa::DfaType& dt) { + if (dt == LevenshteinDfa::DfaType::Implicit) { + os << "Implicit"; + } else { + assert(dt == LevenshteinDfa::DfaType::Explicit); + os << "Explicit"; + } + return os; +} + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.h b/vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.h new file mode 100644 index 00000000000..a26ccbe87ee --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/levenshtein_dfa.h @@ -0,0 +1,244 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <cstdint> +#include <iosfwd> +#include <memory> +#include <string> +#include <string_view> + +namespace vespalib::fuzzy { + +/** + * Levenshtein Deterministic Finite Automata (DFA) + * + * The Levenshtein distance (or edit distance) is the minimum number of edits (additions, + * deletions or substitutions) needed to transform a particular source string s to a + * particular target string t. + * + * Let m be the length of the source string and n be the length of the target string. + * + * The classic dynamic programming algorithm uses a n x m cost matrix and is therefore + * O(nm) in space and time. 
By observing that only 2 rows of the matrix are actually + * needed, this is commonly reduced to O(n) space complexity (still O(nm) time complexity). + * When the maximum number of allowed edits is constrained to k, some clever observations + * about the nature of the cost matrix allows for reducing the time complexity down to + * O(kn) (more specifically, O((2k+1) * n)). When k is fixed (e.g. k in {1, 2}), the + * time complexity simplifies down to O(n). + * + * This implements code for building and evaluating Levenshtein Deterministic Finite + * Automata, where the resulting DFA efficiently matches all possible source strings that + * can be transformed to the target string within k max edits. This allows for easy linear + * matching of strings. + * + * Inspiration: + * - http://blog.notdot.net/2010/07/Damn-Cool-Algorithms-Levenshtein-Automata + * - https://julesjacobs.com/2015/06/17/disqus-levenshtein-simple-and-fast.html + * + * The latter in particular was a close inspiration for the sparse DFA state management. + * + * ====== Dictionary skipping via successor string generation ====== + * + * Scanning for edit distance matches frequently takes place against a sorted dictionary. + * When matching using a DFA, in the case where the source string does _not_ match, we can + * generate the _successor_ string; the next matching string that is lexicographically + * _greater_ than the source string. This string has the invariant that there are no + * possibly matching strings within k edits ordered after the source string but before + * the successor. + * + * This lets us do possibly massive leaps forward in the dictionary, turning a dictionary + * scan into a sublinear operation. + * + * Note that the implemented successor algorithm is slightly different from that described + * in the above blog post. 
The implemented algorithm requires no extra data structures
+ * beyond the DFA itself and the target string, and tries to be extra clever about reducing
+ * the number of code point conversions required.
+ *
+ * ====== Unicode support ======
+ *
+ * Matching and successor generation is fully Unicode-aware. All input strings are expected
+ * to be in UTF-8, and the generated successor is also encoded as UTF-8 (with some caveats;
+ * see the documentation for match()).
+ *
+ * Internally, matching is done on UTF-32 code points and the DFA itself is built around
+ * UTF-32. This is unlike Lucene, which converts a UTF-32 DFA to an equivalent UTF-8 DFA.
+ *
+ * ====== Memory usage ======
+ *
+ * There is always a baseline DFA memory usage O(n) in the target string, as the
+ * underlying DFA needs to convert the input UTF-8 string to explicit UTF-32 chars.
+ *
+ * Aside from the baseline, memory usage depends on whether an explicit or implicit DFA
+ * is used.
+ *
+ * ------ Explicit DFA ------
+ *
+ * The explicit DFA graph takes up quite a bit more memory than the original string
+ * representation (one reason is the use of UTF-32 characters under the hood).
+ *
+ * Expected upper bound memory usage for a string of length n with max edits k is
+ *
+ *   (2k+1) * N(k) * n * W(k)
+ *
+ * where N(1) is expected to be 32 and N(2) is 48, W(1) is 1.34 and W(2) is 3.2 (empirically
+ * derived).
+ *
+ * Memory usage during building is higher due to keeping track of the set of generated
+ * states in a hash table, but still linear in input size. This extra memory is freed
+ * once building is complete.
+ *
+ * ------ Implicit DFA ------
+ *
+ * Implicit DFAs have O(1) memory usage during evaluation, which all lives on the stack
+ * or in registers (this does not include the successor string, which is provided by the
+ * caller).
+ *
+ * Since the sparse state stepping is currently not as fast as explicit DFA node traversal,
+ * string matching is slower than with the explicit DFA.
+ * + * ====== In short ====== + * + * - Immutable; build once, run many times. + * - Explicit DFA build time is amortized linear in target string size. + * - Implicit DFA build time is O(1) (aside from initial UTF-32 conversion) + * - Zero-allocation matching. + * - Matching takes in raw UTF-8 input, no need to pre-convert. + * - Streaming UTF-8 to UTF-32 conversion; fully unicode-aware (DFA uses UTF-32 code + * points internally). + * - If required, it's possible (but not currently implemented) to bake case + * insensitive matching semantics into the generated DFA itself. + * - Allows for dictionary forward-skipping via successor algorithm. + * - Amortized zero allocations for successor string building when reusing string + * between matches. + * - Successor string is generated in-place as UTF-8 and can be directly used as input + * to a byte-wise dictionary seek. + */ +class LevenshteinDfa { +public: + class MatchResult { + uint8_t _max_edits; + uint8_t _edits; + public: + constexpr MatchResult(uint8_t max_edits, uint8_t edits) noexcept + : _max_edits(max_edits), + _edits(edits) + {} + + static constexpr MatchResult make_match(uint8_t max_edits, uint8_t edits) noexcept { + return {max_edits, edits}; + } + + static constexpr MatchResult make_mismatch(uint8_t max_edits) noexcept { + return {max_edits, static_cast<uint8_t>(max_edits + 1)}; + } + + [[nodiscard]] constexpr bool matches() const noexcept { return _edits <= _max_edits; } + [[nodiscard]] constexpr uint8_t edits() const noexcept { return _edits; } + [[nodiscard]] constexpr uint8_t max_edits() const noexcept { return _max_edits; } + }; + + struct Impl { + virtual ~Impl() = default; + [[nodiscard]] virtual MatchResult match(std::string_view u8str, std::string* successor_out) const = 0; + [[nodiscard]] virtual size_t memory_usage() const noexcept = 0; + virtual void dump_as_graphviz(std::ostream& out) const = 0; + }; + +private: + std::unique_ptr<Impl> _impl; +public: + explicit 
LevenshteinDfa(std::unique_ptr<Impl> impl) noexcept; + LevenshteinDfa(LevenshteinDfa&&) noexcept; + LevenshteinDfa& operator=(LevenshteinDfa&&) noexcept; + LevenshteinDfa(const LevenshteinDfa&) = delete; + LevenshteinDfa& operator=(const LevenshteinDfa&) = delete; + ~LevenshteinDfa(); + + /** + * Attempts to match the source string `source` with the target string this DFA was + * built with, emitting a successor string on mismatch if `successor_out` != nullptr. + * + * `source` must not contain any null UTF-8 chars. + * + * Match case: + * Iff `source` is _within_ the maximum edit distance, returns a MatchResult with + * matches() == true and edits() == the actual edit distance. If `successor_out` + * is not nullptr, the string pointed to is _not_ modified. + * + * Mismatch case: + * Iff `source` is _beyond_ the maximum edit distance, returns a MatchResult with + * matches() == false. + * + * Iff `successor_out` is not nullptr, the following holds: + * - `successor_out` is modified to contain the next (in byte-wise ordering) possible + * _matching_ string S so that there exists no other matching string S' that is + * greater than `source` but smaller than S. + * - `successor_out` contains UTF-8 bytes that are within what UTF-8 can legally + * encode in bitwise form, but the _code points_ they encode may not be valid. + * In particular, surrogate pair ranges and U+10FFFF+1 may be encoded, neither of + * which are valid UTF-8. + * + * It is expected that the consumer of `successor_out` is only interested in the + * memcmp()-ordering of strings and not whether they are technically valid Unicode. + * This should be the case for low-level dictionary data structures etc. 
+ * + * Memory allocation: + * This function does not directly or indirectly allocate any heap memory if either: + * + * - the input string is within the max edit distance, or + * - `successor_out` is nullptr, or + * - `successor_out` has sufficient capacity to hold the generated successor + * + * By reusing the successor string across many calls, this therefore amortizes memory + * allocations down to near zero per invocation. + */ + [[nodiscard]] MatchResult match(std::string_view source, std::string* successor_out) const; + + /** + * Returns how much memory is used by the underlying DFA representation, in bytes. + */ + [[nodiscard]] size_t memory_usage() const noexcept; + + enum class DfaType { + Implicit, + Explicit + }; + + /** + * Builds and returns a Levenshtein DFA that matches all strings within `max_edits` + * edits of `target_string`. The type of DFA returned is specified by dfa_type. + * + * `max_edits` must be in {1, 2}. Throws std::invalid_argument if outside range. + * + * `target_string` must not contain any null UTF-8 chars. + */ + [[nodiscard]] static LevenshteinDfa build(std::string_view target_string, + uint8_t max_edits, + DfaType dfa_type); + + /** + * Same as build() but currently always returns an implicit DFA. + */ + [[nodiscard]] static LevenshteinDfa build(std::string_view target_string, uint8_t max_edits); + + /** + * Dumps the DFA as a Graphviz graph in text format to the provided output stream. + * + * Note: Only supported for _explicit_ DFAs. Trying to call this function on an implicit + * DFA will throw a std::runtime_error, as there is no concrete underlying graph + * structure to dump. + * + * Note that only _matching_ state transitions are present in the DFA, and therefore only + * such transitions are present in the generated graph. 
Overall this makes the graph for + * longer strings much more manageable, as the number of out-edges from a particular depth + * in the graph depends on the max number of edits and not on the length of the string + * itself. Otherwise, you'd have a whole bunch of nodes with out-edges to the same terminal + * non-matching state node. + */ + void dump_as_graphviz(std::ostream& out) const; +}; + +std::ostream& operator<<(std::ostream& os, const LevenshteinDfa::MatchResult& mos); +std::ostream& operator<<(std::ostream& os, const LevenshteinDfa::DfaType& dt); + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/match_algorithm.hpp b/vespalib/src/vespa/vespalib/fuzzy/match_algorithm.hpp new file mode 100644 index 00000000000..206b69f8ebe --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/match_algorithm.hpp @@ -0,0 +1,291 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "dfa_matcher.h" +#include "levenshtein_dfa.h" +#include "unicode_utils.h" +#include <vespa/vespalib/text/utf8.h> +#include <cassert> +#include <concepts> + +namespace vespalib::fuzzy { + +/** + * Implementation of algorithm for linear-time k-max edits string matching and successor + * string generation over an abstract DFA representation. + * + * The implementation is agnostic to how the underlying DFA is implemented, but requires + * an appropriate adapter that satisfies the DfaMatcher concept contracts. + */ +template <uint8_t MaxEdits> +struct MatchAlgorithm { + using MatchResult = LevenshteinDfa::MatchResult; + + static constexpr uint8_t max_edits() noexcept { return MaxEdits; } + + /** + * Matches UTF-8 source string `source` against the target DFA, optionally generating + * the successor string iff the source string is not within the maximum number of edits + * of the target string. 
+ *
+ * The actual match loop is very simple: we try to match the DFA as far as we can
+ * before either consuming all input (source string) characters or ending up in a non-
+ * matching state before we have consumed all input. In the former case, we may be in
+ * a matching state (consider matching "foo" with the target string "food"; after
+ * consuming all input we'll be in a matching state with 1 edit). In the latter case,
+ * the input string cannot possibly match.
+ *
+ * If we end up in a matching state, all is well. We simply return a MatchResult with
+ * the number of edits the state represents.
+ *
+ * The interesting bit happens when the string does _not_ match and we are asked to provide a
+ * _successor_ string that _does_ match and is strictly greater in lexicographic order.
+ *
+ * We lean on some core invariants:
+ *
+ * - The m x n (|source| x |target|) Levenshtein matrix provides, for any m[i, j] with
+ *   i in [1, m], j in [1, n], the _minimum possible_ number of edits that can transform
+ *   the source string prefix of length `i` to the target string prefix of length `j`.
+ *   This means there is no way of transforming said source prefix using _fewer_ edits.
+ *
+ * - Any given DFA state corresponds to a unique row in the Levenshtein matrix, thus
+ *   transitively inheriting the invariants of the matrix row elements themselves, such
+ *   as representing the minimum number of edits.
+ *
+ * We have two mismatch cases:
+ *
+ *   1. We've matched the entire source string without ending in an accepting state.
+ *
+ * This can only happen if the input is a (possibly edited) prefix of the target string.
+ * Any and all _longer_ strings with this prefix are inherently lexicographically greater,
+ * so we emit the smallest possible suffix that turns prefix || suffix into a matching
+ * string.
+ *
+ * See emit_smallest_matching_suffix() for details.
+ *
+ *   2. We've matched a prefix of the source string without ending in an accepting state.
+ * + * This case is trickier than when the entire source string is a prefix, as we cannot + * just emit a suffix to the source to create a matching, lexicographically greater string. + * + * Consider source "foxx" and target "food". There exists no suffix S in "food" that can + * turn "foxx" || S into a matching string within k=1 edits. + * + * So we have to backtrack to somewhere. + * + * That "somewhere" is the state that maximizes the size of the source prefix while + * allowing us to emit a greater suffix. + * + * For each state we visit, we check if there exists at least one higher out edge than the + * one taken out from that state (this is possibly a wildcard edge). If one exists, we + * copy the state to `last_state_with_higher_out` and remember the state's source string + * prefix as well as the source string character that transitions us away from the state + * (this will be our candidate for building a greater suffix). + * + * When we fail to match the entire source string, we know that last_state_with_higher_out + * represents the last possible branching point (and therefore the longest prefix) where + * we can substitute in or insert a higher character, in turn creating a greater suffix. + * + * Proof by contradiction: let `last_state_with_higher_out` be S and assume there exists + * a state S' that has a greater source string prefix than S while still allowing for + * emitting a lexicographically greater suffix that is within max edits k. We terminate + * the match loop once can_match(X) is false for any state X, where X subsumes S by + * definition. For S' to exist, it must be possible for a transition to exist from X to + * a later state that can have a higher out edge. However, edit distance costs can + * never decrease, only stay constant (with matching substitutions) or increase (with + * insertions, deletions or non-matching substitutions), so it's impossible to follow + * an out-edge from X to any later potentially matching state. 
Thus, S' can not exist + * and we have a contradiction. + * + * Since we want to generate the smallest possible larger string that matches, we ideally + * want to emit a character that is +1 of the source character after the shared prefix. + * This is using the "higher out"-character we remembered earlier. We do this if we have + * a wildcard out edge (or if there exists an explicit out-edge for value char+1). + * Otherwise, we have to follow the highest explicitly present out-edge. + * + * Once we have emitted one single character that gets us lexicographically higher than + * the source string, we then emit the smallest possible suffix to this. This uses the + * same minimal suffix generation logic as mismatch case 1). + * + * See `backtrack_and_emit_greater_suffix()` for details. + * + * Example: + * (This is easiest to follow by looking at examples/food_dfa.svg) + * + * Source "foxx", target "food" and k=1: + * + * After matching "fo" with 0 edits we reach a state with out-edges {d, o, *}. This state + * has an implicitly higher out-edge (*) and we remember it and the char 'x' for later. + * Edge 'x' can only happen via *, so we take that path. + * + * After matching "fox" with 1 edit we reach a state with out-edges {d, o}. There is + * no out-edge for 'x' and the state is not a matching state, so we need to backtrack + * and generate a successor. + * + * We backtrack to the state representing "fo" and emit it as a successor prefix. We + * observe that this state has a wildcard out-edge and emit 'x'+1 == 'y' to the successor + * string and continue with emitting the smallest suffix. We now have a successor + * prefix of "foy", with which we reach the same logical state as we did with "fox" + * previously. The smallest out-edge here is 'd', so we take it. This leaves us in an + * accepting (matching) state, so suffix generation completes. 
     *
     * "foxx" -> "foyd"
     *
     * Note that it's possible for the prefix to be empty, which results in a successor
     * that has nothing in common with the source altogether.
     * Example: "gp" -> "hfood" (+1 char value case)
     *
     * Performance note:
     * Both the input and successor output strings are in UTF-8 format. To avoid doing
     * duplicate work, we keep track of the byte length of the string prefix that will be
     * part of the successor and simply copy it verbatim instead of building the string
     * from converted UTF-32 -> UTF-8 chars as we go.
     *
     * TODO we could probably also optimize the smallest suffix generation with this when
     *  we know we can no longer insert any smaller char substitutions and the only way
     *  to complete the string is to emit it verbatim.
     *   - To do this we'd need both the original UTF-8 target string as well as a
     *     secondary vector that maps u32 character index to the corresponding UTF-8 index.
     *     Both trivial to get as part of DFA initialization.
     */
    template <DfaMatcher Matcher>
    static MatchResult match(const Matcher& matcher,
                             std::string_view source,
                             std::string* successor_out)
    {
        using StateType = typename Matcher::StateType;
        vespalib::Utf8Reader u8_reader(source.data(), source.size());
        // Byte length of the source prefix that can be copied verbatim into the successor,
        // and the (UTF-32) source character immediately following that prefix.
        uint32_t n_prefix_u8_bytes = 0;
        uint32_t char_after_prefix = 0;
        // Deepest state seen so far that still has an out-edge greater than the source
        // character consumed at that state; this is where successor generation backtracks to.
        StateType last_state_with_higher_out = StateType{};

        StateType state = matcher.start();
        while (u8_reader.hasMore()) {
            const auto u8_pos_before_char = u8_reader.getPos();
            const uint32_t mch = u8_reader.getChar();
            // Only track backtracking info when the caller actually wants a successor.
            if (successor_out && matcher.has_higher_out_edge(state, mch)) {
                last_state_with_higher_out = state;
                n_prefix_u8_bytes = u8_pos_before_char;
                char_after_prefix = mch;
            }
            auto maybe_next = matcher.match_input(state, mch);
            if (matcher.can_match(maybe_next)) {
                state = maybe_next;
            } else {
                // Can never match; find the successor if requested
                if (successor_out) {
                    *successor_out = source.substr(0, n_prefix_u8_bytes);
                    assert(matcher.valid_state(last_state_with_higher_out));
                    backtrack_and_emit_greater_suffix(matcher, last_state_with_higher_out,
                                                      char_after_prefix, *successor_out);
                }
                return MatchResult::make_mismatch(max_edits());
            }
        }
        // Consumed the entire source without hitting a dead end; the final state decides.
        const auto edits = matcher.match_edit_distance(state);
        if (edits <= max_edits()) {
            return MatchResult::make_match(max_edits(), edits);
        }
        // Non-matching end state: the smallest successor is the source itself extended
        // with the smallest matching suffix (no backtracking needed).
        if (successor_out) {
            *successor_out = source;
            emit_smallest_matching_suffix(matcher, state, *successor_out);
        }
        return MatchResult::make_mismatch(max_edits());
    }

    /**
     * Instantly backtrack to the last possible branching point in the DFA where we can
     * choose some higher outgoing edge character value and still match the DFA. If the node
     * has a wildcard edge, we can bump the input char by one and generate the smallest
     * possible matching suffix to that. Otherwise, choose the smallest out edge that is
     * greater than the input character at that location and _then_ emit the smallest
     * matching suffix.
     *
     * precondition: `last_state_with_higher_out` has either a wildcard edge or a char match
     * edge that compares greater than `input_at_branch`.
     */
    template <DfaMatcher Matcher>
    static void backtrack_and_emit_greater_suffix(
            const Matcher& matcher,
            typename Matcher::StateParamType last_state_with_higher_out,
            const uint32_t input_at_branch,
            std::string& successor)
    {
        auto wildcard_state = matcher.match_wildcard(last_state_with_higher_out);
        if (matcher.can_match(wildcard_state)) {
            // `input_at_branch` may be U+10FFFF, with +1 being outside legal Unicode _code point_
            // range but _within_ what UTF-8 can technically _encode_.
            // We assume that successor-consumers do not care about anything except byte-wise
            // ordering. This is similar to what RE2's PossibleMatchRange emits to represent a
            // UTF-8 upper bound, so not without precedent.
            // If the resulting character corresponds to an existing out-edge we _must_ take it
            // instead of the wildcard edge, or we'll end up in the wrong state.
            const auto next_char = input_at_branch + 1;
            if (!matcher.has_exact_explicit_out_edge(last_state_with_higher_out, next_char)) {
                append_utf32_char_as_utf8(successor, next_char);
                emit_smallest_matching_suffix(matcher, wildcard_state, successor);
                return;
            } // else: handle exact match below (it will be found as the first higher out edge)
        }
        const auto first_highest_edge = matcher.lowest_higher_explicit_out_edge(last_state_with_higher_out, input_at_branch);
        assert(matcher.valid_edge(first_highest_edge));
        append_utf32_char_as_utf8(successor, matcher.edge_to_u32char(first_highest_edge));
        emit_smallest_matching_suffix(matcher, matcher.edge_to_state(last_state_with_higher_out, first_highest_edge), successor);
    }

    /**
     * The smallest possible suffix is generated by following the smallest out-edge per state,
     * until we reach a state that is a match. It is possible that the smallest out edge is a
     * "wildcard" edge (our terminology), which means that we can insert/substitute an arbitrary
     * character and still have `can_match(resulting state)` be true. In this case we emit the
     * smallest possible non-null UTF-8 character (0x01).
     *
     * Examples:
     * (These are easiest to follow by looking at examples/food_dfa.svg)
     *
     * Source "fo", target "food" and k=1:
     *
     * After matching "fo" we have 1 edit to spare. The smallest valid, non-empty UTF-8 suffix
     * to this string must necessarily begin with 0x01, so that's what we emit. The smallest
     * edge we can follow from the resulting state is 'd', and that is an accepting (matching)
     * state.
     *
     * "fo" -> "fo\x01d"
     *
     * Source "fx", target "food" and k=1:
     *
     * After matching "fx" we have no edits to spare.
The smallest character reachable from + * the state is 'o' (in fact, it is the only out edge available since we're down to zero + * available edits). The next state has an out-edge to 'd' and 'o', and we choose 'd' + * since it is smallest. This leaves us in an accepting (matching) state and we terminate + * the loop. + * + * "fx" -> "fxod" + */ + // TODO consider variant for only emitting _prefix of suffix_ to avoid having to generate + // the full string? Won't generate a matching string, but will be lexicographically greater. + template <DfaMatcher Matcher> + static void emit_smallest_matching_suffix( + const Matcher& matcher, + typename Matcher::StateParamType from, + std::string& str) + { + auto state = from; + while (!matcher.is_match(state)) { + // If we can take a wildcard path, emit the smallest possible valid UTF-8 character (0x01). + // Otherwise, find the smallest char that can eventually lead us to a match. + auto wildcard_state = matcher.match_wildcard(state); + if (matcher.can_match(wildcard_state)) { + str += '\x01'; + state = wildcard_state; + } else { + const auto smallest_out_edge = matcher.smallest_explicit_out_edge(state); + assert(matcher.valid_edge(smallest_out_edge)); + append_utf32_char_as_utf8(str, matcher.edge_to_u32char(smallest_out_edge)); + state = matcher.edge_to_state(state, smallest_out_edge); + } + } + } +}; + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/sparse_state.h b/vespalib/src/vespa/vespalib/fuzzy/sparse_state.h new file mode 100644 index 00000000000..40cfa5e6409 --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/sparse_state.h @@ -0,0 +1,175 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <algorithm> +#include <array> +#include <cassert> +#include <cstdint> +#include <ostream> +#include <span> +#include <xxh3.h> // TODO factor out? 
+ +namespace vespalib::fuzzy { + +// Sentinel U32 char for state stepping that cannot match any target string characters +constexpr const uint32_t WILDCARD = UINT32_MAX; + +/** + * diag(n) is the width of the diagonal of the cost matrix that can possibly be + * within k edits. This means that for a fixed k, it suffices to maintain state + * for up to and including diag(k) consecutive cells for any given matrix row. + */ +constexpr inline uint8_t diag(uint8_t k) noexcept { + return k*2 + 1; +} + +template <uint8_t MaxEdits> +struct FixedSparseState { +private: + static_assert(MaxEdits > 0 && MaxEdits <= UINT8_MAX/2); + + std::array<uint32_t, diag(MaxEdits)> indices; + std::array<uint8_t, diag(MaxEdits)> costs; // elems are 1-1 with indices vector + uint8_t sz; +public: + constexpr FixedSparseState() noexcept : indices(), costs(), sz(0) {} + + [[nodiscard]] constexpr bool empty() const noexcept { + return (sz == 0); + } + + [[nodiscard]] constexpr uint32_t size() const noexcept { + return sz; + } + + [[nodiscard]] constexpr uint32_t index(uint32_t entry_idx) const noexcept { + return indices[entry_idx]; + } + + [[nodiscard]] constexpr uint8_t cost(uint32_t entry_idx) const noexcept { + return costs[entry_idx]; + } + + // Precondition: !empty() + [[nodiscard]] constexpr uint32_t last_index() const noexcept { + return indices[sz - 1]; + } + + // Precondition: !empty() + [[nodiscard]] constexpr uint8_t last_cost() const noexcept { + return costs[sz - 1]; + } + + void append(uint32_t index, uint8_t cost) noexcept { + assert(sz < diag(MaxEdits)); + indices[sz] = index; + costs[sz] = cost; + ++sz; + } + + constexpr bool operator==(const FixedSparseState& rhs) const noexcept { + if (sz != rhs.sz) { + return false; + } + return (std::equal(indices.begin(), indices.begin() + sz, rhs.indices.begin()) && + std::equal(costs.begin(), costs.begin() + sz, rhs.costs.begin())); + } + + struct hash { + size_t operator()(const FixedSparseState& s) const noexcept { + 
static_assert(std::is_same_v<uint32_t, std::decay_t<decltype(s.indices[0])>>); + static_assert(std::is_same_v<uint8_t, std::decay_t<decltype(s.costs[0])>>); + // FIXME GCC 12.2 worse-than-useless(tm) warning false positives :I +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" + return (XXH3_64bits(s.indices.data(), s.sz * sizeof(uint32_t)) ^ + XXH3_64bits(s.costs.data(), s.sz)); +#pragma GCC diagnostic pop + } + }; +}; + +/** + * Prints sparse states as a single matrix row. Columns prior to any state index + * are printed explicitly as '-' characters to make states line up when printed. + * + * Example output for the state (2:1, 3:1): + * + * [-, -, 1, 1] + * + * Only meant as a debugging aid during development, as states with high indices + * will emit very large strings. + */ +template <uint8_t MaxEdits> [[maybe_unused]] +std::ostream& operator<<(std::ostream& os, const FixedSparseState<MaxEdits>& s) { + os << "["; + size_t last_idx = 0; + for (size_t i = 0; i < s.size(); ++i) { + if (i != 0) { + os << ", "; + } + for (size_t j = last_idx; j < s.indices[i]; ++j) { + os << "-, "; + } + last_idx = s.indices[i] + 1; + os << static_cast<uint32_t>(s.costs[i]); + } + os << "]"; + return os; +} + +template <uint8_t MaxEdits> +struct FixedMaxEditsTransitions { + static_assert(MaxEdits > 0 && MaxEdits <= UINT8_MAX/2); + + std::array<uint32_t, diag(MaxEdits)> out_u32_chars; + uint8_t size; + + constexpr FixedMaxEditsTransitions() noexcept : out_u32_chars(), size(0) {} + + [[nodiscard]] constexpr bool has_char(uint32_t u32ch) const noexcept { + for (uint8_t i = 0; i < size; ++i) { + if (out_u32_chars[i] == u32ch) { + return true; + } + } + return false; + } + + void add_char(uint32_t u32ch) noexcept { + if (!has_char(u32ch)) { + assert(size < diag(MaxEdits)); + out_u32_chars[size] = u32ch; + ++size; + } + } + + constexpr std::span<const uint32_t> u32_chars() const noexcept { + return {out_u32_chars.begin(), out_u32_chars.begin() + size}; + } + + 
constexpr std::span<uint32_t> u32_chars() noexcept { + return {out_u32_chars.begin(), out_u32_chars.begin() + size}; + } + + void sort() noexcept { + // TODO use custom sorting networks for fixed array sizes <= 5? + // FIXME GCC 12.2 worse-than-useless(tm) warning false positives :I +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" + std::sort(out_u32_chars.begin(), out_u32_chars.begin() + size); +#pragma GCC diagnostic pop + } +}; + +template <uint8_t MaxEdits> +struct FixedMaxEditDistanceTraits { + static_assert(MaxEdits > 0 && MaxEdits <= UINT8_MAX/2); + using StateType = FixedSparseState<MaxEdits>; + using TransitionsType = FixedMaxEditsTransitions<MaxEdits>; + constexpr static uint8_t max_edits() noexcept { + return MaxEdits; + } +}; + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/unicode_utils.cpp b/vespalib/src/vespa/vespalib/fuzzy/unicode_utils.cpp new file mode 100644 index 00000000000..648be234562 --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/unicode_utils.cpp @@ -0,0 +1,108 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include "unicode_utils.h" +#include <vespa/vespalib/text/utf8.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <stdexcept> + +namespace vespalib::fuzzy { + +std::vector<uint32_t> utf8_string_to_utf32(std::string_view str) { + vespalib::stringref ch_str(str.data(), str.size()); + vespalib::Utf8Reader utf8_reader(ch_str); + std::vector<uint32_t> u32ret; + u32ret.reserve(str.size()); // Will over-allocate for all non-ASCII + while (utf8_reader.hasMore()) { + u32ret.emplace_back(utf8_reader.getChar()); + } + return u32ret; +} + +std::vector<uint32_t> utf8_string_to_utf32(std::u8string_view u8str) { + return utf8_string_to_utf32(std::string_view(reinterpret_cast<const char*>(u8str.data()), u8str.size())); +} + +[[noreturn]] void throw_bad_code_point(uint32_t codepoint) __attribute__((noinline)); +[[noreturn]] void throw_bad_code_point(uint32_t codepoint) { + throw std::invalid_argument(make_string("invalid UTF-32 codepoint: U+%04X (%u)", codepoint, codepoint)); +} + +namespace { + +/** + * Encodes a single UTF-32 `codepoint` to a 1-4 byte UTF-8 sequence. + * ` + * `u8buf` must point to a buffer with at least 4 writable bytes. + * + * Returns the number of bytes written. + * + * See comments on append_utf32_char_as_utf8() as to why this is not a generic UTF-8 + * encoding function that can be used in all possible scenarios. 
+ */ +[[nodiscard]] uint8_t encode_utf8_char(uint32_t codepoint, unsigned char* u8buf) { + constexpr const uint8_t low_6bits_mask = 0x3F; + + // Yanked and modified from utf8.cpp: + if (codepoint < 0x80) { + u8buf[0] = (char) codepoint; + return 1; + } else if (codepoint < 0x800) { + char low6 = (codepoint & low_6bits_mask); + low6 |= 0x80; + codepoint >>= 6; + char first5 = codepoint; + first5 |= 0xC0; + u8buf[0] = first5; + u8buf[1] = low6; + return 2; + } else if (codepoint < 0x10000) { + char low6 = (codepoint & low_6bits_mask); + low6 |= 0x80; + + codepoint >>= 6; + char mid6 = (codepoint & low_6bits_mask); + mid6 |= 0x80; + + codepoint >>= 6; + char first4 = codepoint; + first4 |= 0xE0; + + u8buf[0] = first4; + u8buf[1] = mid6; + u8buf[2] = low6; + return 3; + } else if (codepoint <= 0x110000) { // Explicitly _include_ U+10FFFF + 1! + char low6 = (codepoint & low_6bits_mask); + low6 |= 0x80; + + codepoint >>= 6; + char mid6 = (codepoint & low_6bits_mask); + mid6 |= 0x80; + + codepoint >>= 6; + char hi6 = (codepoint & low_6bits_mask); + hi6 |= 0x80; + + codepoint >>= 6; + char first3 = codepoint; + first3 |= 0xF0; + + u8buf[0] = first3; + u8buf[1] = hi6; + u8buf[2] = mid6; + u8buf[3] = low6; + return 4; + } else { + throw_bad_code_point(codepoint); + } +} + +} // anon ns + +// TODO optimize inlined in header for case where u32_char is < 0x80? +void append_utf32_char_as_utf8(std::string& out_str, uint32_t u32_char) { + unsigned char u8buf[4]; + uint8_t u8bytes = encode_utf8_char(u32_char, u8buf); + out_str.append(reinterpret_cast<const char*>(u8buf), u8bytes); +} + +} diff --git a/vespalib/src/vespa/vespalib/fuzzy/unicode_utils.h b/vespalib/src/vespa/vespalib/fuzzy/unicode_utils.h new file mode 100644 index 00000000000..8627b01ff6a --- /dev/null +++ b/vespalib/src/vespa/vespalib/fuzzy/unicode_utils.h @@ -0,0 +1,33 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#pragma once

#include <cstdint>
#include <string>
#include <string_view>
#include <vector>

namespace vespalib::fuzzy {

/**
 * Decodes a UTF-8 string into a vector with one UTF-32 code point per element.
 */
std::vector<uint32_t> utf8_string_to_utf32(std::string_view str);

/**
 * As above, but for explicitly UTF-8-typed input strings.
 */
std::vector<uint32_t> utf8_string_to_utf32(std::u8string_view u8str);

/**
 * Encodes a single UTF-32 codepoint `u32_char` to a 1-4 byte UTF-8 sequence and
 * appends it to `out_str`.
 *
 * Note that this will happily encode code points that aren't technically part of
 * the valid UTF-8 range, but which will still be correct in memcmp() byte-wise
 * ordering, which is the API contract we expose.
 *
 * In particular, this includes:
 *   - high/low surrogate ranges U+D800 through U+DFFF (surrogate pairs not allowed
 *     in UTF-8)
 *   - U+10FFFF + 1 (outside max code point range by one)
 *
 * ... So don't copy this function for use as a general UTF-8 emitter, as it is not
 * _technically_ conformant!
 */
void append_utf32_char_as_utf8(std::string& out_str, uint32_t u32_char);

} // namespace vespalib::fuzzy