aboutsummaryrefslogtreecommitdiffstats
path: root/vespalib/src/vespa/vespalib/fuzzy/levenshtein_distance.h
blob: e284e07173157d6539a486ec3fea0ced3e320e77 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include <optional>
#include <cstdint>
#include <span>

namespace vespalib {

/**
 * LevenshteinDistance::calculate implements basic Levenshtein distance algorithm
 * with early stopping if the distance is already too high.
 * If the distance is above threshold method would return empty optional,
 * if the distance is below or equal to it, the distance will be wrapped in optional.
 * The types it's built upon are uint32 and it used to compare codepoints from terms,
 * but in future code can be generalized.
 *
 * Algorithm is based off Java implementation from commons-text library
 */
class LevenshteinDistance {
public:
    // Iff `prefix_match` == true, `right` is the candidate to match against prefix `left`
    static std::optional<uint32_t> calculate(std::span<const uint32_t> left,
                                             std::span<const uint32_t> right,
                                             uint32_t threshold,
                                             bool prefix_match);

    static std::optional<uint32_t> calculate(std::span<const uint32_t> left,
                                             std::span<const uint32_t> right,
                                             uint32_t threshold);
};

}