aboutsummaryrefslogtreecommitdiffstats
path: root/vespalib/src/vespa/vespalib/fuzzy/levenshtein_distance.h
blob: f105dcdaaf4ee5987b3f90a43aecfe04db6c6d38 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include <optional>
#include <cstdint>
#include <span>

namespace vespalib {

/**
 * LevenshteinDistance::calculate implements basic Levenshtein distance algorithm
 * with early stopping if the distance is already too high.
 * If the distance is above threshold method would return empty optional,
 * if the distance is below or equal to it, the distance will be wrapped in optional.
 * The types it's built upon are uint32 and it used to compare codepoints from terms,
 * but in future code can be generalized.
 *
 * Algorithm is based off Java implementation from commons-text library
 */
class LevenshteinDistance {
public:
    static std::optional<uint32_t> calculate(std::span<const uint32_t> left, std::span<const uint32_t> right, uint32_t threshold);
};

}