diff options
Diffstat (limited to 'vespalib/src')
-rw-r--r-- | vespalib/src/vespa/vespalib/fuzzy/levenstein_distance.h | 10 |
1 files changed, 10 insertions, 0 deletions
diff --git a/vespalib/src/vespa/vespalib/fuzzy/levenstein_distance.h b/vespalib/src/vespa/vespalib/fuzzy/levenstein_distance.h index e109db3178c..bce7599fc23 100644 --- a/vespalib/src/vespa/vespalib/fuzzy/levenstein_distance.h +++ b/vespalib/src/vespa/vespalib/fuzzy/levenstein_distance.h @@ -7,6 +7,16 @@ namespace vespalib { +/** + * LevensteinDistance::calculate implements the basic Levenshtein distance algorithm + * with early stopping if the distance is already too high. + * If the distance is above the threshold, the method returns an empty optional; + * if the distance is below or equal to it, the distance is returned wrapped in an optional. + * The type it is built upon is uint32_t, and it is used to compare codepoints from terms, + * but in the future the code can be generalized. + * + * The algorithm is based on the Java implementation from the commons-text library. + */ class LevensteinDistance { public: static std::optional<uint32_t> calculate(const std::vector<uint32_t>& left, const std::vector<uint32_t>& right, uint32_t threshold); |