summaryrefslogtreecommitdiffstats
path: root/vespalib/src
diff options
context:
space:
mode:
authorAlexey Chernyshev <aleksei@spotify.com>2022-03-23 17:09:53 +0100
committerAlexey Chernyshev <aleksei@spotify.com>2022-03-23 17:09:53 +0100
commit8b2e9391720de4a568f9594d129a42c32a275c42 (patch)
treeaa5d9ef87ee40f04a74834902490ad9fcf4fce66 /vespalib/src
parent6bcdc1ac1c1c3ce8b30472926098df989b9f7019 (diff)
Added a comment to LevensteinDistance class
Diffstat (limited to 'vespalib/src')
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/levenstein_distance.h10
1 files changed, 10 insertions, 0 deletions
diff --git a/vespalib/src/vespa/vespalib/fuzzy/levenstein_distance.h b/vespalib/src/vespa/vespalib/fuzzy/levenstein_distance.h
index e109db3178c..bce7599fc23 100644
--- a/vespalib/src/vespa/vespalib/fuzzy/levenstein_distance.h
+++ b/vespalib/src/vespa/vespalib/fuzzy/levenstein_distance.h
@@ -7,6 +7,16 @@
namespace vespalib {
+/**
+ * LevensteinDistance::calculate implements the basic Levenshtein distance algorithm
+ * with early stopping once the distance is already known to be too high.
+ * If the distance is above the threshold, the method returns an empty optional;
+ * if the distance is below or equal to the threshold, the distance is returned wrapped in an optional.
+ * It operates on uint32_t values and is used to compare the code points of terms,
+ * but the code can be generalized in the future.
+ *
+ * The algorithm is based on the Java implementation in the Apache commons-text library.
+ */
class LevensteinDistance {
public:
static std::optional<uint32_t> calculate(const std::vector<uint32_t>& left, const std::vector<uint32_t>& right, uint32_t threshold);