summary refs log tree commit diff stats
path: root/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@oath.com> 2021-09-27 23:09:03 +0200
committer GitHub <noreply@github.com> 2021-09-27 23:09:03 +0200
commit 2df97d23d9f25ae60f010a2e9f273cb5b38e049b (patch)
tree d2923a45682e91d80e7011c60cfb301e05acead3 /linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java
parent 037f756caf4cfb99bcd988174839d7bc385267b9 (diff)
parent 8f3fb1a105ded07144f6de527266a438e48a1766 (diff)
Merge pull request #19294 from vespa-engine/bratseth/linguistics-components v7.473.17
Bratseth/linguistics components
Diffstat (limited to 'linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java')
-rw-r--r-- linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java 36
1 files changed, 36 insertions, 0 deletions
diff --git a/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java
new file mode 100644
index 00000000000..8e7c2db2ed3
--- /dev/null
+++ b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java
@@ -0,0 +1,36 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.language.sentencepiece;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * A simple trie for sentencepiece token lookups.
+ *
+ * @author bratseth
+ */
+class Trie { // Character-keyed prefix tree; package-private.
+
+ final Node root = new Node(); // Node for the empty string; every add() starts its descent here.
+
+ void add(TokenType type, int id, String word, float score) { // Stores type, id and score on the node reached by spelling out word.
+ Node current = root;
+ for (char l : word.toCharArray()) // One trie level per UTF-16 char, not per code point.
+ current = current.children.computeIfAbsent(l, c -> new Node()); // Follow the existing child for this char, or create it if missing.
+ current.type = type; // Overwrites whatever an earlier add() of the same word stored.
+ current.id = id;
+ current.score = score;
+ } // For an empty word the loop body never runs, so the values land on root.
+
+ static class Node { // One position in the trie; the child keys on the path from root spell its string.
+
+ Integer id; // Unset (null) by default; assigned by Trie.add().
+ TokenType type; // Unset (null) by default; assigned by Trie.add(); read by isToken().
+ Float score; // Unset (null) by default; assigned by Trie.add().
+ final Map<Character, Node> children = new HashMap<>(); // Child nodes keyed by the next char.
+
+ boolean isToken() { return type != null; } // True once a non-null type has been stored on this node.
+
+ }
+
+}