summaryrefslogtreecommitdiffstats
path: root/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java
diff options
context:
space:
mode:
Diffstat (limited to 'linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java')
-rw-r--r--linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java36
1 files changed, 36 insertions, 0 deletions
diff --git a/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java
new file mode 100644
index 00000000000..8e7c2db2ed3
--- /dev/null
+++ b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java
@@ -0,0 +1,36 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.language.sentencepiece;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * A simple trie for sentencepiece token lookups.
+ * Each node maps a single character to its child node; the nodes reached by
+ * an added word carry that word's token type, id and score.
+ *
+ * @author bratseth
+ */
+class Trie {
+
+    /** The root of the trie, i.e. the node reached by the empty string. */
+    final Node root = new Node();
+
+    /**
+     * Adds a word to this trie, creating any nodes missing along its path,
+     * and records the given token data on the node the word ends at.
+     * Data already present on that node is overwritten.
+     *
+     * @param type  the token type to store on the final node
+     * @param id    the token id to store on the final node
+     * @param word  the characters spelling out the path from the root
+     * @param score the token score to store on the final node
+     */
+    void add(TokenType type, int id, String word, float score) {
+        Node node = root;
+        for (int i = 0; i < word.length(); i++) {
+            node = node.children.computeIfAbsent(word.charAt(i), key -> new Node());
+        }
+        node.score = score;
+        node.id = id;
+        node.type = type;
+    }
+
+    /** A single trie node: optional token data plus the children keyed by next character. */
+    static class Node {
+
+        /** Token id, or null as long as no word has been added ending at this node. */
+        Integer id;
+
+        /** Token type; a non-null value is what marks this node as a token. */
+        TokenType type;
+
+        /** Token score, or null as long as no word has been added ending at this node. */
+        Float score;
+
+        /** Child nodes, keyed by the character leading to them. */
+        final Map<Character, Node> children = new HashMap<>();
+
+        /** Returns true if a token type has been set on this node. */
+        boolean isToken() {
+            return type != null;
+        }
+
+    }
+
+}