diff options
Diffstat (limited to 'linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java')
-rw-r--r-- | linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java new file mode 100644 index 00000000000..8e7c2db2ed3 --- /dev/null +++ b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java @@ -0,0 +1,36 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.language.sentencepiece; + +import java.util.HashMap; +import java.util.Map; + +/** + * A simple trie for sentencepiece token lookups. + * + * @author bratseth + */ +class Trie { + + final Node root = new Node(); + + void add(TokenType type, int id, String word, float score) { + Node current = root; + for (char l : word.toCharArray()) + current = current.children.computeIfAbsent(l, c -> new Node()); + current.type = type; + current.id = id; + current.score = score; + } + + static class Node { + + Integer id; + TokenType type; + Float score; + final Map<Character, Node> children = new HashMap<>(); + + boolean isToken() { return type != null; } + + } + +} |