diff options
author | Jon Bratseth <bratseth@oath.com> | 2021-09-27 23:09:03 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-09-27 23:09:03 +0200 |
commit | 2df97d23d9f25ae60f010a2e9f273cb5b38e049b (patch) | |
tree | d2923a45682e91d80e7011c60cfb301e05acead3 /linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java | |
parent | 037f756caf4cfb99bcd988174839d7bc385267b9 (diff) | |
parent | 8f3fb1a105ded07144f6de527266a438e48a1766 (diff) |
Merge pull request #19294 from vespa-engine/bratseth/linguistics-components v7.473.17
Bratseth/linguistics components
Diffstat (limited to 'linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java')
-rw-r--r-- | linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java new file mode 100644 index 00000000000..8e7c2db2ed3 --- /dev/null +++ b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java @@ -0,0 +1,36 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.language.sentencepiece; + +import java.util.HashMap; +import java.util.Map; + +/** + * A simple trie for sentencepiece token lookups. + * + * @author bratseth + */ +class Trie { + + final Node root = new Node(); + + void add(TokenType type, int id, String word, float score) { + Node current = root; + for (char l : word.toCharArray()) + current = current.children.computeIfAbsent(l, c -> new Node()); + current.type = type; + current.id = id; + current.score = score; + } + + static class Node { + + Integer id; + TokenType type; + Float score; + final Map<Character, Node> children = new HashMap<>(); + + boolean isToken() { return type != null; } + + } + +} |