aboutsummaryrefslogtreecommitdiffstats
path: root/linguistics/src/main/java/com/yahoo/language/sentencepiece/Trie.java
blob: 8e7c2db2ed3f8bf1661b64b2b307a2a024245920 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.sentencepiece;

import java.util.HashMap;
import java.util.Map;

/**
 * A character trie used for sentencepiece token lookups.
 * Each added word is stored as a path of characters starting at {@link #root};
 * the node ending that path carries the token's type, id and score.
 *
 * @author bratseth
 */
class Trie {

    /** The entry node of the trie; it corresponds to the empty prefix. */
    final Node root = new Node();

    /**
     * Registers a token in this trie, creating any nodes missing along the path spelled by the word.
     * If the word has been added before, the values on its final node are overwritten.
     *
     * @param type  the token type to store on the final node
     * @param id    the token id to store on the final node
     * @param word  the character sequence that forms the path to the final node
     * @param score the token score to store on the final node
     */
    void add(TokenType type, int id, String word, float score) {
        Node node = root;
        int length = word.length();
        for (int i = 0; i < length; i++) {
            char key = word.charAt(i);
            Node child = node.children.get(key);
            if (child == null) {
                // First time this character follows the current prefix: extend the path
                child = new Node();
                node.children.put(key, child);
            }
            node = child;
        }
        node.score = score;
        node.id = id;
        node.type = type;
    }

    /** A single position in the trie: the prefix formed by the characters leading to it. */
    static class Node {

        /** Outgoing edges, keyed by the next character. */
        final Map<Character, Node> children = new HashMap<>();

        /** Token type assigned by {@link Trie#add}, or null if nothing has been assigned. */
        TokenType type;

        /** Token id assigned by {@link Trie#add}, or null if nothing has been assigned. */
        Integer id;

        /** Token score assigned by {@link Trie#add}, or null if nothing has been assigned. */
        Float score;

        /** Returns whether this node has a token type assigned, i.e. whether its type is non-null. */
        boolean isToken() {
            return type != null;
        }

    }

}