diff options
Diffstat (limited to 'linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Model.java')
-rw-r--r-- | linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Model.java | 6 |
1 files changed, 5 insertions, 1 deletions
diff --git a/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Model.java b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Model.java index 74f300057dc..b4f216c4c9c 100644 --- a/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Model.java +++ b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Model.java @@ -7,6 +7,8 @@ import sentencepiece.SentencepieceModel; import java.io.IOException; import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; /** * A SentencePiece model @@ -20,6 +22,7 @@ final class Model { final float minScore; final float maxScore; final Trie tokens = new Trie(); + final Map<Integer, String> tokenId2Token = new HashMap<>(); Model(Language language, Path path) { try { @@ -31,6 +34,7 @@ final class Model { for (int i = 0; i < sp.getPiecesCount(); i++) { var piece = sp.getPieces(i); tokens.add(toTokenType(piece.getType()), i, piece.getPiece(), piece.getScore()); + tokenId2Token.put(i, piece.getPiece()); minScore = Math.min(piece.getScore(), minScore); maxScore = Math.max(piece.getScore(), maxScore); } @@ -48,7 +52,7 @@ final class Model { case NORMAL : return TokenType.text; case CONTROL : return TokenType.control; case UNUSED : return TokenType.unused; - default : throw new IllegalArgumentException("Unknkown token type " + type); + default : throw new IllegalArgumentException("Unknown token type " + type); } } |