diff options
author | Lester Solbakken <lesters@oath.com> | 2023-02-10 14:06:29 +0100 |
---|---|---|
committer | Lester Solbakken <lesters@oath.com> | 2023-02-10 14:06:29 +0100 |
commit | f5118dcd8b04293cf65434f1509fa0e06833492b (patch) | |
tree | c6d77c5a81c7fbfe697e219897a459879871ef0e /linguistics | |
parent | f62bb48baf715609606faa82a6119012b8a727de (diff) |
Add decoding of sentencepiece token sequence to text
Diffstat (limited to 'linguistics')
-rw-r--r-- | linguistics/src/main/java/com/yahoo/language/process/Embedder.java | 11 |
1 files changed, 11 insertions, 0 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/process/Embedder.java b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java index c8ba3395c3c..055861c5388 100644 --- a/linguistics/src/main/java/com/yahoo/language/process/Embedder.java +++ b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java @@ -42,6 +42,17 @@ public interface Embedder { List<Integer> embed(String text, Context context); /** + * Converts the list of token ids into a text. The opposite operation of embed. + * + * @param tokens the list of tokens to decode to a string + * @param context the context which specifies the language used to select a model + * @return the string formed by decoding the tokens back to their string representation + */ + default String decode(List<Integer> tokens, Context context) { + throw new UnsupportedOperationException("Decode is not implemented"); + } + + /** * Converts text into tokens in a tensor. * The information contained in the embedding may depend on the tensor type. * |