diff options
author | Jon Bratseth <bratseth@oath.com> | 2021-09-28 21:51:45 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-09-28 21:51:45 +0200 |
commit | 12a415efca5749433fd22424592ddc18f04160f6 (patch) | |
tree | 9324cb3aa2aabfa4fb8f0dc5fc0f7639869db7b1 /linguistics/src/main/java/com/yahoo/language/process/Encoder.java | |
parent | b57543dc1a1e3d32bcd03afb7af972490d691bf1 (diff) | |
parent | e7e659e9d26401c8c36300d4760d4e34acd26d0a (diff) |
Merge pull request #19337 from vespa-engine/bratseth/encoder-to-embedder v7.474.25
encode -> embed
Diffstat (limited to 'linguistics/src/main/java/com/yahoo/language/process/Encoder.java')
-rw-r--r-- | linguistics/src/main/java/com/yahoo/language/process/Encoder.java | 56 |
1 files changed, 0 insertions, 56 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/process/Encoder.java b/linguistics/src/main/java/com/yahoo/language/process/Encoder.java deleted file mode 100644 index 27f73d15e54..00000000000 --- a/linguistics/src/main/java/com/yahoo/language/process/Encoder.java +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.language.process; - -import com.yahoo.language.Language; -import com.yahoo.tensor.Tensor; -import com.yahoo.tensor.TensorType; - -import java.util.List; - -/** - * An encoder converts a text string to a tensor or list of tokens - * - * @author bratseth - */ -public interface Encoder { - - /** An instance of this which throws IllegalStateException if attempted used */ - Encoder throwsOnUse = new FailingEncoder(); - - /** - * Encodes text into tokens in a list of ids. - * - * @param text the text to encode - * @param language the language of the text, or UNKNOWN to use language independent encoding - * @return the text encoded to a list of segment ids - * @throws IllegalArgumentException if the language is not supported by this encoder - */ - List<Integer> encode(String text, Language language); - - /** - * Encodes text into tokens in a tensor. - * The information contained in the encoding may depend on the tensor type. 
- * - * @param text the text to encode - * @param language the language of the text, or UNKNOWN to use language independent encoding - * @param tensorType the type of the ttensor to be returned - * @return the tex encoded into a tensor of the supplied type - * @throws IllegalArgumentException if the language or tensor type is not supported by this encoder - */ - Tensor encode(String text, Language language, TensorType tensorType); - - class FailingEncoder implements Encoder { - - @Override - public List<Integer> encode(String text, Language language) { - throw new IllegalStateException("No encoder has been configured"); - } - - @Override - public Tensor encode(String text, Language language, TensorType tensorType) { - throw new IllegalStateException("No encoder has been configured"); - } - - } - -} |