summaryrefslogtreecommitdiffstats
path: root/linguistics/src
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@gmail.com> 2021-09-30 13:46:50 +0200
committer Jon Bratseth <bratseth@gmail.com> 2021-09-30 13:46:50 +0200
commit 9fdcf8f92eaf3b47053fa2c131832dea1c792d0c (patch)
tree c4d8f2a7c8297fce1b4b6f07a32ab0daeac35aaa /linguistics/src
parent 1bc2cca4b527bb9a5a8c67744b0796c9fafbe024 (diff)
Pass destination
This allows embedders to switch on it to enable bucket testing and similar.
Diffstat (limited to 'linguistics/src')
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/process/Embedder.java 14
1 files changed, 10 insertions, 4 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/process/Embedder.java b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java
index 56c401a7c61..1f4473220d7 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/Embedder.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java
@@ -22,10 +22,13 @@ public interface Embedder {
*
* @param text the text to embed
* @param language the language of the text, or UNKNOWN to use language independent embedding
+ * @param destination the name of the recipient of this tensor, either a query feature name
+ * ("query(feature)"), or a schema and field name concatenated by a dot ("schema.field").
+ * This is useful for embedder components that alter behavior depending on the receiver.
* @return the text embedded as a list of token ids
* @throws IllegalArgumentException if the language is not supported by this embedder
*/
- List<Integer> embed(String text, Language language);
+ List<Integer> embed(String text, Language language, String destination);
/**
* Converts text into tokens in a tensor.
@@ -33,21 +36,24 @@ public interface Embedder {
*
* @param text the text to embed
* @param language the language of the text, or UNKNOWN to use language independent embedding
+ * @param destination the name of the recipient of this tensor, either a query feature name
+ * ("query(feature)"), or a schema and field name concatenated by a dot ("schema.field").
+ * This is useful for embedder components that alter behavior depending on the receiver.
* @param tensorType the type of the tensor to be returned
* @return the tensor embedding of the text, as the specified tensor type
* @throws IllegalArgumentException if the language or tensor type is not supported by this embedder
*/
- Tensor embed(String text, Language language, TensorType tensorType);
+ Tensor embed(String text, Language language, String destination, TensorType tensorType);
class FailingEmbedder implements Embedder {
@Override
- public List<Integer> embed(String text, Language language) {
+ public List<Integer> embed(String text, Language language, String destination) {
throw new IllegalStateException("No embedder has been configured");
}
@Override
- public Tensor embed(String text, Language language, TensorType tensorType) {
+ public Tensor embed(String text, Language language, String destination, TensorType tensorType) {
throw new IllegalStateException("No embedder has been configured");
}