summaryrefslogtreecommitdiffstats
path: root/linguistics/src/main/java/com/yahoo/language/process/Embedder.java
diff options
context:
space:
mode:
Diffstat (limited to 'linguistics/src/main/java/com/yahoo/language/process/Embedder.java')
-rw-r--r--linguistics/src/main/java/com/yahoo/language/process/Embedder.java58
1 files changed, 46 insertions, 12 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/process/Embedder.java b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java
index 1f4473220d7..17ee0419cea 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/Embedder.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java
@@ -21,39 +21,73 @@ public interface Embedder {
* Converts text into a list of token ids (a vector embedding)
*
* @param text the text to embed
- * @param language the language of the text, or UNKNOWN to use language independent embedding
- * @param destination the name of the recipient of this tensor, either a query feature name
- * ("query(feature)"), or a schema and field name concatenated by a dot ("schema.field").
- * This is useful for embedder components that alters behavior depending on the receiver.
+ * @param context the context which may influence an embedder's behavior
* @return the text embedded as a list of token ids
* @throws IllegalArgumentException if the language is not supported by this embedder
*/
- List<Integer> embed(String text, Language language, String destination);
+ List<Integer> embed(String text, Context context);
/**
* Converts text into tokens in a tensor.
* The information contained in the embedding may depend on the tensor type.
*
* @param text the text to embed
- * @param language the language of the text, or UNKNOWN to use language independent embedding
- * @param destination the name of the recipient of this tensor, either a query feature name
- * ("query(feature)"), or a schema and field name concatenated by a dot ("schema.field").
- * This is useful for embedder components that alters behavior depending on the receiver.
+ * @param context the context which may influence an embedder's behavior
* @param tensorType the type of the tensor to be returned
* @return the tensor embedding of the text, as the specified tensor type
* @throws IllegalArgumentException if the language or tensor type is not supported by this embedder
*/
- Tensor embed(String text, Language language, String destination, TensorType tensorType);
+ Tensor embed(String text, Context context, TensorType tensorType);
+
+ class Context {
+
+ private Language language = Language.UNKNOWN;
+ private String destination;
+
+ public Context(String destination) {
+ this.destination = destination;
+ }
+
+ /** Returns the language of the text, or UNKNOWN (default) to use a language independent embedding */
+ public Language getLanguage() { return language; }
+
+ /** Sets the language of the text, or UNKNOWN to use language independent embedding */
+ public Context setLanguage(Language language) {
+ this.language = language;
+ return this;
+ }
+
+ /**
+ * Returns the name of the recipient of this tensor.
+ *
+ * This is either a query feature name
+ * ("query(feature)"), or a schema and field name concatenated by a dot ("schema.field").
+ * This cannot be null.
+ */
+ public String getDestination() { return destination; }
+
+ /**
+ * Sets the name of the recipient of this tensor.
+ *
+ * This is either a query feature name
+ * ("query(feature)"), or a schema and field name concatenated by a dot ("schema.field").
+ */
+ public Context setDestination(String destination) {
+ this.destination = destination;
+ return this;
+ }
+
+ }
class FailingEmbedder implements Embedder {
@Override
- public List<Integer> embed(String text, Language language, String destination) {
+ public List<Integer> embed(String text, Context context) {
throw new IllegalStateException("No embedder has been configured");
}
@Override
- public Tensor embed(String text, Language language, String destination, TensorType tensorType) {
+ public Tensor embed(String text, Context context, TensorType tensorType) {
throw new IllegalStateException("No embedder has been configured");
}