about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@oath.com> 2021-09-28 21:51:45 +0200
committer GitHub <noreply@github.com> 2021-09-28 21:51:45 +0200
commit 12a415efca5749433fd22424592ddc18f04160f6 (patch)
tree 9324cb3aa2aabfa4fb8f0dc5fc0f7639869db7b1
parent b57543dc1a1e3d32bcd03afb7af972490d691bf1 (diff)
parent e7e659e9d26401c8c36300d4760d4e34acd26d0a (diff)
Merge pull request #19337 from vespa-engine/bratseth/encoder-to-embedder v7.474.25
encode -> embed
-rw-r--r--config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java8
-rw-r--r--config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexingOperation.java8
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java2
-rw-r--r--config-model/src/main/javacc/SDParser.jj8
-rw-r--r--container-core/src/main/java/com/yahoo/container/core/config/testutil/HandlersConfigurerTestWrapper.java4
-rw-r--r--container-core/src/main/java/com/yahoo/language/provider/DefaultEmbedderProvider.java (renamed from container-core/src/main/java/com/yahoo/language/provider/DefaultEncoderProvider.java)15
-rw-r--r--container-search/abi-spec.json12
-rw-r--r--container-search/src/main/java/com/yahoo/search/Query.java26
-rw-r--r--container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java20
-rw-r--r--container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileProperties.java12
-rw-r--r--container-search/src/main/java/com/yahoo/search/query/profile/types/ConversionContext.java12
-rw-r--r--container-search/src/main/java/com/yahoo/search/query/profile/types/FieldType.java2
-rw-r--r--container-search/src/main/java/com/yahoo/search/query/profile/types/PrimitiveFieldType.java2
-rw-r--r--container-search/src/main/java/com/yahoo/search/query/profile/types/QueryFieldType.java2
-rw-r--r--container-search/src/main/java/com/yahoo/search/query/profile/types/QueryProfileFieldType.java2
-rw-r--r--container-search/src/main/java/com/yahoo/search/query/profile/types/TensorFieldType.java15
-rw-r--r--container-search/src/main/java/com/yahoo/search/query/properties/QueryProperties.java10
-rw-r--r--container-search/src/test/java/com/yahoo/search/grouping/vespa/IntegerEmbedderTestCase.java (renamed from container-search/src/test/java/com/yahoo/search/grouping/vespa/IntegerEncoderTestCase.java)2
-rw-r--r--container-search/src/test/java/com/yahoo/search/query/profile/types/test/QueryProfileTypeTestCase.java27
-rw-r--r--docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java6
-rw-r--r--docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java10
-rw-r--r--docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java4
-rw-r--r--docprocs/src/test/java/com/yahoo/docprocs/indexing/ScriptManagerTestCase.java10
-rw-r--r--indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParser.java2
-rw-r--r--indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParserContext.java13
-rw-r--r--indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EmbedExpression.java (renamed from indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EncodeExpression.java)22
-rw-r--r--indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java8
-rw-r--r--indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java8
-rw-r--r--indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java10
-rw-r--r--indexinglanguage/src/main/javacc/IndexingParser.jj20
-rw-r--r--indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptParserTestCase.java4
-rw-r--r--indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java18
-rw-r--r--indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/DefaultFieldNameTestCase.java4
-rw-r--r--indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java6
-rw-r--r--linguistics-components/abi-spec.json20
-rw-r--r--linguistics-components/src/main/java/com/yahoo/language/sentencepiece/SentencePieceEmbedder.java (renamed from linguistics-components/src/main/java/com/yahoo/language/sentencepiece/SentencePieceEncoder.java)29
-rw-r--r--linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def2
-rw-r--r--linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceConfigurationTest.java8
-rw-r--r--linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTest.java18
-rw-r--r--linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java20
-rw-r--r--linguistics/abi-spec.json16
-rw-r--r--linguistics/src/main/java/com/yahoo/language/process/Embedder.java56
-rw-r--r--linguistics/src/main/java/com/yahoo/language/process/Encoder.java56
43 files changed, 267 insertions, 292 deletions
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java b/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java
index 9c89517f72d..02df81fbbb3 100644
--- a/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java
@@ -9,7 +9,7 @@ import com.yahoo.document.MapDataType;
import com.yahoo.document.StructDataType;
import com.yahoo.document.TensorDataType;
import com.yahoo.language.Linguistics;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.searchdefinition.Index;
import com.yahoo.searchdefinition.Search;
@@ -426,12 +426,12 @@ public class SDField extends Field implements TypedKey, FieldOperationContainer,
/** Parse an indexing expression which will use the simple linguistics implementatino suitable for testing */
public void parseIndexingScript(String script) {
- parseIndexingScript(script, new SimpleLinguistics(), Encoder.throwsOnUse);
+ parseIndexingScript(script, new SimpleLinguistics(), Embedder.throwsOnUse);
}
- public void parseIndexingScript(String script, Linguistics linguistics, Encoder encoder) {
+ public void parseIndexingScript(String script, Linguistics linguistics, Embedder embedder) {
try {
- ScriptParserContext config = new ScriptParserContext(linguistics, encoder);
+ ScriptParserContext config = new ScriptParserContext(linguistics, embedder);
config.setInputStream(new IndexingInput(script));
setIndexingScript(ScriptExpression.newInstance(config));
} catch (ParseException e) {
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexingOperation.java b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexingOperation.java
index 87fa74b92fe..18e187fd921 100644
--- a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexingOperation.java
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexingOperation.java
@@ -2,7 +2,7 @@
package com.yahoo.searchdefinition.fieldoperation;
import com.yahoo.language.Linguistics;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.searchdefinition.document.SDField;
import com.yahoo.searchdefinition.parser.ParseException;
@@ -30,13 +30,13 @@ public class IndexingOperation implements FieldOperation {
/** Creates an indexing operation which will use the simple linguistics implementation suitable for testing */
public static IndexingOperation fromStream(SimpleCharStream input, boolean multiLine) throws ParseException {
- return fromStream(input, multiLine, new SimpleLinguistics(), Encoder.throwsOnUse);
+ return fromStream(input, multiLine, new SimpleLinguistics(), Embedder.throwsOnUse);
}
public static IndexingOperation fromStream(SimpleCharStream input, boolean multiLine,
- Linguistics linguistics, Encoder encoder)
+ Linguistics linguistics, Embedder embedder)
throws ParseException {
- ScriptParserContext config = new ScriptParserContext(linguistics, encoder);
+ ScriptParserContext config = new ScriptParserContext(linguistics, embedder);
config.setAnnotatorConfig(new AnnotatorConfig());
config.setInputStream(input);
ScriptExpression exp;
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
index 5574082e334..0bb04a1266d 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
@@ -100,7 +100,7 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
.collect(Collectors.toUnmodifiableSet());
addSimpleComponent("com.yahoo.language.provider.DefaultLinguisticsProvider");
- addSimpleComponent("com.yahoo.language.provider.DefaultEncoderProvider");
+ addSimpleComponent("com.yahoo.language.provider.DefaultEmbedderProvider");
addSimpleComponent("com.yahoo.container.jdisc.SecretStoreProvider");
addSimpleComponent("com.yahoo.container.jdisc.DeprecatedSecretStoreProvider");
addSimpleComponent("com.yahoo.container.jdisc.CertificateStoreProvider");
diff --git a/config-model/src/main/javacc/SDParser.jj b/config-model/src/main/javacc/SDParser.jj
index 7df77588fe8..6fd2f09f445 100644
--- a/config-model/src/main/javacc/SDParser.jj
+++ b/config-model/src/main/javacc/SDParser.jj
@@ -58,7 +58,7 @@ import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.config.application.api.FileRegistry;
import com.yahoo.config.model.api.ModelContext;
import com.yahoo.language.Linguistics;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.search.query.ranking.Diversity;
import java.util.Map;
@@ -112,7 +112,7 @@ public class SDParser {
*/
@SuppressWarnings("deprecation")
private IndexingOperation newIndexingOperation(boolean multiline) throws ParseException {
- return newIndexingOperation(multiline, new SimpleLinguistics(), Encoder.throwsOnUse);
+ return newIndexingOperation(multiline, new SimpleLinguistics(), Embedder.throwsOnUse);
}
/**
@@ -121,13 +121,13 @@ public class SDParser {
* @param multiline Whether or not to allow multi-line expressions.
* @param linguistics What to use for tokenizing.
*/
- private IndexingOperation newIndexingOperation(boolean multiline, Linguistics linguistics, Encoder encoder) throws ParseException {
+ private IndexingOperation newIndexingOperation(boolean multiline, Linguistics linguistics, Embedder embedder) throws ParseException {
SimpleCharStream input = (SimpleCharStream)token_source.input_stream;
if (token.next != null) {
input.backup(token.next.image.length());
}
try {
- return IndexingOperation.fromStream(input, multiline, linguistics, encoder);
+ return IndexingOperation.fromStream(input, multiline, linguistics, embedder);
} finally {
token.next = null;
jj_ntk = -1;
diff --git a/container-core/src/main/java/com/yahoo/container/core/config/testutil/HandlersConfigurerTestWrapper.java b/container-core/src/main/java/com/yahoo/container/core/config/testutil/HandlersConfigurerTestWrapper.java
index 0c4709e4a2c..e6231f11ae5 100644
--- a/container-core/src/main/java/com/yahoo/container/core/config/testutil/HandlersConfigurerTestWrapper.java
+++ b/container-core/src/main/java/com/yahoo/container/core/config/testutil/HandlersConfigurerTestWrapper.java
@@ -17,7 +17,7 @@ import com.yahoo.jdisc.Metric;
import com.yahoo.jdisc.handler.RequestHandler;
import com.yahoo.jdisc.test.MockMetric;
import com.yahoo.language.Linguistics;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.language.simple.SimpleLinguistics;
import java.io.File;
@@ -141,7 +141,7 @@ public class HandlersConfigurerTestWrapper {
protected void configure() {
// Needed by e.g. SearchHandler
bind(Linguistics.class).to(SimpleLinguistics.class).in(Scopes.SINGLETON);
- bind(Encoder.class).to(Encoder.FailingEncoder.class).in(Scopes.SINGLETON);
+ bind(Embedder.class).to(Embedder.FailingEmbedder.class).in(Scopes.SINGLETON);
bind(ContainerThreadPool.class).to(SimpleContainerThreadpool.class);
bind(Metric.class).to(MockMetric.class);
}
diff --git a/container-core/src/main/java/com/yahoo/language/provider/DefaultEncoderProvider.java b/container-core/src/main/java/com/yahoo/language/provider/DefaultEmbedderProvider.java
index f8550d04d1c..ef371106b7d 100644
--- a/container-core/src/main/java/com/yahoo/language/provider/DefaultEncoderProvider.java
+++ b/container-core/src/main/java/com/yahoo/language/provider/DefaultEmbedderProvider.java
@@ -3,27 +3,22 @@ package com.yahoo.language.provider;
import com.google.inject.Inject;
import com.yahoo.container.di.componentgraph.Provider;
-import com.yahoo.language.Language;
-import com.yahoo.language.process.Encoder;
-import com.yahoo.tensor.Tensor;
-import com.yahoo.tensor.TensorType;
-
-import java.util.List;
+import com.yahoo.language.process.Embedder;
/**
- * Provides the default encoder implementation if no encoder component has been explicitly configured
+ * Provides the default embedder implementation if no embedder component has been explicitly configured
* (dependency injection will fallback to providers if no components of the requested type is found).
*
* @author bratseth
*/
@SuppressWarnings("unused") // Injected
-public class DefaultEncoderProvider implements Provider<Encoder> {
+public class DefaultEmbedderProvider implements Provider<Embedder> {
@Inject
- public DefaultEncoderProvider() { }
+ public DefaultEmbedderProvider() { }
@Override
- public Encoder get() { return Encoder.throwsOnUse; }
+ public Embedder get() { return Embedder.throwsOnUse; }
@Override
public void deconstruct() {}
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json
index 7016eff3185..40071f90c34 100644
--- a/container-search/abi-spec.json
+++ b/container-search/abi-spec.json
@@ -1801,8 +1801,8 @@
"public java.util.Map getRequestMap()",
"public com.yahoo.search.Query$Builder setQueryProfile(com.yahoo.search.query.profile.compiled.CompiledQueryProfile)",
"public com.yahoo.search.query.profile.compiled.CompiledQueryProfile getQueryProfile()",
- "public com.yahoo.search.Query$Builder setEncoder(com.yahoo.language.process.Encoder)",
- "public com.yahoo.language.process.Encoder getEncoder()",
+ "public com.yahoo.search.Query$Builder setEmbedder(com.yahoo.language.process.Embedder)",
+ "public com.yahoo.language.process.Embedder getEmbedder()",
"public com.yahoo.search.Query build()"
],
"fields": []
@@ -4258,7 +4258,7 @@
"public"
],
"methods": [
- "public void <init>(com.yahoo.statistics.Statistics, com.yahoo.jdisc.Metric, com.yahoo.container.handler.threadpool.ContainerThreadPool, com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry, com.yahoo.container.core.ContainerHttpConfig, com.yahoo.language.process.Encoder, com.yahoo.search.searchchain.ExecutionFactory)",
+ "public void <init>(com.yahoo.statistics.Statistics, com.yahoo.jdisc.Metric, com.yahoo.container.handler.threadpool.ContainerThreadPool, com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry, com.yahoo.container.core.ContainerHttpConfig, com.yahoo.language.process.Embedder, com.yahoo.search.searchchain.ExecutionFactory)",
"public void <init>(com.yahoo.statistics.Statistics, com.yahoo.jdisc.Metric, com.yahoo.container.handler.threadpool.ContainerThreadPool, com.yahoo.container.logging.AccessLog, com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry, com.yahoo.container.core.ContainerHttpConfig, com.yahoo.search.searchchain.ExecutionFactory)",
"public void <init>(com.yahoo.statistics.Statistics, com.yahoo.jdisc.Metric, java.util.concurrent.Executor, com.yahoo.container.logging.AccessLog, com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry, com.yahoo.container.core.ContainerHttpConfig, com.yahoo.search.searchchain.ExecutionFactory)",
"public void <init>(com.yahoo.statistics.Statistics, com.yahoo.jdisc.Metric, java.util.concurrent.Executor, com.yahoo.container.logging.AccessLog, com.yahoo.search.query.profile.config.QueryProfilesConfig, com.yahoo.container.core.ContainerHttpConfig, com.yahoo.search.searchchain.ExecutionFactory)",
@@ -5885,7 +5885,7 @@
],
"methods": [
"public void <init>(com.yahoo.search.query.profile.compiled.CompiledQueryProfile)",
- "public void <init>(com.yahoo.search.query.profile.compiled.CompiledQueryProfile, com.yahoo.language.process.Encoder)",
+ "public void <init>(com.yahoo.search.query.profile.compiled.CompiledQueryProfile, com.yahoo.language.process.Embedder)",
"public com.yahoo.search.query.profile.compiled.CompiledQueryProfile getQueryProfile()",
"public java.lang.Object get(com.yahoo.processing.request.CompoundName, java.util.Map, com.yahoo.processing.request.Properties)",
"public void set(com.yahoo.processing.request.CompoundName, java.lang.Object, java.util.Map)",
@@ -6259,7 +6259,7 @@
"public"
],
"methods": [
- "public void <init>(com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry, com.yahoo.language.process.Encoder, java.util.Map)",
+ "public void <init>(com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry, com.yahoo.language.process.Embedder, java.util.Map)",
"public static com.yahoo.search.query.profile.types.ConversionContext empty()"
],
"fields": []
@@ -6531,7 +6531,7 @@
"public"
],
"methods": [
- "public void <init>(com.yahoo.search.Query, com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry, com.yahoo.language.process.Encoder)",
+ "public void <init>(com.yahoo.search.Query, com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry, com.yahoo.language.process.Embedder)",
"public void setParentQuery(com.yahoo.search.Query)",
"public java.lang.Object get(com.yahoo.processing.request.CompoundName, java.util.Map, com.yahoo.processing.request.Properties)",
"public void set(com.yahoo.processing.request.CompoundName, java.lang.Object, java.util.Map)",
diff --git a/container-search/src/main/java/com/yahoo/search/Query.java b/container-search/src/main/java/com/yahoo/search/Query.java
index 06b71599103..08ebd74da5a 100644
--- a/container-search/src/main/java/com/yahoo/search/Query.java
+++ b/container-search/src/main/java/com/yahoo/search/Query.java
@@ -7,7 +7,7 @@ import com.yahoo.collections.Tuple2;
import com.yahoo.component.Version;
import com.yahoo.container.jdisc.HttpRequest;
import com.yahoo.fs4.MapEncoder;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.prelude.fastsearch.DocumentDatabase;
import com.yahoo.prelude.query.Highlight;
import com.yahoo.prelude.query.textualrepresentation.TextualQueryRepresentation;
@@ -334,32 +334,32 @@ public class Query extends com.yahoo.processing.Request implements Cloneable {
public Query(HttpRequest request, Map<String, String> requestMap, CompiledQueryProfile queryProfile) {
super(new QueryPropertyAliases(propertyAliases));
this.httpRequest = request;
- init(requestMap, queryProfile, Encoder.throwsOnUse);
+ init(requestMap, queryProfile, Embedder.throwsOnUse);
}
// TODO: Deprecate most constructors above here
private Query(Builder builder) {
- this(builder.getRequest(), builder.getRequestMap(), builder.getQueryProfile(), builder.getEncoder());
+ this(builder.getRequest(), builder.getRequestMap(), builder.getQueryProfile(), builder.getEmbedder());
}
- private Query(HttpRequest request, Map<String, String> requestMap, CompiledQueryProfile queryProfile, Encoder encoder) {
+ private Query(HttpRequest request, Map<String, String> requestMap, CompiledQueryProfile queryProfile, Embedder embedder) {
super(new QueryPropertyAliases(propertyAliases));
this.httpRequest = request;
- init(requestMap, queryProfile, encoder);
+ init(requestMap, queryProfile, embedder);
}
- private void init(Map<String, String> requestMap, CompiledQueryProfile queryProfile, Encoder encoder) {
+ private void init(Map<String, String> requestMap, CompiledQueryProfile queryProfile, Embedder embedder) {
startTime = httpRequest.getJDiscRequest().creationTime(TimeUnit.MILLISECONDS);
if (queryProfile != null) {
// Move all request parameters to the query profile just to validate that the parameter settings are legal
- Properties queryProfileProperties = new QueryProfileProperties(queryProfile, encoder);
+ Properties queryProfileProperties = new QueryProfileProperties(queryProfile, embedder);
properties().chain(queryProfileProperties);
// TODO: Just checking legality rather than actually setting would be faster
setPropertiesFromRequestMap(requestMap, properties(), true); // Adds errors to the query for illegal set attempts
// Create the full chain
- properties().chain(new QueryProperties(this, queryProfile.getRegistry(), encoder)).
+ properties().chain(new QueryProperties(this, queryProfile.getRegistry(), embedder)).
chain(new ModelObjectMap()).
chain(new RequestContextProperties(requestMap)).
chain(queryProfileProperties).
@@ -378,7 +378,7 @@ public class Query extends com.yahoo.processing.Request implements Cloneable {
}
else { // bypass these complications if there is no query profile to get values from and validate against
properties().
- chain(new QueryProperties(this, CompiledQueryProfileRegistry.empty, encoder)).
+ chain(new QueryProperties(this, CompiledQueryProfileRegistry.empty, embedder)).
chain(new PropertyMap()).
chain(new DefaultProperties());
setPropertiesFromRequestMap(requestMap, properties(), false);
@@ -1130,7 +1130,7 @@ public class Query extends com.yahoo.processing.Request implements Cloneable {
private HttpRequest request = null;
private Map<String, String> requestMap = null;
private CompiledQueryProfile queryProfile = null;
- private Encoder encoder = Encoder.throwsOnUse;
+ private Embedder embedder = Embedder.throwsOnUse;
public Builder setRequest(String query) {
request = HttpRequest.createTestRequest(query, com.yahoo.jdisc.http.HttpRequest.Method.GET);
@@ -1168,12 +1168,12 @@ public class Query extends com.yahoo.processing.Request implements Cloneable {
/** Returns the query profile of this query, or null if none. */
public CompiledQueryProfile getQueryProfile() { return queryProfile; }
- public Builder setEncoder(Encoder encoder) {
- this.encoder = encoder;
+ public Builder setEmbedder(Embedder embedder) {
+ this.embedder = embedder;
return this;
}
- public Encoder getEncoder() { return encoder; }
+ public Embedder getEmbedder() { return embedder; }
/** Creates a new query from this builder. No properties are required to before calling this. */
public Query build() { return new Query(this); }
diff --git a/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java b/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java
index d1e57a30206..c15aef44f3d 100644
--- a/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java
+++ b/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java
@@ -23,7 +23,7 @@ import com.yahoo.io.IOUtils;
import com.yahoo.jdisc.Metric;
import com.yahoo.jdisc.Request;
import com.yahoo.language.Linguistics;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.net.HostName;
import com.yahoo.net.UriTools;
import com.yahoo.prelude.query.parser.ParseException;
@@ -106,7 +106,7 @@ public class SearchHandler extends LoggingRequestHandler {
private final String selfHostname = HostName.getLocalhost();
- private final Encoder encoder;
+ private final Embedder embedder;
private final ExecutionFactory executionFactory;
@@ -134,9 +134,9 @@ public class SearchHandler extends LoggingRequestHandler {
ContainerThreadPool threadpool,
CompiledQueryProfileRegistry queryProfileRegistry,
ContainerHttpConfig config,
- Encoder encoder,
+ Embedder embedder,
ExecutionFactory executionFactory) {
- this(statistics, metric, threadpool.executor(), queryProfileRegistry, encoder, executionFactory,
+ this(statistics, metric, threadpool.executor(), queryProfileRegistry, embedder, executionFactory,
config.numQueriesToTraceOnDebugAfterConstruction(),
config.hostResponseHeaderKey().equals("") ? Optional.empty() : Optional.of(config.hostResponseHeaderKey()));
}
@@ -170,7 +170,7 @@ public class SearchHandler extends LoggingRequestHandler {
metric,
executor,
queryProfileRegistry,
- Encoder.throwsOnUse,
+ Embedder.throwsOnUse,
executionFactory,
containerHttpConfig.numQueriesToTraceOnDebugAfterConstruction(),
containerHttpConfig.hostResponseHeaderKey().equals("") ?
@@ -192,7 +192,7 @@ public class SearchHandler extends LoggingRequestHandler {
metric,
executor,
QueryProfileConfigurer.createFromConfig(queryProfileConfig).compile(),
- Encoder.throwsOnUse,
+ Embedder.throwsOnUse,
executionFactory,
containerHttpConfig.numQueriesToTraceOnDebugAfterConstruction(),
containerHttpConfig.hostResponseHeaderKey().equals("") ?
@@ -210,7 +210,7 @@ public class SearchHandler extends LoggingRequestHandler {
CompiledQueryProfileRegistry queryProfileRegistry,
ExecutionFactory executionFactory,
Optional<String> hostResponseHeaderKey) {
- this(statistics, metric, executor, queryProfileRegistry, Encoder.throwsOnUse,
+ this(statistics, metric, executor, queryProfileRegistry, Embedder.throwsOnUse,
executionFactory, 0, hostResponseHeaderKey);
}
@@ -218,14 +218,14 @@ public class SearchHandler extends LoggingRequestHandler {
Metric metric,
Executor executor,
CompiledQueryProfileRegistry queryProfileRegistry,
- Encoder encoder,
+ Embedder embedder,
ExecutionFactory executionFactory,
long numQueriesToTraceOnDebugAfterStartup,
Optional<String> hostResponseHeaderKey) {
super(executor, metric, true);
log.log(Level.FINE, () -> "SearchHandler.init " + System.identityHashCode(this));
this.queryProfileRegistry = queryProfileRegistry;
- this.encoder = encoder;
+ this.embedder = embedder;
this.executionFactory = executionFactory;
this.maxThreads = examineExecutor(executor);
@@ -332,7 +332,7 @@ public class SearchHandler extends LoggingRequestHandler {
Query query = new Query.Builder().setRequest(request)
.setRequestMap(requestMap)
.setQueryProfile(queryProfile)
- .setEncoder(encoder)
+ .setEmbedder(embedder)
.build();
boolean benchmarking = VespaHeaders.benchmarkOutput(request);
diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileProperties.java b/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileProperties.java
index e555000272d..53be827073c 100644
--- a/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileProperties.java
+++ b/container-search/src/main/java/com/yahoo/search/query/profile/QueryProfileProperties.java
@@ -2,7 +2,7 @@
package com.yahoo.search.query.profile;
import com.yahoo.collections.Pair;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.processing.IllegalInputException;
import com.yahoo.processing.request.CompoundName;
import com.yahoo.processing.request.properties.PropertyMap;
@@ -30,7 +30,7 @@ import java.util.Map;
public class QueryProfileProperties extends Properties {
private final CompiledQueryProfile profile;
- private final Encoder encoder;
+ private final Embedder embedder;
// Note: The priority order is: values has precedence over references
@@ -45,14 +45,14 @@ public class QueryProfileProperties extends Properties {
private List<Pair<CompoundName, CompiledQueryProfile>> references = null;
public QueryProfileProperties(CompiledQueryProfile profile) {
- this(profile, Encoder.throwsOnUse);
+ this(profile, Embedder.throwsOnUse);
}
/** Creates an instance from a profile, throws an exception if the given profile is null */
- public QueryProfileProperties(CompiledQueryProfile profile, Encoder encoder) {
+ public QueryProfileProperties(CompiledQueryProfile profile, Embedder embedder) {
Validator.ensureNotNull("The profile wrapped by this cannot be null", profile);
this.profile = profile;
- this.encoder = encoder;
+ this.embedder = embedder;
}
/** Returns the query profile backing this, or null if none */
@@ -122,7 +122,7 @@ public class QueryProfileProperties extends Properties {
if (fieldDescription != null) {
if (i == name.size() - 1) { // at the end of the path, check the assignment type
value = fieldDescription.getType().convertFrom(value, new ConversionContext(profile.getRegistry(),
- encoder,
+ embedder,
context));
if (value == null)
throw new IllegalInputException("'" + value + "' is not a " +
diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/types/ConversionContext.java b/container-search/src/main/java/com/yahoo/search/query/profile/types/ConversionContext.java
index 4aa95741b06..e5b9eb1c1cd 100644
--- a/container-search/src/main/java/com/yahoo/search/query/profile/types/ConversionContext.java
+++ b/container-search/src/main/java/com/yahoo/search/query/profile/types/ConversionContext.java
@@ -2,7 +2,7 @@
package com.yahoo.search.query.profile.types;
import com.yahoo.language.Language;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry;
import java.util.Map;
@@ -13,12 +13,12 @@ import java.util.Map;
public class ConversionContext {
private final CompiledQueryProfileRegistry registry;
- private final Encoder encoder;
+ private final Embedder embedder;
private final Language language;
- public ConversionContext(CompiledQueryProfileRegistry registry, Encoder encoder, Map<String, String> context) {
+ public ConversionContext(CompiledQueryProfileRegistry registry, Embedder embedder, Map<String, String> context) {
this.registry = registry;
- this.encoder = encoder;
+ this.embedder = embedder;
this.language = context.containsKey("language") ? Language.fromLanguageTag(context.get("language"))
: Language.UNKNOWN;
}
@@ -27,14 +27,14 @@ public class ConversionContext {
CompiledQueryProfileRegistry getRegistry() {return registry;}
/** Returns the configured encoder, never null */
- Encoder getEncoder() { return encoder; }
+ Embedder getEncoder() { return embedder; }
/** Returns the language, which is never null but may be UNKNOWN */
Language getLanguage() { return language; }
/** Returns an empty context */
public static ConversionContext empty() {
- return new ConversionContext(null, Encoder.throwsOnUse, Map.of());
+ return new ConversionContext(null, Embedder.throwsOnUse, Map.of());
}
}
diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/types/FieldType.java b/container-search/src/main/java/com/yahoo/search/query/profile/types/FieldType.java
index 511b64c7b6e..7a06f9ef534 100644
--- a/container-search/src/main/java/com/yahoo/search/query/profile/types/FieldType.java
+++ b/container-search/src/main/java/com/yahoo/search/query/profile/types/FieldType.java
@@ -1,10 +1,8 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.profile.types;
-import com.yahoo.language.process.Encoder;
import com.yahoo.search.query.profile.QueryProfile;
import com.yahoo.search.query.profile.QueryProfileRegistry;
-import com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry;
import com.yahoo.search.yql.YqlQuery;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.TensorType;
diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/types/PrimitiveFieldType.java b/container-search/src/main/java/com/yahoo/search/query/profile/types/PrimitiveFieldType.java
index b1a9820c6fa..f9d8950908b 100644
--- a/container-search/src/main/java/com/yahoo/search/query/profile/types/PrimitiveFieldType.java
+++ b/container-search/src/main/java/com/yahoo/search/query/profile/types/PrimitiveFieldType.java
@@ -1,9 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.profile.types;
-import com.yahoo.language.process.Encoder;
import com.yahoo.search.query.profile.QueryProfileRegistry;
-import com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry;
import static com.yahoo.text.Lowercase.toLowerCase;
diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryFieldType.java b/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryFieldType.java
index 09c1a4d0cc0..cbae6402039 100644
--- a/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryFieldType.java
+++ b/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryFieldType.java
@@ -1,9 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.profile.types;
-import com.yahoo.language.process.Encoder;
import com.yahoo.search.query.profile.QueryProfileRegistry;
-import com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry;
import com.yahoo.search.yql.YqlQuery;
/**
diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryProfileFieldType.java b/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryProfileFieldType.java
index 6958318bee4..ff12224823f 100644
--- a/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryProfileFieldType.java
+++ b/container-search/src/main/java/com/yahoo/search/query/profile/types/QueryProfileFieldType.java
@@ -1,11 +1,9 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.profile.types;
-import com.yahoo.language.process.Encoder;
import com.yahoo.search.query.profile.QueryProfile;
import com.yahoo.search.query.profile.QueryProfileRegistry;
import com.yahoo.search.query.profile.compiled.CompiledQueryProfile;
-import com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry;
/**
* Represents a query profile field type which is a reference to a query profile.
diff --git a/container-search/src/main/java/com/yahoo/search/query/profile/types/TensorFieldType.java b/container-search/src/main/java/com/yahoo/search/query/profile/types/TensorFieldType.java
index 34a9f8d41c3..cd21f0b3a61 100644
--- a/container-search/src/main/java/com/yahoo/search/query/profile/types/TensorFieldType.java
+++ b/container-search/src/main/java/com/yahoo/search/query/profile/types/TensorFieldType.java
@@ -2,9 +2,8 @@
package com.yahoo.search.query.profile.types;
import com.yahoo.language.Language;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.search.query.profile.QueryProfileRegistry;
-import com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.TensorType;
@@ -48,18 +47,18 @@ public class TensorFieldType extends FieldType {
return convertFrom(o, context.getEncoder(), context.getLanguage());
}
- private Object convertFrom(Object o, Encoder encoder, Language language) {
+ private Object convertFrom(Object o, Embedder embedder, Language language) {
if (o instanceof Tensor) return o;
- if (o instanceof String && ((String)o).startsWith("encode(")) return encode((String)o, encoder, language);
+ if (o instanceof String && ((String)o).startsWith("embed(")) return encode((String)o, embedder, language);
if (o instanceof String) return Tensor.from(type, (String)o);
return null;
}
- private Tensor encode(String s, Encoder encoder, Language language) {
+ private Tensor encode(String s, Embedder embedder, Language language) {
if ( ! s.endsWith(")"))
- throw new IllegalArgumentException("Expected any string enclosed in encode(), but the argument does not end by ')'");
- String text = s.substring("encode(".length(), s.length() - 1);
- return encoder.encode(text, language, type);
+ throw new IllegalArgumentException("Expected any string enclosed in embed(), but the argument does not end by ')'");
+ String text = s.substring("embed(".length(), s.length() - 1);
+ return embedder.embed(text, language, type);
}
public static TensorFieldType fromTypeString(String s) {
diff --git a/container-search/src/main/java/com/yahoo/search/query/properties/QueryProperties.java b/container-search/src/main/java/com/yahoo/search/query/properties/QueryProperties.java
index 02648f84066..3a426656185 100644
--- a/container-search/src/main/java/com/yahoo/search/query/properties/QueryProperties.java
+++ b/container-search/src/main/java/com/yahoo/search/query/properties/QueryProperties.java
@@ -1,7 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.properties;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.processing.IllegalInputException;
import com.yahoo.processing.request.CompoundName;
import com.yahoo.search.Query;
@@ -34,12 +34,12 @@ public class QueryProperties extends Properties {
private Query query;
private final CompiledQueryProfileRegistry profileRegistry;
- private final Encoder encoder;
+ private final Embedder embedder;
- public QueryProperties(Query query, CompiledQueryProfileRegistry profileRegistry, Encoder encoder) {
+ public QueryProperties(Query query, CompiledQueryProfileRegistry profileRegistry, Embedder embedder) {
this.query = query;
this.profileRegistry = profileRegistry;
- this.encoder = encoder;
+ this.embedder = embedder;
}
public void setParentQuery(Query query) {
@@ -380,7 +380,7 @@ public class QueryProperties extends Properties {
if (type == null) return value; // no type info -> keep as string
FieldDescription field = type.getField(key);
if (field == null) return value; // ditto
- return field.getType().convertFrom(value, new ConversionContext(profileRegistry, encoder, context));
+ return field.getType().convertFrom(value, new ConversionContext(profileRegistry, embedder, context));
}
private void throwIllegalParameter(String key,String namespace) {
diff --git a/container-search/src/test/java/com/yahoo/search/grouping/vespa/IntegerEncoderTestCase.java b/container-search/src/test/java/com/yahoo/search/grouping/vespa/IntegerEmbedderTestCase.java
index 3b48ae35fcf..18a9f11e15e 100644
--- a/container-search/src/test/java/com/yahoo/search/grouping/vespa/IntegerEncoderTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/grouping/vespa/IntegerEmbedderTestCase.java
@@ -8,7 +8,7 @@ import static org.junit.Assert.assertEquals;
/**
* @author Simon Thoresen Hult
*/
-public class IntegerEncoderTestCase {
+public class IntegerEmbedderTestCase {
@Test
public void requireThatIntEncoderWorksAsExpected() {
diff --git a/container-search/src/test/java/com/yahoo/search/query/profile/types/test/QueryProfileTypeTestCase.java b/container-search/src/test/java/com/yahoo/search/query/profile/types/test/QueryProfileTypeTestCase.java
index 45f53a1cdb9..e22263070e0 100644
--- a/container-search/src/test/java/com/yahoo/search/query/profile/types/test/QueryProfileTypeTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/query/profile/types/test/QueryProfileTypeTestCase.java
@@ -4,7 +4,7 @@ package com.yahoo.search.query.profile.types.test;
import com.yahoo.component.ComponentId;
import com.yahoo.container.jdisc.HttpRequest;
import com.yahoo.language.Language;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.TensorType;
import com.yahoo.yolean.Exceptions;
@@ -22,7 +22,6 @@ import com.yahoo.search.query.profile.types.QueryProfileTypeRegistry;
import org.junit.Before;
import org.junit.Test;
-import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.List;
@@ -441,19 +440,19 @@ public class QueryProfileTypeTestCase {
}
@Test
- public void testUnencodedTensorRankFeatureInRequest() {
+ public void testUnembeddedTensorRankFeatureInRequest() {
QueryProfile profile = new QueryProfile("test");
profile.setType(testtype);
registry.register(profile);
CompiledQueryProfileRegistry cRegistry = registry.compile();
- String textToEncode = "text to encode as tensor";
+ String textToEmbed = "text to embed into a tensor";
Tensor expectedTensor = Tensor.from("tensor<float>(x[5]):[3,7,4,0,0]]");
Query query1 = new Query.Builder().setRequest(HttpRequest.createTestRequest("?" + urlEncode("ranking.features.query(myTensor4)") +
- "=" + urlEncode("encode(" + textToEncode + ")"),
+ "=" + urlEncode("embed(" + textToEmbed + ")"),
com.yahoo.jdisc.http.HttpRequest.Method.GET))
.setQueryProfile(cRegistry.getComponent("test"))
- .setEncoder(new MockEncoder(textToEncode, Language.UNKNOWN, expectedTensor))
+ .setEmbedder(new MockEmbedder(textToEmbed, Language.UNKNOWN, expectedTensor))
.build();
assertEquals(0, query1.errors().size());
assertEquals(expectedTensor, query1.properties().get("ranking.features.query(myTensor4)"));
@@ -461,11 +460,11 @@ public class QueryProfileTypeTestCase {
// Explicit language
Query query2 = new Query.Builder().setRequest(HttpRequest.createTestRequest("?" + urlEncode("ranking.features.query(myTensor4)") +
- "=" + urlEncode("encode(" + textToEncode + ")") +
+ "=" + urlEncode("embed(" + textToEmbed + ")") +
"&language=en",
com.yahoo.jdisc.http.HttpRequest.Method.GET))
.setQueryProfile(cRegistry.getComponent("test"))
- .setEncoder(new MockEncoder(textToEncode, Language.ENGLISH, expectedTensor))
+ .setEmbedder(new MockEmbedder(textToEmbed, Language.ENGLISH, expectedTensor))
.build();
assertEquals(0, query2.errors().size());
assertEquals(expectedTensor, query2.properties().get("ranking.features.query(myTensor4)"));
@@ -723,28 +722,28 @@ public class QueryProfileTypeTestCase {
}
}
- private static final class MockEncoder implements Encoder {
+ private static final class MockEmbedder implements Embedder {
private final String expectedText;
private final Language expectedLanguage;
private final Tensor tensorToReturn;
- public MockEncoder(String expectedText,
- Language expectedLanguage,
- Tensor tensorToReturn) {
+ public MockEmbedder(String expectedText,
+ Language expectedLanguage,
+ Tensor tensorToReturn) {
this.expectedText = expectedText;
this.expectedLanguage = expectedLanguage;
this.tensorToReturn = tensorToReturn;
}
@Override
- public List<Integer> encode(String text, Language language) {
+ public List<Integer> embed(String text, Language language) {
fail("Unexpected call");
return null;
}
@Override
- public Tensor encode(String text, Language language, TensorType tensorType) {
+ public Tensor embed(String text, Language language, TensorType tensorType) {
assertEquals(expectedText, text);
assertEquals(expectedLanguage, language);
assertEquals(tensorToReturn.type(), tensorType);
diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java
index 53709c4ff87..f3a67f855e9 100644
--- a/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java
+++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java
@@ -21,7 +21,7 @@ import com.yahoo.document.config.DocumentmanagerConfig;
import com.yahoo.language.Linguistics;
import java.util.logging.Level;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.vespa.configdefinition.IlscriptsConfig;
import com.yahoo.vespa.indexinglanguage.AdapterFactory;
import com.yahoo.vespa.indexinglanguage.SimpleAdapterFactory;
@@ -55,9 +55,9 @@ public class IndexingProcessor extends DocumentProcessor {
public IndexingProcessor(DocumentmanagerConfig documentmanagerConfig,
IlscriptsConfig ilscriptsConfig,
Linguistics linguistics,
- Encoder encoder) {
+ Embedder embedder) {
docTypeMgr = DocumentTypeManagerConfigurer.configureNewManager(documentmanagerConfig);
- scriptMgr = new ScriptManager(docTypeMgr, ilscriptsConfig, linguistics, encoder);
+ scriptMgr = new ScriptManager(docTypeMgr, ilscriptsConfig, linguistics, embedder);
adapterFactory = new SimpleAdapterFactory(new ExpressionSelector());
}
diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java
index fa5f794f652..7e1d5b5b6ce 100644
--- a/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java
+++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java
@@ -6,7 +6,7 @@ import com.yahoo.document.DocumentTypeManager;
import com.yahoo.language.Linguistics;
import java.util.logging.Level;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.vespa.configdefinition.IlscriptsConfig;
import com.yahoo.vespa.indexinglanguage.ScriptParserContext;
import com.yahoo.vespa.indexinglanguage.expressions.InputExpression;
@@ -28,9 +28,9 @@ public class ScriptManager {
private final Map<String, Map<String, DocumentScript>> documentFieldScripts;
private final DocumentTypeManager docTypeMgr;
- public ScriptManager(DocumentTypeManager docTypeMgr, IlscriptsConfig config, Linguistics linguistics, Encoder encoder) {
+ public ScriptManager(DocumentTypeManager docTypeMgr, IlscriptsConfig config, Linguistics linguistics, Embedder embedder) {
this.docTypeMgr = docTypeMgr;
- documentFieldScripts = createScriptsMap(docTypeMgr, config, linguistics, encoder);
+ documentFieldScripts = createScriptsMap(docTypeMgr, config, linguistics, embedder);
}
@@ -75,9 +75,9 @@ public class ScriptManager {
private static Map<String, Map<String, DocumentScript>> createScriptsMap(DocumentTypeManager docTypeMgr,
IlscriptsConfig config,
Linguistics linguistics,
- Encoder encoder) {
+ Embedder embedder) {
Map<String, Map<String, DocumentScript>> documentFieldScripts = new HashMap<>(config.ilscript().size());
- ScriptParserContext parserContext = new ScriptParserContext(linguistics, encoder);
+ ScriptParserContext parserContext = new ScriptParserContext(linguistics, embedder);
parserContext.getAnnotatorConfig().setMaxTermOccurrences(config.maxtermoccurrences());
parserContext.getAnnotatorConfig().setMaxTokenLength(config.fieldmatchmaxlength());
diff --git a/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java b/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java
index dc9b1ffba73..f54435329f9 100644
--- a/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java
+++ b/docprocs/src/test/java/com/yahoo/docprocs/indexing/IndexingProcessorTestCase.java
@@ -13,7 +13,7 @@ import com.yahoo.document.datatypes.StringFieldValue;
import com.yahoo.document.update.AssignValueUpdate;
import com.yahoo.document.update.FieldUpdate;
import com.yahoo.document.update.ValueUpdate;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.vespa.configdefinition.IlscriptsConfig;
import org.junit.Test;
@@ -127,6 +127,6 @@ public class IndexingProcessorTestCase {
return new IndexingProcessor(ConfigGetter.getConfig(DocumentmanagerConfig.class, configId),
ConfigGetter.getConfig(IlscriptsConfig.class, configId),
new SimpleLinguistics(),
- Encoder.throwsOnUse);
+ Embedder.throwsOnUse);
}
}
diff --git a/docprocs/src/test/java/com/yahoo/docprocs/indexing/ScriptManagerTestCase.java b/docprocs/src/test/java/com/yahoo/docprocs/indexing/ScriptManagerTestCase.java
index ec05fcbe422..a849f437b44 100644
--- a/docprocs/src/test/java/com/yahoo/docprocs/indexing/ScriptManagerTestCase.java
+++ b/docprocs/src/test/java/com/yahoo/docprocs/indexing/ScriptManagerTestCase.java
@@ -3,7 +3,7 @@ package com.yahoo.docprocs.indexing;
import com.yahoo.document.DocumentType;
import com.yahoo.document.DocumentTypeManager;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.vespa.configdefinition.IlscriptsConfig;
import com.yahoo.vespa.indexinglanguage.parser.ParseException;
import org.junit.Test;
@@ -29,7 +29,7 @@ public class ScriptManagerTestCase {
IlscriptsConfig.Builder config = new IlscriptsConfig.Builder();
config.ilscript(new IlscriptsConfig.Ilscript.Builder().doctype("newssummary")
.content("input title | index title"));
- ScriptManager scriptMgr = new ScriptManager(typeMgr, new IlscriptsConfig(config), null, Encoder.throwsOnUse);
+ ScriptManager scriptMgr = new ScriptManager(typeMgr, new IlscriptsConfig(config), null, Embedder.throwsOnUse);
assertNotNull(scriptMgr.getScript(typeMgr.getDocumentType("newsarticle")));
assertNull(scriptMgr.getScript(new DocumentType("unknown")));
}
@@ -44,7 +44,7 @@ public class ScriptManagerTestCase {
IlscriptsConfig.Builder config = new IlscriptsConfig.Builder();
config.ilscript(new IlscriptsConfig.Ilscript.Builder().doctype("newsarticle")
.content("input title | index title"));
- ScriptManager scriptMgr = new ScriptManager(typeMgr, new IlscriptsConfig(config), null, Encoder.throwsOnUse);
+ ScriptManager scriptMgr = new ScriptManager(typeMgr, new IlscriptsConfig(config), null, Embedder.throwsOnUse);
assertNotNull(scriptMgr.getScript(typeMgr.getDocumentType("newssummary")));
assertNull(scriptMgr.getScript(new DocumentType("unknown")));
}
@@ -53,7 +53,7 @@ public class ScriptManagerTestCase {
public void requireThatEmptyConfigurationDoesNotThrow() {
DocumentTypeManager typeMgr = new DocumentTypeManager();
typeMgr.configure("file:src/test/cfg/documentmanager_inherit.cfg");
- ScriptManager scriptMgr = new ScriptManager(typeMgr, new IlscriptsConfig(new IlscriptsConfig.Builder()), null, Encoder.throwsOnUse);
+ ScriptManager scriptMgr = new ScriptManager(typeMgr, new IlscriptsConfig(new IlscriptsConfig.Builder()), null, Embedder.throwsOnUse);
assertNull(scriptMgr.getScript(new DocumentType("unknown")));
}
@@ -61,7 +61,7 @@ public class ScriptManagerTestCase {
public void requireThatUnknownDocumentTypeReturnsNull() {
DocumentTypeManager typeMgr = new DocumentTypeManager();
typeMgr.configure("file:src/test/cfg/documentmanager_inherit.cfg");
- ScriptManager scriptMgr = new ScriptManager(typeMgr, new IlscriptsConfig(new IlscriptsConfig.Builder()), null, Encoder.throwsOnUse);
+ ScriptManager scriptMgr = new ScriptManager(typeMgr, new IlscriptsConfig(new IlscriptsConfig.Builder()), null, Embedder.throwsOnUse);
for (Iterator<DocumentType> it = typeMgr.documentTypeIterator(); it.hasNext(); ) {
assertNull(scriptMgr.getScript(it.next()));
}
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParser.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParser.java
index 34da5b47655..649095d1db8 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParser.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParser.java
@@ -62,7 +62,7 @@ public final class ScriptParser {
parser.setAnnotatorConfig(context.getAnnotatorConfig());
parser.setDefaultFieldName(context.getDefaultFieldName());
parser.setLinguistics(context.getLinguistcs());
- parser.setEncoder(context.getEncoder());
+ parser.setEmbedder(context.getEmbedder());
try {
return method.call(parser);
} catch (ParseException e) {
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParserContext.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParserContext.java
index 06be91703fa..77c2af8dd42 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParserContext.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParserContext.java
@@ -2,8 +2,7 @@
package com.yahoo.vespa.indexinglanguage;
import com.yahoo.language.Linguistics;
-import com.yahoo.language.process.Encoder;
-import com.yahoo.language.simple.SimpleLinguistics;
+import com.yahoo.language.process.Embedder;
import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig;
import com.yahoo.vespa.indexinglanguage.parser.CharStream;
@@ -14,13 +13,13 @@ public class ScriptParserContext {
private AnnotatorConfig annotatorConfig = new AnnotatorConfig();
private Linguistics linguistics;
- private final Encoder encoder;
+ private final Embedder embedder;
private String defaultFieldName = null;
private CharStream inputStream = null;
- public ScriptParserContext(Linguistics linguistics, Encoder encoder) {
+ public ScriptParserContext(Linguistics linguistics, Embedder embedder) {
this.linguistics = linguistics;
- this.encoder = encoder;
+ this.embedder = embedder;
}
public AnnotatorConfig getAnnotatorConfig() {
@@ -41,8 +40,8 @@ public class ScriptParserContext {
return this;
}
- public Encoder getEncoder() {
- return encoder;
+ public Embedder getEmbedder() {
+ return embedder;
}
public String getDefaultFieldName() {
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EncodeExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EmbedExpression.java
index f84da9ddef8..aa579ed729e 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EncodeExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EmbedExpression.java
@@ -5,25 +5,25 @@ import com.yahoo.document.DataType;
import com.yahoo.document.TensorDataType;
import com.yahoo.document.datatypes.StringFieldValue;
import com.yahoo.document.datatypes.TensorFieldValue;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.TensorType;
/**
- * Encodes a string as a tensor using the configured Encoder component
+ * Embeds a string in a tensor space using the configured Embedder component
*
* @author bratseth
*/
-public class EncodeExpression extends Expression {
+public class EmbedExpression extends Expression {
- private final Encoder encoder;
+ private final Embedder embedder;
- /** The target type we are encoding to. Set during verification. */
+ /** The target type we are embedding into. */
private TensorType targetType;
- public EncodeExpression(Encoder encoder) {
+ public EmbedExpression(Embedder embedder) {
super(DataType.STRING);
- this.encoder = encoder;
+ this.embedder = embedder;
}
@Override
@@ -34,7 +34,7 @@ public class EncodeExpression extends Expression {
@Override
protected void doExecute(ExecutionContext context) {
StringFieldValue input = (StringFieldValue) context.getValue();
- Tensor tensor = encoder.encode(input.getString(), context.getLanguage(), targetType);
+ Tensor tensor = embedder.embed(input.getString(), context.getLanguage(), targetType);
context.setValue(new TensorFieldValue(tensor));
}
@@ -43,7 +43,7 @@ public class EncodeExpression extends Expression {
String outputField = context.getOutputField();
if (outputField == null)
throw new VerificationException(this, "No output field in this statement: " +
- "Don't know what tensor type to encode to.");
+ "Don't know what tensor type to embed into.");
DataType outputFieldType = context.getInputType(this, outputField);
if ( ! (outputFieldType instanceof TensorDataType) )
throw new VerificationException(this, "The type of the output field " + outputField +
@@ -58,12 +58,12 @@ public class EncodeExpression extends Expression {
}
@Override
- public String toString() { return "encode"; }
+ public String toString() { return "embed"; }
@Override
public int hashCode() { return 1; }
@Override
- public boolean equals(Object o) { return o instanceof EncodeExpression; }
+ public boolean equals(Object o) { return o instanceof EmbedExpression; }
}
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java
index 67459c2b035..20a0c9804a9 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java
@@ -6,7 +6,7 @@ import com.yahoo.document.Document;
import com.yahoo.document.DocumentUpdate;
import com.yahoo.document.datatypes.FieldValue;
import com.yahoo.language.Linguistics;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.vespa.indexinglanguage.*;
import com.yahoo.vespa.indexinglanguage.parser.IndexingInput;
@@ -188,11 +188,11 @@ public abstract class Expression extends Selectable {
/** Creates an expression with simple linguistics for testing */
public static Expression fromString(String expression) throws ParseException {
- return fromString(expression, new SimpleLinguistics(), Encoder.throwsOnUse);
+ return fromString(expression, new SimpleLinguistics(), Embedder.throwsOnUse);
}
- public static Expression fromString(String expression, Linguistics linguistics, Encoder encoder) throws ParseException {
- return newInstance(new ScriptParserContext(linguistics, encoder).setInputStream(new IndexingInput(expression)));
+ public static Expression fromString(String expression, Linguistics linguistics, Embedder embedder) throws ParseException {
+ return newInstance(new ScriptParserContext(linguistics, embedder).setInputStream(new IndexingInput(expression)));
}
public static Expression newInstance(ScriptParserContext context) throws ParseException {
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java
index 7317cb2216f..b5f71813de3 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java
@@ -4,7 +4,7 @@ package com.yahoo.vespa.indexinglanguage.expressions;
import com.yahoo.document.DataType;
import com.yahoo.document.datatypes.FieldValue;
import com.yahoo.language.Linguistics;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.vespa.indexinglanguage.ScriptParser;
import com.yahoo.vespa.indexinglanguage.ScriptParserContext;
@@ -92,11 +92,11 @@ public final class ScriptExpression extends ExpressionList<StatementExpression>
/** Creates an expression with simple linguistics for testing */
@SuppressWarnings("deprecation")
public static ScriptExpression fromString(String expression) throws ParseException {
- return fromString(expression, new SimpleLinguistics(), Encoder.throwsOnUse);
+ return fromString(expression, new SimpleLinguistics(), Embedder.throwsOnUse);
}
- public static ScriptExpression fromString(String expression, Linguistics linguistics, Encoder encoder) throws ParseException {
- return newInstance(new ScriptParserContext(linguistics, encoder).setInputStream(new IndexingInput(expression)));
+ public static ScriptExpression fromString(String expression, Linguistics linguistics, Embedder embedder) throws ParseException {
+ return newInstance(new ScriptParserContext(linguistics, embedder).setInputStream(new IndexingInput(expression)));
}
public static ScriptExpression newInstance(ScriptParserContext config) throws ParseException {
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java
index 145133e210d..7d157af1a19 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java
@@ -2,9 +2,8 @@
package com.yahoo.vespa.indexinglanguage.expressions;
import com.yahoo.document.DataType;
-import com.yahoo.document.TensorDataType;
import com.yahoo.language.Linguistics;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.vespa.indexinglanguage.ScriptParser;
import com.yahoo.vespa.indexinglanguage.ScriptParserContext;
@@ -12,7 +11,6 @@ import com.yahoo.vespa.indexinglanguage.parser.IndexingInput;
import com.yahoo.vespa.indexinglanguage.parser.ParseException;
import java.util.Arrays;
-import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
@@ -100,11 +98,11 @@ public final class StatementExpression extends ExpressionList<Expression> {
/** Creates an expression with simple linguistics for testing */
public static StatementExpression fromString(String expression) throws ParseException {
- return fromString(expression, new SimpleLinguistics(), Encoder.throwsOnUse);
+ return fromString(expression, new SimpleLinguistics(), Embedder.throwsOnUse);
}
- public static StatementExpression fromString(String expression, Linguistics linguistics, Encoder encoder) throws ParseException {
- return newInstance(new ScriptParserContext(linguistics, encoder).setInputStream(new IndexingInput(expression)));
+ public static StatementExpression fromString(String expression, Linguistics linguistics, Embedder embedder) throws ParseException {
+ return newInstance(new ScriptParserContext(linguistics, embedder).setInputStream(new IndexingInput(expression)));
}
public static StatementExpression newInstance(ScriptParserContext config) throws ParseException {
diff --git a/indexinglanguage/src/main/javacc/IndexingParser.jj b/indexinglanguage/src/main/javacc/IndexingParser.jj
index 4533a17954c..3eee4ea6f08 100644
--- a/indexinglanguage/src/main/javacc/IndexingParser.jj
+++ b/indexinglanguage/src/main/javacc/IndexingParser.jj
@@ -34,7 +34,7 @@ import com.yahoo.text.StringUtilities;
import com.yahoo.vespa.indexinglanguage.expressions.*;
import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig;
import com.yahoo.language.process.StemMode;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.language.Linguistics;
/**
@@ -45,7 +45,7 @@ public class IndexingParser {
private String defaultFieldName;
private Linguistics linguistics;
- private Encoder encoder;
+ private Embedder embedder;
private AnnotatorConfig annotatorCfg;
public IndexingParser(String str) {
@@ -62,8 +62,8 @@ public class IndexingParser {
return this;
}
- public IndexingParser setEncoder(Encoder encoder) {
- this.encoder = encoder;
+ public IndexingParser setEmbedder(Embedder embedder) {
+ this.embedder = embedder;
return this;
}
@@ -157,7 +157,7 @@ TOKEN :
<CREATE_IF_NON_EXISTENT: "create_if_non_existent"> |
<ECHO: "echo"> |
<ELSE: "else"> |
- <ENCODE: "encode"> |
+ <EMBED: "embed"> |
<EXACT: "exact"> |
<FLATTEN: "flatten"> |
<FOR_EACH: "for_each"> |
@@ -283,7 +283,7 @@ Expression value() :
val = base64EncodeExp() |
val = clearStateExp() |
val = echoExp() |
- val = encodeExp() |
+ val = embedExp() |
val = exactExp() |
val = flattenExp() |
val = forEachExp() |
@@ -365,10 +365,10 @@ Expression echoExp() : { }
{ return new EchoExpression(); }
}
-Expression encodeExp() : { }
+Expression embedExp() : { }
{
- ( <ENCODE> )
- { return new EncodeExpression(encoder); }
+ ( <EMBED> )
+ { return new EmbedExpression(embedder); }
}
Expression exactExp() : { }
@@ -744,7 +744,7 @@ String identifier() :
<ECHO> |
<EXACT> |
<ELSE> |
- <ENCODE> |
+ <EMBED> |
<FLATTEN> |
<FOR_EACH> |
<GET_FIELD> |
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptParserTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptParserTestCase.java
index 32e38dbee6f..06d185339a6 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptParserTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptParserTestCase.java
@@ -1,7 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.indexinglanguage;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.vespa.indexinglanguage.expressions.EchoExpression;
import com.yahoo.vespa.indexinglanguage.expressions.InputExpression;
@@ -96,7 +96,7 @@ public class ScriptParserTestCase {
}
private static ScriptParserContext newContext(String input) {
- return new ScriptParserContext(new SimpleLinguistics(), Encoder.throwsOnUse).setInputStream(new IndexingInput(input));
+ return new ScriptParserContext(new SimpleLinguistics(), Embedder.throwsOnUse).setInputStream(new IndexingInput(input));
}
}
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java
index 9d3d0abb256..188426b1a06 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java
@@ -5,26 +5,20 @@ import com.yahoo.document.DataType;
import com.yahoo.document.Document;
import com.yahoo.document.DocumentType;
import com.yahoo.document.Field;
-import com.yahoo.document.FieldPath;
import com.yahoo.document.TensorDataType;
import com.yahoo.document.datatypes.BoolFieldValue;
-import com.yahoo.document.datatypes.FieldValue;
import com.yahoo.document.datatypes.StringFieldValue;
import com.yahoo.document.datatypes.TensorFieldValue;
import com.yahoo.language.Language;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.tensor.Tensor;
-import com.yahoo.tensor.TensorAddress;
import com.yahoo.tensor.TensorType;
import com.yahoo.vespa.indexinglanguage.expressions.*;
import com.yahoo.vespa.indexinglanguage.parser.ParseException;
import org.junit.Test;
-import java.util.Iterator;
import java.util.List;
-import java.util.Map;
-import java.util.Set;
import static org.junit.Assert.*;
@@ -106,9 +100,9 @@ public class ScriptTestCase {
}
@Test
- public void testEncode() throws ParseException {
+ public void testEmbed() throws ParseException {
TensorType tensorType = TensorType.fromSpec("tensor(d[4])");
- var expression = Expression.fromString("input myText | encode | attribute 'myTensor'",
+ var expression = Expression.fromString("input myText | embed | attribute 'myTensor'",
new SimpleLinguistics(),
new MockEncoder());
@@ -131,15 +125,15 @@ public class ScriptTestCase {
((TensorFieldValue)adapter.values.get("myTensor")).getTensor().get());
}
- private static class MockEncoder implements Encoder {
+ private static class MockEncoder implements Embedder {
@Override
- public List<Integer> encode(String text, Language language) {
+ public List<Integer> embed(String text, Language language) {
return null;
}
@Override
- public Tensor encode(String text, Language language, TensorType tensorType) {
+ public Tensor embed(String text, Language language, TensorType tensorType) {
return Tensor.from(tensorType, "[7,3,0,0]");
}
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/DefaultFieldNameTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/DefaultFieldNameTestCase.java
index 2a71aeb564c..ea0d9f9cf69 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/DefaultFieldNameTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/DefaultFieldNameTestCase.java
@@ -1,7 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.indexinglanguage.parser;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.vespa.indexinglanguage.ScriptParserContext;
import com.yahoo.vespa.indexinglanguage.expressions.Expression;
@@ -19,7 +19,7 @@ public class DefaultFieldNameTestCase {
public void requireThatDefaultFieldNameIsAppliedWhenArgumentIsMissing() throws ParseException {
IndexingInput input = new IndexingInput("input");
InputExpression exp = (InputExpression)Expression.newInstance(new ScriptParserContext(new SimpleLinguistics(),
- Encoder.throwsOnUse)
+ Embedder.throwsOnUse)
.setInputStream(input)
.setDefaultFieldName("foo"));
assertEquals("foo", exp.getFieldName());
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java
index d7c5ae5c15a..44aa562028c 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java
@@ -2,7 +2,7 @@
package com.yahoo.vespa.indexinglanguage.parser;
import com.yahoo.language.Linguistics;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.vespa.indexinglanguage.expressions.*;
import org.junit.Test;
@@ -85,9 +85,9 @@ public class ExpressionTestCase {
private static void assertExpression(Class expectedClass, String str) throws ParseException {
Linguistics linguistics = new SimpleLinguistics();
- Expression foo = Expression.fromString(str, linguistics, Encoder.throwsOnUse);
+ Expression foo = Expression.fromString(str, linguistics, Embedder.throwsOnUse);
assertEquals(expectedClass, foo.getClass());
- Expression bar = Expression.fromString(foo.toString(), linguistics, Encoder.throwsOnUse);
+ Expression bar = Expression.fromString(foo.toString(), linguistics, Embedder.throwsOnUse);
assertEquals(foo.hashCode(), bar.hashCode());
assertEquals(foo, bar);
}
diff --git a/linguistics-components/abi-spec.json b/linguistics-components/abi-spec.json
index 5b6729c58ef..808ec3af082 100644
--- a/linguistics-components/abi-spec.json
+++ b/linguistics-components/abi-spec.json
@@ -148,7 +148,7 @@
"public static final java.lang.String[] CONFIG_DEF_SCHEMA"
]
},
- "com.yahoo.language.sentencepiece.SentencePieceEncoder$Builder": {
+ "com.yahoo.language.sentencepiece.SentencePieceEmbedder$Builder": {
"superClass": "java.lang.Object",
"interfaces": [],
"attributes": [
@@ -157,31 +157,31 @@
"methods": [
"public void <init>()",
"public void addModel(com.yahoo.language.Language, java.nio.file.Path)",
- "public com.yahoo.language.sentencepiece.SentencePieceEncoder$Builder addDefaultModel(java.nio.file.Path)",
+ "public com.yahoo.language.sentencepiece.SentencePieceEmbedder$Builder addDefaultModel(java.nio.file.Path)",
"public java.util.Map getModels()",
- "public com.yahoo.language.sentencepiece.SentencePieceEncoder$Builder setCollapseUnknowns(boolean)",
+ "public com.yahoo.language.sentencepiece.SentencePieceEmbedder$Builder setCollapseUnknowns(boolean)",
"public boolean getCollapseUnknowns()",
- "public com.yahoo.language.sentencepiece.SentencePieceEncoder$Builder setScoring(com.yahoo.language.sentencepiece.Scoring)",
+ "public com.yahoo.language.sentencepiece.SentencePieceEmbedder$Builder setScoring(com.yahoo.language.sentencepiece.Scoring)",
"public com.yahoo.language.sentencepiece.Scoring getScoring()",
- "public com.yahoo.language.sentencepiece.SentencePieceEncoder build()"
+ "public com.yahoo.language.sentencepiece.SentencePieceEmbedder build()"
],
"fields": []
},
- "com.yahoo.language.sentencepiece.SentencePieceEncoder": {
+ "com.yahoo.language.sentencepiece.SentencePieceEmbedder": {
"superClass": "java.lang.Object",
"interfaces": [
"com.yahoo.language.process.Segmenter",
- "com.yahoo.language.process.Encoder"
+ "com.yahoo.language.process.Embedder"
],
"attributes": [
"public"
],
"methods": [
"public void <init>(com.yahoo.language.sentencepiece.SentencePieceConfig)",
- "public void <init>(com.yahoo.language.sentencepiece.SentencePieceEncoder$Builder)",
+ "public void <init>(com.yahoo.language.sentencepiece.SentencePieceEmbedder$Builder)",
"public java.util.List segment(java.lang.String, com.yahoo.language.Language)",
- "public java.util.List encode(java.lang.String, com.yahoo.language.Language)",
- "public com.yahoo.tensor.Tensor encode(java.lang.String, com.yahoo.language.Language, com.yahoo.tensor.TensorType)",
+ "public java.util.List embed(java.lang.String, com.yahoo.language.Language)",
+ "public com.yahoo.tensor.Tensor embed(java.lang.String, com.yahoo.language.Language, com.yahoo.tensor.TensorType)",
"public java.lang.String normalize(java.lang.String)"
],
"fields": []
diff --git a/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/SentencePieceEncoder.java b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/SentencePieceEmbedder.java
index b6659ebeaa3..116dd15f563 100644
--- a/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/SentencePieceEncoder.java
+++ b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/SentencePieceEmbedder.java
@@ -4,7 +4,7 @@ package com.yahoo.language.sentencepiece;
import com.google.common.annotations.Beta;
import com.google.inject.Inject;
import com.yahoo.language.Language;
-import com.yahoo.language.process.Encoder;
+import com.yahoo.language.process.Embedder;
import com.yahoo.language.process.Segmenter;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.TensorAddress;
@@ -19,26 +19,25 @@ import java.util.Map;
import java.util.stream.Collectors;
/**
- * Integration with https://github.com/google/sentencepiece
- * through http://docs.djl.ai/extensions/sentencepiece/index.html
+ * A native Java implementation of SentencePiece - see https://github.com/google/sentencepiece
*
- * SentencePiece is a language-agnostic tokenizer for neural nets.
+ * SentencePiece is a language-agnostic segmenter and embedder for neural nets.
*
* @author bratseth
*/
@Beta
-public class SentencePieceEncoder implements Segmenter, Encoder {
+public class SentencePieceEmbedder implements Segmenter, Embedder {
private final Map<Language, Model> models;
private final SentencePieceAlgorithm algorithm;
@Inject
- public SentencePieceEncoder(SentencePieceConfig config) {
+ public SentencePieceEmbedder(SentencePieceConfig config) {
this(new Builder(config));
}
- public SentencePieceEncoder(Builder builder) {
+ public SentencePieceEmbedder(Builder builder) {
algorithm = new SentencePieceAlgorithm(builder.collapseUnknowns, builder.getScoring());
models = builder.getModels().entrySet()
@@ -46,7 +45,7 @@ public class SentencePieceEncoder implements Segmenter, Encoder {
.map(e -> new Model(e.getKey(), e.getValue()))
.collect(Collectors.toUnmodifiableMap(m -> m.language, m -> m));
if (models.isEmpty())
- throw new IllegalArgumentException("SentencePieceEncoder requires at least one model configured");
+ throw new IllegalArgumentException("SentencePieceEmbedder requires at least one model configured");
}
/**
@@ -77,7 +76,7 @@ public class SentencePieceEncoder implements Segmenter, Encoder {
* @return the list of zero or more token ids resulting from segmenting the input text
*/
@Override
- public List<Integer> encode(String rawInput, Language language) {
+ public List<Integer> embed(String rawInput, Language language) {
var resultBuilder = new ResultBuilder<List<Integer>>(new ArrayList<>()) {
public void add(int segmentStart, int segmentEnd, SentencePieceAlgorithm.SegmentEnd[] segmentEnds) {
result().add(segmentEnds[segmentEnd].id);
@@ -89,7 +88,7 @@ public class SentencePieceEncoder implements Segmenter, Encoder {
}
/**
- * <p>Encodes directly to a tensor.</p>
+ * <p>Embeds text into a tensor.</p>
*
* <p>If the tensor type is indexed 1-d (bound or unbound) this will return a tensor containing the token ids in the order
* they were encountered in the text. If the dimension is bound and too large it will be zero padded, if too small
@@ -101,10 +100,10 @@ public class SentencePieceEncoder implements Segmenter, Encoder {
* <p>If the tensor is any other type IllegalArgumentException is thrown.</p>
*/
@Override
- public Tensor encode(String rawInput, Language language, TensorType type) {
+ public Tensor embed(String rawInput, Language language, TensorType type) {
if (type.dimensions().size() == 1 && type.dimensions().get(0).isIndexed()) {
// Build to a list first since we can't reverse a tensor builder
- List<Integer> values = encode(rawInput, language);
+ List<Integer> values = embed(rawInput, language);
long maxSize = values.size();
if (type.dimensions().get(0).size().isPresent())
@@ -125,7 +124,7 @@ public class SentencePieceEncoder implements Segmenter, Encoder {
return builder.build();
}
else {
- throw new IllegalArgumentException("Don't know how to encode with SentencePiece into " + type);
+ throw new IllegalArgumentException("Don't know how to embed with SentencePiece into " + type);
}
}
@@ -210,9 +209,9 @@ public class SentencePieceEncoder implements Segmenter, Encoder {
}
public Scoring getScoring() { return scoring; }
- public SentencePieceEncoder build() {
+ public SentencePieceEmbedder build() {
if (models.isEmpty()) throw new IllegalStateException("At least one model must be supplied");
- return new SentencePieceEncoder(this);
+ return new SentencePieceEmbedder(this);
}
}
diff --git a/linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def b/linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def
index b91c0c45dc4..16ada78688a 100644
--- a/linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def
+++ b/linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def
@@ -1,6 +1,6 @@
# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-# Configures com.yahoo.language.sentencepiece.SentencePieceEncoder
+# Configures com.yahoo.language.sentencepiece.SentencePieceEmbedder
namespace=language.sentencepiece
diff --git a/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceConfigurationTest.java b/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceConfigurationTest.java
index edbbe21ec53..1ed2271f774 100644
--- a/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceConfigurationTest.java
+++ b/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceConfigurationTest.java
@@ -15,7 +15,7 @@ public class SentencePieceConfigurationTest {
public void testEnglishTokenization() {
var b = new SentencePieceConfig.Builder();
addModel("unknown", "src/test/models/sentencepiece/en.wiki.bpe.vs10000.model", b);
- var tester = new SentencePieceTester(new SentencePieceEncoder(b.build()));
+ var tester = new SentencePieceTester(new SentencePieceEmbedder(b.build()));
tester.assertSegmented("this is another sentence", "▁this", "▁is", "▁another", "▁sentence");
tester.assertSegmented("KHJKJHHKJHHSH hello", "▁", "KHJKJHHKJHHSH", "▁hel", "lo");
}
@@ -25,7 +25,7 @@ public class SentencePieceConfigurationTest {
var b = new SentencePieceConfig.Builder();
addModel("unknown", "src/test/models/sentencepiece/en.wiki.bpe.vs10000.model", b);
b.collapseUnknowns(false);
- var tester = new SentencePieceTester(new SentencePieceEncoder(b.build()));
+ var tester = new SentencePieceTester(new SentencePieceEmbedder(b.build()));
tester.assertSegmented("KHJ hello", "▁", "K", "H", "J", "▁hel", "lo");
}
@@ -34,7 +34,7 @@ public class SentencePieceConfigurationTest {
var b = new SentencePieceConfig.Builder();
addModel("unknown", "src/test/models/sentencepiece/en.wiki.bpe.vs10000.model", b);
b.scoring(SentencePieceConfig.Scoring.highestScore);
- var tester = new SentencePieceTester(new SentencePieceEncoder(b.build()));
+ var tester = new SentencePieceTester(new SentencePieceEmbedder(b.build()));
tester.assertSegmented("hello", "▁h", "el", "lo");
}
@@ -43,7 +43,7 @@ public class SentencePieceConfigurationTest {
var b = new SentencePieceConfig.Builder();
addModel("ja", "src/test/models/sentencepiece/ja.wiki.bpe.vs5000.model", b);
addModel("en", "src/test/models/sentencepiece/en.wiki.bpe.vs10000.model", b);
- var tester = new SentencePieceTester(new SentencePieceEncoder(b.build()));
+ var tester = new SentencePieceTester(new SentencePieceEmbedder(b.build()));
tester.assertSegmented(Language.JAPANESE, "いくつかの通常のテキスト", "▁", "いく", "つか", "の", "通常", "の", "テ", "キ", "スト");
tester.assertSegmented(Language.ENGLISH, "hello", "▁hel", "lo");
tester.assertSegmented(Language.JAPANESE, "hello", "▁h", "ell", "o");
diff --git a/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTest.java b/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTest.java
index d60d7386d4b..939f8ebe9d3 100644
--- a/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTest.java
+++ b/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTest.java
@@ -38,27 +38,27 @@ public class SentencePieceTest {
@Test
public void testIntegerListEncoding() {
var tester = new SentencePieceTester(new File("src/test/models/sentencepiece/en.wiki.bpe.vs10000.model").toPath());
- tester.assertEncoded("hello, world!", 908, 1418, 9934, 501, 9960);
- tester.assertEncoded("Hello, world!", 9912, 0, 6595, 9934, 501, 9960);
+ tester.assertEmbedded("hello, world!", 908, 1418, 9934, 501, 9960);
+ tester.assertEmbedded("Hello, world!", 9912, 0, 6595, 9934, 501, 9960);
}
@Test
public void testDenseTensorEncoding() {
var tester = new SentencePieceTester(new File("src/test/models/sentencepiece/en.wiki.bpe.vs10000.model").toPath());
- tester.assertEncoded("hello, world!", "tensor(d[10])", "[908,1418,9934,501,9960,0,0,0,0,0]");
- tester.assertEncoded("Hello, world!", "tensor(d[10])", "[9912,0,6595,9934,501,9960,0,0,0,0]");
- tester.assertEncoded("hello, world!", "tensor(d[2])", "[908,1418]");
+ tester.assertEmbedded("hello, world!", "tensor(d[10])", "[908,1418,9934,501,9960,0,0,0,0,0]");
+ tester.assertEmbedded("Hello, world!", "tensor(d[10])", "[9912,0,6595,9934,501,9960,0,0,0,0]");
+ tester.assertEmbedded("hello, world!", "tensor(d[2])", "[908,1418]");
}
@Test
public void testSparseTensorEncoding() {
var tester = new SentencePieceTester(new File("src/test/models/sentencepiece/en.wiki.bpe.vs10000.model").toPath());
- tester.assertEncoded("hello", "tensor(token{})", "{lo:1.0,'▁hel':0.0}");
+ tester.assertEmbedded("hello", "tensor(token{})", "{lo:1.0,'▁hel':0.0}");
}
@Test
public void testNoCollapse() {
- var tester = new SentencePieceTester(new SentencePieceEncoder.Builder()
+ var tester = new SentencePieceTester(new SentencePieceEmbedder.Builder()
.addDefaultModel(new File("src/test/models/sentencepiece/en.wiki.bpe.vs10000.model").toPath())
.setCollapseUnknowns(false));
tester.assertSegmented("KHJ hello", "▁", "K", "H", "J", "▁hel", "lo");
@@ -66,7 +66,7 @@ public class SentencePieceTest {
@Test
public void testHighestScore() {
- var tester = new SentencePieceTester(new SentencePieceEncoder.Builder()
+ var tester = new SentencePieceTester(new SentencePieceEmbedder.Builder()
.addDefaultModel(new File("src/test/models/sentencepiece/en.wiki.bpe.vs10000.model").toPath())
.setScoring(Scoring.highestScore));
tester.assertSegmented("h", "▁h");
@@ -77,7 +77,7 @@ public class SentencePieceTest {
@Test
public void testMultiLanguageTokenization() {
- SentencePieceEncoder.Builder builder = new SentencePieceEncoder.Builder();
+ SentencePieceEmbedder.Builder builder = new SentencePieceEmbedder.Builder();
builder.addModel(Language.JAPANESE, new File("src/test/models/sentencepiece/ja.wiki.bpe.vs5000.model").toPath());
builder.addModel(Language.ENGLISH, new File("src/test/models/sentencepiece/en.wiki.bpe.vs10000.model").toPath());
var tester = new SentencePieceTester(builder);
diff --git a/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java b/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java
index 1ba7c9b472d..c4cb13a3d23 100644
--- a/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java
+++ b/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java
@@ -14,28 +14,28 @@ import static org.junit.Assert.assertEquals;
class SentencePieceTester {
- private final SentencePieceEncoder encoder;
+ private final SentencePieceEmbedder embedder;
public SentencePieceTester(Path model) {
- this(new SentencePieceEncoder.Builder().addDefaultModel(model));
+ this(new SentencePieceEmbedder.Builder().addDefaultModel(model));
}
- public SentencePieceTester(SentencePieceEncoder.Builder builder) {
+ public SentencePieceTester(SentencePieceEmbedder.Builder builder) {
this(builder.build());
}
- public SentencePieceTester(SentencePieceEncoder encoder) {
- this.encoder = encoder;
+ public SentencePieceTester(SentencePieceEmbedder embedder) {
+ this.embedder = embedder;
}
- public void assertEncoded(String input, Integer... expectedCodes) {
- assertArrayEquals(expectedCodes, encoder.encode(input, Language.UNKNOWN).toArray());
+ public void assertEmbedded(String input, Integer... expectedCodes) {
+ assertArrayEquals(expectedCodes, embedder.embed(input, Language.UNKNOWN).toArray());
}
- public void assertEncoded(String input, String tensorType, String tensor) {
+ public void assertEmbedded(String input, String tensorType, String tensor) {
TensorType type = TensorType.fromSpec(tensorType);
Tensor expected = Tensor.from(type, tensor);
- assertEquals(expected, encoder.encode(input, Language.UNKNOWN, type));
+ assertEquals(expected, embedder.embed(input, Language.UNKNOWN, type));
}
public void assertSegmented(String input, String... expectedSegments) {
@@ -43,7 +43,7 @@ class SentencePieceTester {
}
public void assertSegmented(Language language, String input, String... expectedSegments) {
- assertArrayEquals(expectedSegments, encoder.segment(input, language).toArray());
+ assertArrayEquals(expectedSegments, embedder.segment(input, language).toArray());
}
}
diff --git a/linguistics/abi-spec.json b/linguistics/abi-spec.json
index cfbf2abda1a..d257c451739 100644
--- a/linguistics/abi-spec.json
+++ b/linguistics/abi-spec.json
@@ -328,22 +328,22 @@
],
"fields": []
},
- "com.yahoo.language.process.Encoder$FailingEncoder": {
+ "com.yahoo.language.process.Embedder$FailingEmbedder": {
"superClass": "java.lang.Object",
"interfaces": [
- "com.yahoo.language.process.Encoder"
+ "com.yahoo.language.process.Embedder"
],
"attributes": [
"public"
],
"methods": [
"public void <init>()",
- "public java.util.List encode(java.lang.String, com.yahoo.language.Language)",
- "public com.yahoo.tensor.Tensor encode(java.lang.String, com.yahoo.language.Language, com.yahoo.tensor.TensorType)"
+ "public java.util.List embed(java.lang.String, com.yahoo.language.Language)",
+ "public com.yahoo.tensor.Tensor embed(java.lang.String, com.yahoo.language.Language, com.yahoo.tensor.TensorType)"
],
"fields": []
},
- "com.yahoo.language.process.Encoder": {
+ "com.yahoo.language.process.Embedder": {
"superClass": "java.lang.Object",
"interfaces": [],
"attributes": [
@@ -352,11 +352,11 @@
"abstract"
],
"methods": [
- "public abstract java.util.List encode(java.lang.String, com.yahoo.language.Language)",
- "public abstract com.yahoo.tensor.Tensor encode(java.lang.String, com.yahoo.language.Language, com.yahoo.tensor.TensorType)"
+ "public abstract java.util.List embed(java.lang.String, com.yahoo.language.Language)",
+ "public abstract com.yahoo.tensor.Tensor embed(java.lang.String, com.yahoo.language.Language, com.yahoo.tensor.TensorType)"
],
"fields": [
- "public static final com.yahoo.language.process.Encoder throwsOnUse"
+ "public static final com.yahoo.language.process.Embedder throwsOnUse"
]
},
"com.yahoo.language.process.GramSplitter$Gram": {
diff --git a/linguistics/src/main/java/com/yahoo/language/process/Embedder.java b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java
new file mode 100644
index 00000000000..56c401a7c61
--- /dev/null
+++ b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java
@@ -0,0 +1,56 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.language.process;
+
+import com.yahoo.language.Language;
+import com.yahoo.tensor.Tensor;
+import com.yahoo.tensor.TensorType;
+
+import java.util.List;
+
+/**
+ * An embedder converts a text string to a tensor
+ *
+ * @author bratseth
+ */
+public interface Embedder {
+
+ /** An instance of this which throws IllegalStateException on any attempt to use it */
+ Embedder throwsOnUse = new FailingEmbedder();
+
+ /**
+ * Converts text into a list of token ids (a vector embedding)
+ *
+ * @param text the text to embed
+ * @param language the language of the text, or UNKNOWN to use language independent embedding
+ * @return the text embedded as a list of token ids
+ * @throws IllegalArgumentException if the language is not supported by this embedder
+ */
+ List<Integer> embed(String text, Language language);
+
+ /**
+ * Converts text into tokens in a tensor.
+ * The information contained in the embedding may depend on the tensor type.
+ *
+ * @param text the text to embed
+ * @param language the language of the text, or UNKNOWN to use language independent embedding
+ * @param tensorType the type of the tensor to be returned
+ * @return the tensor embedding of the text, as the specified tensor type
+ * @throws IllegalArgumentException if the language or tensor type is not supported by this embedder
+ */
+ Tensor embed(String text, Language language, TensorType tensorType);
+
+ class FailingEmbedder implements Embedder {
+
+ @Override
+ public List<Integer> embed(String text, Language language) {
+ throw new IllegalStateException("No embedder has been configured");
+ }
+
+ @Override
+ public Tensor embed(String text, Language language, TensorType tensorType) {
+ throw new IllegalStateException("No embedder has been configured");
+ }
+
+ }
+
+}
diff --git a/linguistics/src/main/java/com/yahoo/language/process/Encoder.java b/linguistics/src/main/java/com/yahoo/language/process/Encoder.java
deleted file mode 100644
index 27f73d15e54..00000000000
--- a/linguistics/src/main/java/com/yahoo/language/process/Encoder.java
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.language.process;
-
-import com.yahoo.language.Language;
-import com.yahoo.tensor.Tensor;
-import com.yahoo.tensor.TensorType;
-
-import java.util.List;
-
-/**
- * An encoder converts a text string to a tensor or list of tokens
- *
- * @author bratseth
- */
-public interface Encoder {
-
- /** An instance of this which throws IllegalStateException if attempted used */
- Encoder throwsOnUse = new FailingEncoder();
-
- /**
- * Encodes text into tokens in a list of ids.
- *
- * @param text the text to encode
- * @param language the language of the text, or UNKNOWN to use language independent encoding
- * @return the text encoded to a list of segment ids
- * @throws IllegalArgumentException if the language is not supported by this encoder
- */
- List<Integer> encode(String text, Language language);
-
- /**
- * Encodes text into tokens in a tensor.
- * The information contained in the encoding may depend on the tensor type.
- *
- * @param text the text to encode
- * @param language the language of the text, or UNKNOWN to use language independent encoding
- * @param tensorType the type of the ttensor to be returned
- * @return the tex encoded into a tensor of the supplied type
- * @throws IllegalArgumentException if the language or tensor type is not supported by this encoder
- */
- Tensor encode(String text, Language language, TensorType tensorType);
-
- class FailingEncoder implements Encoder {
-
- @Override
- public List<Integer> encode(String text, Language language) {
- throw new IllegalStateException("No encoder has been configured");
- }
-
- @Override
- public Tensor encode(String text, Language language, TensorType tensorType) {
- throw new IllegalStateException("No encoder has been configured");
- }
-
- }
-
-}