about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java 3
-rw-r--r-- container-search/src/main/java/com/yahoo/search/schema/internal/TensorConverter.java 10
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EmbedExpression.java 2
-rw-r--r-- linguistics/abi-spec.json 20
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/process/Embedder.java 37
-rw-r--r-- metrics/src/main/java/ai/vespa/metrics/ContainerMetrics.java 5
-rw-r--r-- metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java 4
-rw-r--r-- model-integration/pom.xml 12
-rw-r--r-- model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java 22
-rw-r--r-- model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java 52
-rw-r--r-- model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java 18
-rw-r--r-- model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java 3
12 files changed, 172 insertions, 16 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
index 31f8eba48bf..3a679782966 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
@@ -792,6 +792,9 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
/* The ONNX runtime is always available for injection to any component */
cluster.addSimpleComponent(
ContainerModelEvaluation.ONNX_RUNTIME_CLASS, null, ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME);
+ /* Add runtime providing utilities such as metrics to embedder implementations */
+ cluster.addSimpleComponent(
+ "ai.vespa.embedding.EmbedderRuntime", null, ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME);
}
private void addProcessing(DeployState deployState, Element spec, ApplicationContainerCluster cluster, ConfigModelContext context) {
diff --git a/container-search/src/main/java/com/yahoo/search/schema/internal/TensorConverter.java b/container-search/src/main/java/com/yahoo/search/schema/internal/TensorConverter.java
index 2370513dba2..3d9d28ee199 100644
--- a/container-search/src/main/java/com/yahoo/search/schema/internal/TensorConverter.java
+++ b/container-search/src/main/java/com/yahoo/search/schema/internal/TensorConverter.java
@@ -52,11 +52,12 @@ public class TensorConverter {
throw new IllegalArgumentException("Expected any string enclosed in embed(), but the argument does not end by ')'");
String argument = s.substring("embed(".length(), s.length() - 1);
Embedder embedder;
+ String embedderId;
// Check if arguments specifies an embedder with the format embed(embedder, "text to encode")
Matcher matcher = embedderArgumentRegexp.matcher(argument);
if (matcher.matches()) {
- String embedderId = matcher.group(1);
+ embedderId = matcher.group(1);
argument = matcher.group(2);
if ( ! embedders.containsKey(embedderId)) {
throw new IllegalArgumentException("Can't find embedder '" + embedderId + "'. " +
@@ -69,10 +70,11 @@ public class TensorConverter {
throw new IllegalArgumentException("Multiple embedders are provided but no embedder id is given. " +
"Valid embedders are " + validEmbedders(embedders));
} else {
- embedder = embedders.entrySet().stream().findFirst().get().getValue();
+ var entry = embedders.entrySet().stream().findFirst().get();
+ embedderId = entry.getKey();
+ embedder = entry.getValue();
}
-
- return embedder.embed(removeQuotes(argument), embedderContext, type);
+ return embedder.embed(removeQuotes(argument), embedderContext.copy().setEmbedderId(embedderId), type);
}
private static String removeQuotes(String s) {
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EmbedExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EmbedExpression.java
index 9f2260e5b94..5ee5fea3158 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EmbedExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EmbedExpression.java
@@ -106,7 +106,7 @@ public class EmbedExpression extends Expression {
private Tensor embed(String input, TensorType targetType, ExecutionContext context) {
return embedder.embed(input,
- new Embedder.Context(destination).setLanguage(context.getLanguage()),
+ new Embedder.Context(destination).setLanguage(context.getLanguage()).setEmbedderId(embedderId),
targetType);
}
diff --git a/linguistics/abi-spec.json b/linguistics/abi-spec.json
index dc85a2e6f0b..680aec3ff57 100644
--- a/linguistics/abi-spec.json
+++ b/linguistics/abi-spec.json
@@ -338,10 +338,13 @@
],
"methods" : [
"public void <init>(java.lang.String)",
+ "public com.yahoo.language.process.Embedder$Context copy()",
"public com.yahoo.language.Language getLanguage()",
"public com.yahoo.language.process.Embedder$Context setLanguage(com.yahoo.language.Language)",
"public java.lang.String getDestination()",
- "public com.yahoo.language.process.Embedder$Context setDestination(java.lang.String)"
+ "public com.yahoo.language.process.Embedder$Context setDestination(java.lang.String)",
+ "public java.lang.String getEmbedderId()",
+ "public com.yahoo.language.process.Embedder$Context setEmbedderId(java.lang.String)"
],
"fields" : [ ]
},
@@ -361,6 +364,21 @@
],
"fields" : [ ]
},
+ "com.yahoo.language.process.Embedder$Runtime" : {
+ "superClass" : "java.lang.Object",
+ "interfaces" : [ ],
+ "attributes" : [
+ "public",
+ "interface",
+ "abstract"
+ ],
+ "methods" : [
+ "public abstract void sampleEmbeddingLatency(java.time.Duration, com.yahoo.language.process.Embedder$Context)",
+ "public abstract void sampleSequenceLength(long, com.yahoo.language.process.Embedder$Context)",
+ "public static com.yahoo.language.process.Embedder$Runtime testInstance()"
+ ],
+ "fields" : [ ]
+ },
"com.yahoo.language.process.Embedder" : {
"superClass" : "java.lang.Object",
"interfaces" : [ ],
diff --git a/linguistics/src/main/java/com/yahoo/language/process/Embedder.java b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java
index 055861c5388..1d2e1bcc847 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/Embedder.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java
@@ -1,10 +1,12 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.process;
+import com.yahoo.api.annotations.Beta;
import com.yahoo.language.Language;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.TensorType;
+import java.time.Duration;
import java.util.List;
import java.util.Map;
@@ -64,15 +66,42 @@ public interface Embedder {
*/
Tensor embed(String text, Context context, TensorType tensorType);
+ /**
+ * Runtime services, currently metric sampling, that an {@link Embedder} implementation can have injected through its constructor.
+ */
+ @Beta
+ interface Runtime {
+ /** Samples the latency of one embedding operation performed in the given context */
+ void sampleEmbeddingLatency(Duration latency, Context ctx);
+ /** Samples the sequence length of one embedding operation performed in the given context */
+ void sampleSequenceLength(long length, Context ctx);
+ /** Returns a runtime whose sampling methods do nothing, for use where no metrics are wanted (e.g. unit tests) */
+ static Runtime testInstance() {
+ return new Runtime() {
+ @Override public void sampleEmbeddingLatency(Duration latency, Context ctx) { }
+ @Override public void sampleSequenceLength(long length, Context ctx) { }
+ };
+ }
+ }
+
class Context {
private Language language = Language.UNKNOWN;
private String destination;
+ private String embedderId = "unknown";
public Context(String destination) {
this.destination = destination;
}
+ private Context(Context other) { // Copy constructor: duplicates every field of the other context
+ language = other.language;
+ destination = other.destination;
+ embedderId = other.embedderId;
+ }
+ /** Returns a new context with the same language, destination and embedder id as this one */
+ public Context copy() { return new Context(this); }
+
/** Returns the language of the text, or UNKNOWN (default) to use a language independent embedding */
public Language getLanguage() { return language; }
@@ -102,6 +131,14 @@ public interface Embedder {
return this;
}
+ /** Returns the embedder id, or 'unknown' if not set */
+ public String getEmbedderId() { return embedderId; }
+
+ /** Sets the embedder id (the value is not null-checked here) and returns this context to allow chaining */
+ public Context setEmbedderId(String embedderId) {
+ this.embedderId = embedderId;
+ return this;
+ }
}
class FailingEmbedder implements Embedder {
diff --git a/metrics/src/main/java/ai/vespa/metrics/ContainerMetrics.java b/metrics/src/main/java/ai/vespa/metrics/ContainerMetrics.java
index 4c7e203fee5..e515bbf9275 100644
--- a/metrics/src/main/java/ai/vespa/metrics/ContainerMetrics.java
+++ b/metrics/src/main/java/ai/vespa/metrics/ContainerMetrics.java
@@ -200,9 +200,10 @@ public enum ContainerMetrics implements VespaMetrics {
SERVER_TOTAL_FAILED_RESPONSE_LATENCY("serverTotalFailedResponseLatency", Unit.MILLISECOND, "Total duration for execution of failed responses"),
SERVER_TIME_TO_FIRST_BYTE("serverTimeToFirstByte", Unit.MILLISECOND, "Time from request has been received by the server until the first byte is returned to the client"),
- SERVER_STARTED_MILLIS("serverStartedMillis", Unit.MILLISECOND, "Time since the service was started");
-
+ SERVER_STARTED_MILLIS("serverStartedMillis", Unit.MILLISECOND, "Time since the service was started"),
+ EMBEDDER_LATENCY("embedder.latency", Unit.MILLISECOND, "Time spent creating an embedding"),
+ EMBEDDER_SEQUENCE_LENGTH("embedder.sequence_length", Unit.BYTE, "Size of sequence produced by tokenizer");
private final String name;
private final Unit unit;
diff --git a/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java
index 4ec596f8ce7..b7ed7293d6c 100644
--- a/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java
+++ b/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java
@@ -108,6 +108,10 @@ public class VespaMetricSet {
// Routing layer metrics
addMetric(metrics, RoutingLayerMetrics.WORKER_CONNECTIONS.max()); // Hosted Vespa only (routing layer)
+ // Embedders
+ addMetric(metrics, ContainerMetrics.EMBEDDER_LATENCY, EnumSet.of(max, sum, count));
+ addMetric(metrics, ContainerMetrics.EMBEDDER_SEQUENCE_LENGTH, EnumSet.of(max, sum, count));
+
return metrics;
}
diff --git a/model-integration/pom.xml b/model-integration/pom.xml
index 854e15298c6..d195a061c52 100644
--- a/model-integration/pom.xml
+++ b/model-integration/pom.xml
@@ -87,6 +87,18 @@
<scope>provided</scope>
</dependency>
<dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>container-core</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>metrics</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
<groupId>net.java.dev.jna</groupId>
<artifactId>jna</artifactId>
<scope>provided</scope>
diff --git a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java
index a12424c7d12..8b5b0f23b99 100644
--- a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java
+++ b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java
@@ -12,6 +12,8 @@ import com.yahoo.tensor.IndexedTensor;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.TensorType;
+import java.time.Duration;
+import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@@ -40,11 +42,13 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder {
private final String outputName;
private final PoolingStrategy poolingStrategy;
+ private final Embedder.Runtime runtime;
private final WordPieceEmbedder tokenizer;
private final OnnxEvaluator evaluator;
@Inject
- public BertBaseEmbedder(OnnxRuntime onnx, BertBaseEmbedderConfig config) {
+ public BertBaseEmbedder(OnnxRuntime onnx, Embedder.Runtime runtime, BertBaseEmbedderConfig config) {
+ this.runtime = runtime;
maxTokens = config.transformerMaxTokens();
startSequenceToken = config.transformerStartSequenceToken();
endSequenceToken = config.transformerEndSequenceToken();
@@ -87,11 +91,16 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder {
@Override
public List<Integer> embed(String text, Context context) {
- return tokenizer.embed(text, context);
+ var start = Instant.now();
+ var tokens = tokenize(text, context);
+ runtime.sampleSequenceLength(tokens.size(), context);
+ runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context);
+ return tokens;
}
@Override
public Tensor embed(String text, Context context, TensorType type) {
+ var start = Instant.now();
if (type.dimensions().size() != 1) {
throw new IllegalArgumentException("Error in embedding to type '" + type + "': should only have one dimension.");
}
@@ -99,11 +108,16 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder {
throw new IllegalArgumentException("Error in embedding to type '" + type + "': dimension should be indexed.");
}
List<Integer> tokens = embedWithSeparatorTokens(text, context, maxTokens);
- return embedTokens(tokens, type);
+ runtime.sampleSequenceLength(tokens.size(), context);
+ var embedding = embedTokens(tokens, type);
+ runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context);
+ return embedding;
}
@Override public void deconstruct() { evaluator.close(); }
+ /** Tokenizes text without sampling metrics; embedWithSeparatorTokens uses this rather than the metric-sampling embed(String, Context) */
+ private List<Integer> tokenize(String text, Context ctx) { return tokenizer.embed(text, ctx); }
Tensor embedTokens(List<Integer> tokens, TensorType type) {
Tensor inputSequence = createTensorRepresentation(tokens, "d1");
Tensor attentionMask = createAttentionMask(inputSequence);
@@ -129,7 +143,7 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder {
private List<Integer> embedWithSeparatorTokens(String text, Context context, int maxLength) {
List<Integer> tokens = new ArrayList<>();
tokens.add(startSequenceToken);
- tokens.addAll(embed(text, context));
+ tokens.addAll(tokenize(text, context));
tokens.add(endSequenceToken);
if (tokens.size() > maxLength) {
tokens = tokens.subList(0, maxLength-1);
diff --git a/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java b/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java
new file mode 100644
index 00000000000..905dd720ef0
--- /dev/null
+++ b/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java
@@ -0,0 +1,52 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package ai.vespa.embedding;
+
+import ai.vespa.metrics.ContainerMetrics;
+import com.yahoo.component.annotation.Inject;
+import com.yahoo.language.Language;
+import com.yahoo.language.process.Embedder;
+import com.yahoo.metrics.simple.Gauge;
+import com.yahoo.metrics.simple.MetricReceiver;
+import com.yahoo.metrics.simple.Point;
+
+import java.time.Duration;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Samples embedding latency and sequence length as gauges, dimensioned by embedder id, language and destination.
+ *
+ * @author bjorncs
+ */
+public class EmbedderRuntime implements Embedder.Runtime {
+    private final Gauge embedLatency;
+    private final Gauge sequenceLength;
+    // Concurrent map, as HashMap.computeIfAbsent is unsafe under concurrent use. NOTE(review): assumes embedders may sample from several threads at once - confirm
+    private final Map<MetricDimensions, Point> metricPointCache = new java.util.concurrent.ConcurrentHashMap<>();
+
+    @Inject
+    public EmbedderRuntime(MetricReceiver metrics) {
+        embedLatency = metrics.declareGauge(ContainerMetrics.EMBEDDER_LATENCY.baseName());
+        sequenceLength = metrics.declareGauge(ContainerMetrics.EMBEDDER_SEQUENCE_LENGTH.baseName());
+    }
+
+    @Override public void sampleEmbeddingLatency(Duration latency, Embedder.Context ctx) {
+        embedLatency.sample(latency.toMillis(), metricPoint(ctx));
+    }
+
+    @Override public void sampleSequenceLength(long length, Embedder.Context ctx) {
+        sequenceLength.sample(length, metricPoint(ctx));
+    }
+
+    /** Returns the cached point for the context's dimensions, creating it on first use. NOTE(review): Map.of rejects null values, e.g. a null destination */
+    private Point metricPoint(Embedder.Context ctx) {
+        var dimensions = new MetricDimensions(ctx.getEmbedderId(), ctx.getLanguage(), ctx.getDestination());
+        return metricPointCache.computeIfAbsent(
+                dimensions, d -> new Point(Map.of("embedder", d.embedderId(),
+                                                  "language", d.language().languageCode(),
+                                                  "destination", d.destination())));
+    }
+
+    private record MetricDimensions(String embedderId, Language language, String destination) {}
+}
diff --git a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
index b035541bb0f..73d6424739e 100644
--- a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
+++ b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
@@ -16,6 +16,8 @@ import com.yahoo.tensor.TensorAddress;
import com.yahoo.tensor.TensorType;
import java.nio.file.Paths;
+import java.time.Duration;
+import java.time.Instant;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
@@ -27,6 +29,7 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder {
private static final Logger log = Logger.getLogger(HuggingFaceEmbedder.class.getName());
+ private final Embedder.Runtime runtime;
private final String inputIdsName;
private final String attentionMaskName;
private final String tokenTypeIdsName;
@@ -37,7 +40,8 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder {
private final PoolingStrategy poolingStrategy;
@Inject
- public HuggingFaceEmbedder(OnnxRuntime onnx, HuggingFaceEmbedderConfig config) {
+ public HuggingFaceEmbedder(OnnxRuntime onnx, Embedder.Runtime runtime, HuggingFaceEmbedderConfig config) {
+ this.runtime = runtime;
inputIdsName = config.transformerInputIds();
attentionMaskName = config.transformerAttentionMask();
tokenTypeIdsName = config.transformerTokenTypeIds();
@@ -87,7 +91,11 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder {
@Override
public List<Integer> embed(String s, Context context) {
- return tokenizer.embed(s, context);
+ var start = Instant.now();
+ var tokens = tokenizer.embed(s, context);
+ runtime.sampleSequenceLength(tokens.size(), context);
+ runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context);
+ return tokens;
}
@Override
@@ -98,7 +106,9 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder {
@Override
public Tensor embed(String s, Context context, TensorType tensorType) {
+ var start = Instant.now();
var encoding = tokenizer.encode(s, context.getLanguage());
+ runtime.sampleSequenceLength(encoding.ids().size(), context);
Tensor inputSequence = createTensorRepresentation(encoding.ids(), "d1");
Tensor attentionMask = createTensorRepresentation(encoding.attentionMask(), "d1");
Tensor tokenTypeIds = tokenTypeIdsName.isEmpty() ? null : createTensorRepresentation(encoding.typeIds(), "d1");
@@ -117,7 +127,9 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder {
Map<String, Tensor> outputs = evaluator.evaluate(inputs);
Tensor tokenEmbeddings = outputs.get(outputName);
var result = poolingStrategy.toSentenceEmbedding(tensorType, tokenEmbeddings, attentionMask);
- return normalize ? normalize(result, tensorType) : result;
+ var normalized = normalize ? normalize(result, tensorType) : result;
+ runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context);
+ return normalized;
}
Tensor normalize(Tensor embedding, TensorType tensorType) {
diff --git a/model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java b/model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java
index 329b87cacd1..a0964eb5812 100644
--- a/model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java
+++ b/model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java
@@ -3,6 +3,7 @@ package ai.vespa.embedding;
import ai.vespa.modelintegration.evaluator.OnnxRuntime;
import com.yahoo.config.ModelReference;
import com.yahoo.embedding.BertBaseEmbedderConfig;
+import com.yahoo.language.process.Embedder;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.TensorType;
import org.junit.Test;
@@ -69,7 +70,7 @@ public class BertBaseEmbedderTest {
}
private static BertBaseEmbedder newBertBaseEmbedder(BertBaseEmbedderConfig cfg) {
- return new BertBaseEmbedder(new OnnxRuntime(), cfg);
+ return new BertBaseEmbedder(new OnnxRuntime(), Embedder.Runtime.testInstance(), cfg);
}
}