diff options
12 files changed, 166 insertions, 16 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java index e7692aeee7b..0e72cff1688 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java @@ -797,6 +797,9 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { /* The ONNX runtime is always available for injection to any component */ cluster.addSimpleComponent( ContainerModelEvaluation.ONNX_RUNTIME_CLASS, null, ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME); + /* Add runtime providing utilities such as metrics to embedder implementations */ + cluster.addSimpleComponent( + "ai.vespa.embedding.EmbedderRuntime", null, ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME); } private void addProcessing(DeployState deployState, Element spec, ApplicationContainerCluster cluster, ConfigModelContext context) { diff --git a/container-search/src/main/java/com/yahoo/search/schema/internal/TensorConverter.java b/container-search/src/main/java/com/yahoo/search/schema/internal/TensorConverter.java index 2370513dba2..3d9d28ee199 100644 --- a/container-search/src/main/java/com/yahoo/search/schema/internal/TensorConverter.java +++ b/container-search/src/main/java/com/yahoo/search/schema/internal/TensorConverter.java @@ -52,11 +52,12 @@ public class TensorConverter { throw new IllegalArgumentException("Expected any string enclosed in embed(), but the argument does not end by ')'"); String argument = s.substring("embed(".length(), s.length() - 1); Embedder embedder; + String embedderId; // Check if arguments specifies an embedder with the format embed(embedder, "text to encode") Matcher matcher = embedderArgumentRegexp.matcher(argument); if (matcher.matches()) { - String embedderId = matcher.group(1); + embedderId = matcher.group(1); argument = matcher.group(2); if ( ! embedders.containsKey(embedderId)) { throw new IllegalArgumentException("Can't find embedder '" + embedderId + "'. " + @@ -69,10 +70,11 @@ public class TensorConverter { throw new IllegalArgumentException("Multiple embedders are provided but no embedder id is given. " + "Valid embedders are " + validEmbedders(embedders)); } else { - embedder = embedders.entrySet().stream().findFirst().get().getValue(); + var entry = embedders.entrySet().stream().findFirst().get(); + embedderId = entry.getKey(); + embedder = entry.getValue(); } - - return embedder.embed(removeQuotes(argument), embedderContext, type); + return embedder.embed(removeQuotes(argument), embedderContext.copy().setEmbedderId(embedderId), type); } private static String removeQuotes(String s) { diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EmbedExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EmbedExpression.java index 9f2260e5b94..5ee5fea3158 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EmbedExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EmbedExpression.java @@ -106,7 +106,7 @@ public class EmbedExpression extends Expression { private Tensor embed(String input, TensorType targetType, ExecutionContext context) { return embedder.embed(input, - new Embedder.Context(destination).setLanguage(context.getLanguage()), + new Embedder.Context(destination).setLanguage(context.getLanguage()).setEmbedderId(embedderId), targetType); } diff --git a/linguistics/abi-spec.json b/linguistics/abi-spec.json index dc85a2e6f0b..1ffb879e57e 100644 --- a/linguistics/abi-spec.json +++ b/linguistics/abi-spec.json @@ -338,10 +338,13 @@ ], "methods" : [ "public void <init>(java.lang.String)", + "public com.yahoo.language.process.Embedder$Context copy()", "public com.yahoo.language.Language getLanguage()", "public com.yahoo.language.process.Embedder$Context setLanguage(com.yahoo.language.Language)", "public java.lang.String getDestination()", - "public com.yahoo.language.process.Embedder$Context setDestination(java.lang.String)" + "public com.yahoo.language.process.Embedder$Context setDestination(java.lang.String)", + "public java.lang.String getEmbedderId()", + "public com.yahoo.language.process.Embedder$Context setEmbedderId(java.lang.String)" ], "fields" : [ ] }, @@ -361,6 +364,21 @@ ], "fields" : [ ] }, + "com.yahoo.language.process.Embedder$Runtime" : { + "superClass" : "java.lang.Object", + "interfaces" : [ ], + "attributes" : [ + "public", + "interface", + "abstract" + ], + "methods" : [ + "public abstract void sampleEmbeddingLatency(double, com.yahoo.language.process.Embedder$Context)", + "public abstract void sampleSequenceLength(long, com.yahoo.language.process.Embedder$Context)", + "public static com.yahoo.language.process.Embedder$Runtime testInstance()" + ], + "fields" : [ ] + }, "com.yahoo.language.process.Embedder" : { "superClass" : "java.lang.Object", "interfaces" : [ ], diff --git a/linguistics/src/main/java/com/yahoo/language/process/Embedder.java b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java index 055861c5388..98030a4f054 100644 --- a/linguistics/src/main/java/com/yahoo/language/process/Embedder.java +++ b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.language.process; +import com.yahoo.api.annotations.Beta; import com.yahoo.language.Language; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorType; @@ -64,15 +65,42 @@ public interface Embedder { */ Tensor embed(String text, Context context, TensorType tensorType); + /** + * Runtime that is injectable through {@link Embedder} constructor. + */ + @Beta + interface Runtime { + /** Sample latency metric for embedding */ + void sampleEmbeddingLatency(double millis, Context ctx); + /** Sample sequence length metric for embedding */ + void sampleSequenceLength(long length, Context ctx); + + static Runtime testInstance() { + return new Runtime() { + @Override public void sampleEmbeddingLatency(double millis, Context ctx) { } + @Override public void sampleSequenceLength(long length, Context ctx) { } + }; + } + } + class Context { private Language language = Language.UNKNOWN; private String destination; + private String embedderId = "unknown"; public Context(String destination) { this.destination = destination; } + private Context(Context other) { + language = other.language; + destination = other.destination; + embedderId = other.embedderId; + } + + public Context copy() { return new Context(this); } + /** Returns the language of the text, or UNKNOWN (default) to use a language independent embedding */ public Language getLanguage() { return language; } @@ -102,6 +130,14 @@ public interface Embedder { return this; } + /** Return the embedder id or 'unknown' if not set */ + public String getEmbedderId() { return embedderId; } + + /** Sets the embedder id */ + public Context setEmbedderId(String embedderId) { + this.embedderId = embedderId; + return this; + } } class FailingEmbedder implements Embedder { diff --git a/metrics/src/main/java/ai/vespa/metrics/ContainerMetrics.java b/metrics/src/main/java/ai/vespa/metrics/ContainerMetrics.java index 448233162e4..ac7ecfa124a 100644 --- a/metrics/src/main/java/ai/vespa/metrics/ContainerMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/ContainerMetrics.java @@ -201,9 +201,10 @@ public enum ContainerMetrics implements VespaMetrics { SERVER_TOTAL_FAILED_RESPONSE_LATENCY("serverTotalFailedResponseLatency", Unit.MILLISECOND, "Total duration for execution of failed responses"), SERVER_TIME_TO_FIRST_BYTE("serverTimeToFirstByte", Unit.MILLISECOND, "Time from request has been received by the server until the first byte is returned to the client"), - SERVER_STARTED_MILLIS("serverStartedMillis", Unit.MILLISECOND, "Time since the service was started"); - + SERVER_STARTED_MILLIS("serverStartedMillis", Unit.MILLISECOND, "Time since the service was started"), + EMBEDDER_LATENCY("embedder.latency", Unit.MILLISECOND, "Time spent creating an embedding"), + EMBEDDER_SEQUENCE_LENGTH("embedder.sequence_length", Unit.BYTE, "Size of sequence produced by tokenizer"); private final String name; private final Unit unit; diff --git a/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java index 39c1e6f519f..6c4626238eb 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java @@ -116,6 +116,10 @@ public class VespaMetricSet { // Routing layer metrics addMetric(metrics, RoutingLayerMetrics.WORKER_CONNECTIONS.max()); // Hosted Vespa only (routing layer) + // Embedders + addMetric(metrics, ContainerMetrics.EMBEDDER_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, ContainerMetrics.EMBEDDER_SEQUENCE_LENGTH, EnumSet.of(max, sum, count)); + return metrics; } diff --git a/model-integration/pom.xml b/model-integration/pom.xml index 854e15298c6..d195a061c52 100644 --- a/model-integration/pom.xml +++ b/model-integration/pom.xml @@ -87,6 +87,18 @@ <scope>provided</scope> </dependency> <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>container-core</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>metrics</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + <dependency> <groupId>net.java.dev.jna</groupId> <artifactId>jna</artifactId> <scope>provided</scope> diff --git a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java index a12424c7d12..2c4f09b3821 100644 --- a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java +++ b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java @@ -40,11 +40,13 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder { private final String outputName; private final PoolingStrategy poolingStrategy; + private final Embedder.Runtime runtime; private final WordPieceEmbedder tokenizer; private final OnnxEvaluator evaluator; @Inject - public BertBaseEmbedder(OnnxRuntime onnx, BertBaseEmbedderConfig config) { + public BertBaseEmbedder(OnnxRuntime onnx, Embedder.Runtime runtime, BertBaseEmbedderConfig config) { + this.runtime = runtime; maxTokens = config.transformerMaxTokens(); startSequenceToken = config.transformerStartSequenceToken(); endSequenceToken = config.transformerEndSequenceToken(); @@ -87,11 +89,16 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder { @Override public List<Integer> embed(String text, Context context) { - return tokenizer.embed(text, context); + var start = System.nanoTime(); + var tokens = tokenize(text, context); + runtime.sampleSequenceLength(tokens.size(), context); + runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context); + return tokens; } @Override public Tensor embed(String text, Context context, TensorType type) { + var start = System.nanoTime(); if (type.dimensions().size() != 1) { throw new IllegalArgumentException("Error in embedding to type '" + type + "': should only have one dimension."); } @@ -99,11 +106,16 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder { throw new IllegalArgumentException("Error in embedding to type '" + type + "': dimension should be indexed."); } List<Integer> tokens = embedWithSeparatorTokens(text, context, maxTokens); - return embedTokens(tokens, type); + runtime.sampleSequenceLength(tokens.size(), context); + var embedding = embedTokens(tokens, type); + runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context); + return embedding; } @Override public void deconstruct() { evaluator.close(); } + private List<Integer> tokenize(String text, Context ctx) { return tokenizer.embed(text, ctx); } + Tensor embedTokens(List<Integer> tokens, TensorType type) { Tensor inputSequence = createTensorRepresentation(tokens, "d1"); Tensor attentionMask = createAttentionMask(inputSequence); @@ -129,7 +141,7 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder { private List<Integer> embedWithSeparatorTokens(String text, Context context, int maxLength) { List<Integer> tokens = new ArrayList<>(); tokens.add(startSequenceToken); - tokens.addAll(embed(text, context)); + tokens.addAll(tokenize(text, context)); tokens.add(endSequenceToken); if (tokens.size() > maxLength) { tokens = tokens.subList(0, maxLength-1); diff --git a/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java b/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java new file mode 100644 index 00000000000..45068db67f4 --- /dev/null +++ b/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java @@ -0,0 +1,51 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package ai.vespa.embedding; + +import ai.vespa.metrics.ContainerMetrics; +import com.yahoo.component.annotation.Inject; +import com.yahoo.language.Language; +import com.yahoo.language.process.Embedder; +import com.yahoo.metrics.simple.Gauge; +import com.yahoo.metrics.simple.MetricReceiver; +import com.yahoo.metrics.simple.Point; + +import java.util.HashMap; +import java.util.Map; + +/** + * @author bjorncs + */ +public class EmbedderRuntime implements Embedder.Runtime { + + private final Gauge embedLatency; + private final Gauge sequenceLength; + private final Map<MetricDimensions, Point> metricPointCache = new HashMap<>(); + + @Inject + public EmbedderRuntime(MetricReceiver metrics) { + embedLatency = metrics.declareGauge(ContainerMetrics.EMBEDDER_LATENCY.baseName()); + sequenceLength = metrics.declareGauge(ContainerMetrics.EMBEDDER_SEQUENCE_LENGTH.baseName()); + } + + @Override + public void sampleEmbeddingLatency(double millis, Embedder.Context ctx) { + embedLatency.sample(millis, metricPoint(ctx)); + } + + @Override + public void sampleSequenceLength(long length, Embedder.Context ctx) { + sequenceLength.sample(length, metricPoint(ctx)); + } + + private Point metricPoint(Embedder.Context ctx) { + var dimensions = new MetricDimensions(ctx.getEmbedderId(), ctx.getLanguage(), ctx.getDestination()); + return metricPointCache.computeIfAbsent( + dimensions, d -> new Point(Map.of("embedder", d.embedderId(), + "language", d.language().languageCode(), + "destination", d.destination()))); + } + + private record MetricDimensions(String embedderId, Language language, String destination) {} + +} diff --git a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java index b035541bb0f..ab8d33dbf17 100644 --- a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java +++ b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java @@ -27,6 +27,7 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder { private static final Logger log = Logger.getLogger(HuggingFaceEmbedder.class.getName()); + private final Embedder.Runtime runtime; private final String inputIdsName; private final String attentionMaskName; private final String tokenTypeIdsName; @@ -37,7 +38,8 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder { private final PoolingStrategy poolingStrategy; @Inject - public HuggingFaceEmbedder(OnnxRuntime onnx, HuggingFaceEmbedderConfig config) { + public HuggingFaceEmbedder(OnnxRuntime onnx, Embedder.Runtime runtime, HuggingFaceEmbedderConfig config) { + this.runtime = runtime; inputIdsName = config.transformerInputIds(); attentionMaskName = config.transformerAttentionMask(); tokenTypeIdsName = config.transformerTokenTypeIds(); @@ -87,7 +89,11 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder { @Override public List<Integer> embed(String s, Context context) { - return tokenizer.embed(s, context); + var start = System.nanoTime(); + var tokens = tokenizer.embed(s, context); + runtime.sampleSequenceLength(tokens.size(), context); + runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context); + return tokens; } @Override @@ -98,7 +104,9 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder { @Override public Tensor embed(String s, Context context, TensorType tensorType) { + var start = System.nanoTime(); var encoding = tokenizer.encode(s, context.getLanguage()); + runtime.sampleSequenceLength(encoding.ids().size(), context); Tensor inputSequence = createTensorRepresentation(encoding.ids(), "d1"); Tensor attentionMask = createTensorRepresentation(encoding.attentionMask(), "d1"); Tensor tokenTypeIds = tokenTypeIdsName.isEmpty() ? null : createTensorRepresentation(encoding.typeIds(), "d1"); @@ -117,7 +125,9 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder { Map<String, Tensor> outputs = evaluator.evaluate(inputs); Tensor tokenEmbeddings = outputs.get(outputName); var result = poolingStrategy.toSentenceEmbedding(tensorType, tokenEmbeddings, attentionMask); - return normalize ? normalize(result, tensorType) : result; + var normalized = normalize ? normalize(result, tensorType) : result; + runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context); + return normalized; } Tensor normalize(Tensor embedding, TensorType tensorType) { diff --git a/model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java b/model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java index 329b87cacd1..a0964eb5812 100644 --- a/model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java +++ b/model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java @@ -3,6 +3,7 @@ package ai.vespa.embedding; import ai.vespa.modelintegration.evaluator.OnnxRuntime; import com.yahoo.config.ModelReference; import com.yahoo.embedding.BertBaseEmbedderConfig; +import com.yahoo.language.process.Embedder; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorType; import org.junit.Test; @@ -69,7 +70,7 @@ public class BertBaseEmbedderTest { } private static BertBaseEmbedder newBertBaseEmbedder(BertBaseEmbedderConfig cfg) { - return new BertBaseEmbedder(new OnnxRuntime(), cfg); + return new BertBaseEmbedder(new OnnxRuntime(), Embedder.Runtime.testInstance(), cfg); } } |