diff options
author | Bjørn Christian Seime <bjorncs@yahooinc.com> | 2023-08-04 13:33:13 +0200 |
---|---|---|
committer | Bjørn Christian Seime <bjorncs@yahooinc.com> | 2023-08-04 13:52:27 +0200 |
commit | 3ddce3c158941cb8eabdb391d207fe004095c434 (patch) | |
tree | db8121fe016fa7bb5a0ad345af9f1bd58fbbfb6d | |
parent | 0f46015e498ecb622473cd3e2403283c99f9f5d5 (diff) |
Add generic metrics for embedders
12 files changed, 172 insertions, 16 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java index 31f8eba48bf..3a679782966 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java @@ -792,6 +792,9 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { /* The ONNX runtime is always available for injection to any component */ cluster.addSimpleComponent( ContainerModelEvaluation.ONNX_RUNTIME_CLASS, null, ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME); + /* Add runtime providing utilities such as metrics to embedder implementations */ + cluster.addSimpleComponent( + "ai.vespa.embedding.EmbedderRuntime", null, ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME); } private void addProcessing(DeployState deployState, Element spec, ApplicationContainerCluster cluster, ConfigModelContext context) { diff --git a/container-search/src/main/java/com/yahoo/search/schema/internal/TensorConverter.java b/container-search/src/main/java/com/yahoo/search/schema/internal/TensorConverter.java index 2370513dba2..3d9d28ee199 100644 --- a/container-search/src/main/java/com/yahoo/search/schema/internal/TensorConverter.java +++ b/container-search/src/main/java/com/yahoo/search/schema/internal/TensorConverter.java @@ -52,11 +52,12 @@ public class TensorConverter { throw new IllegalArgumentException("Expected any string enclosed in embed(), but the argument does not end by ')'"); String argument = s.substring("embed(".length(), s.length() - 1); Embedder embedder; + String embedderId; // Check if arguments specifies an embedder with the format embed(embedder, "text to encode") Matcher matcher = embedderArgumentRegexp.matcher(argument); if (matcher.matches()) { - String embedderId = matcher.group(1); + embedderId = matcher.group(1); argument = matcher.group(2); if ( ! embedders.containsKey(embedderId)) { throw new IllegalArgumentException("Can't find embedder '" + embedderId + "'. " + @@ -69,10 +70,11 @@ public class TensorConverter { throw new IllegalArgumentException("Multiple embedders are provided but no embedder id is given. " + "Valid embedders are " + validEmbedders(embedders)); } else { - embedder = embedders.entrySet().stream().findFirst().get().getValue(); + var entry = embedders.entrySet().stream().findFirst().get(); + embedderId = entry.getKey(); + embedder = entry.getValue(); } - - return embedder.embed(removeQuotes(argument), embedderContext, type); + return embedder.embed(removeQuotes(argument), embedderContext.copy().setEmbedderId(embedderId), type); } private static String removeQuotes(String s) { diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EmbedExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EmbedExpression.java index 9f2260e5b94..5ee5fea3158 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EmbedExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EmbedExpression.java @@ -106,7 +106,7 @@ public class EmbedExpression extends Expression { private Tensor embed(String input, TensorType targetType, ExecutionContext context) { return embedder.embed(input, - new Embedder.Context(destination).setLanguage(context.getLanguage()), + new Embedder.Context(destination).setLanguage(context.getLanguage()).setEmbedderId(embedderId), targetType); } diff --git a/linguistics/abi-spec.json b/linguistics/abi-spec.json index dc85a2e6f0b..680aec3ff57 100644 --- a/linguistics/abi-spec.json +++ b/linguistics/abi-spec.json @@ -338,10 +338,13 @@ ], "methods" : [ "public void <init>(java.lang.String)", + "public com.yahoo.language.process.Embedder$Context copy()", "public com.yahoo.language.Language getLanguage()", "public com.yahoo.language.process.Embedder$Context setLanguage(com.yahoo.language.Language)", "public java.lang.String getDestination()", - "public com.yahoo.language.process.Embedder$Context setDestination(java.lang.String)" + "public com.yahoo.language.process.Embedder$Context setDestination(java.lang.String)", + "public java.lang.String getEmbedderId()", + "public com.yahoo.language.process.Embedder$Context setEmbedderId(java.lang.String)" ], "fields" : [ ] }, @@ -361,6 +364,21 @@ ], "fields" : [ ] }, + "com.yahoo.language.process.Embedder$Runtime" : { + "superClass" : "java.lang.Object", + "interfaces" : [ ], + "attributes" : [ + "public", + "interface", + "abstract" + ], + "methods" : [ + "public abstract void sampleEmbeddingLatency(java.time.Duration, com.yahoo.language.process.Embedder$Context)", + "public abstract void sampleSequenceLength(long, com.yahoo.language.process.Embedder$Context)", + "public static com.yahoo.language.process.Embedder$Runtime testInstance()" + ], + "fields" : [ ] + }, "com.yahoo.language.process.Embedder" : { "superClass" : "java.lang.Object", "interfaces" : [ ], diff --git a/linguistics/src/main/java/com/yahoo/language/process/Embedder.java b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java index 055861c5388..1d2e1bcc847 100644 --- a/linguistics/src/main/java/com/yahoo/language/process/Embedder.java +++ b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java @@ -1,10 +1,12 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.language.process; +import com.yahoo.api.annotations.Beta; import com.yahoo.language.Language; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorType; +import java.time.Duration; import java.util.List; import java.util.Map; @@ -64,15 +66,42 @@ public interface Embedder { */ Tensor embed(String text, Context context, TensorType tensorType); + /** + * Runtime that is injectable through {@link Embedder} constructor. + */ + @Beta + interface Runtime { + /** Sample latency metric for embedding */ + void sampleEmbeddingLatency(Duration latency, Context ctx); + /** Sample sequence length metric for embedding */ + void sampleSequenceLength(long length, Context ctx); + + static Runtime testInstance() { + return new Runtime() { + @Override public void sampleEmbeddingLatency(Duration latency, Context ctx) { } + @Override public void sampleSequenceLength(long length, Context ctx) { } + }; + } + } + class Context { private Language language = Language.UNKNOWN; private String destination; + private String embedderId = "unknown"; public Context(String destination) { this.destination = destination; } + private Context(Context other) { + language = other.language; + destination = other.destination; + embedderId = other.embedderId; + } + + public Context copy() { return new Context(this); } + /** Returns the language of the text, or UNKNOWN (default) to use a language independent embedding */ public Language getLanguage() { return language; } @@ -102,6 +131,14 @@ public interface Embedder { return this; } + /** Return the embedder id or 'unknown' if not set */ + public String getEmbedderId() { return embedderId; } + + /** Sets the embedder id */ + public Context setEmbedderId(String embedderId) { + this.embedderId = embedderId; + return this; + } } class FailingEmbedder implements Embedder { diff --git a/metrics/src/main/java/ai/vespa/metrics/ContainerMetrics.java b/metrics/src/main/java/ai/vespa/metrics/ContainerMetrics.java index 4c7e203fee5..e515bbf9275 100644 --- a/metrics/src/main/java/ai/vespa/metrics/ContainerMetrics.java +++ b/metrics/src/main/java/ai/vespa/metrics/ContainerMetrics.java @@ -200,9 +200,10 @@ public enum ContainerMetrics implements VespaMetrics { SERVER_TOTAL_FAILED_RESPONSE_LATENCY("serverTotalFailedResponseLatency", Unit.MILLISECOND, "Total duration for execution of failed responses"), SERVER_TIME_TO_FIRST_BYTE("serverTimeToFirstByte", Unit.MILLISECOND, "Time from request has been received by the server until the first byte is returned to the client"), - SERVER_STARTED_MILLIS("serverStartedMillis", Unit.MILLISECOND, "Time since the service was started"); - + SERVER_STARTED_MILLIS("serverStartedMillis", Unit.MILLISECOND, "Time since the service was started"), + EMBEDDER_LATENCY("embedder.latency", Unit.MILLISECOND, "Time spent creating an embedding"), + EMBEDDER_SEQUENCE_LENGTH("embedder.sequence_length", Unit.BYTE, "Size of sequence produced by tokenizer"); private final String name; private final Unit unit; diff --git a/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java b/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java index 4ec596f8ce7..b7ed7293d6c 100644 --- a/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java +++ b/metrics/src/main/java/ai/vespa/metrics/set/VespaMetricSet.java @@ -108,6 +108,10 @@ public class VespaMetricSet { // Routing layer metrics addMetric(metrics, RoutingLayerMetrics.WORKER_CONNECTIONS.max()); // Hosted Vespa only (routing layer) + // Embedders + addMetric(metrics, ContainerMetrics.EMBEDDER_LATENCY, EnumSet.of(max, sum, count)); + addMetric(metrics, ContainerMetrics.EMBEDDER_SEQUENCE_LENGTH, EnumSet.of(max, sum, count)); + return metrics; } diff --git a/model-integration/pom.xml b/model-integration/pom.xml index 854e15298c6..d195a061c52 100644 --- a/model-integration/pom.xml +++ b/model-integration/pom.xml @@ -87,6 +87,18 @@ <scope>provided</scope> </dependency> <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>container-core</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>metrics</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + <dependency> <groupId>net.java.dev.jna</groupId> <artifactId>jna</artifactId> <scope>provided</scope> diff --git a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java index a12424c7d12..8b5b0f23b99 100644 --- a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java +++ b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java @@ -12,6 +12,8 @@ import com.yahoo.tensor.IndexedTensor; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorType; +import java.time.Duration; +import java.time.Instant; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -40,11 +42,13 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder { private final String outputName; private final PoolingStrategy poolingStrategy; + private final Embedder.Runtime runtime; private final WordPieceEmbedder tokenizer; private final OnnxEvaluator evaluator; @Inject - public BertBaseEmbedder(OnnxRuntime onnx, BertBaseEmbedderConfig config) { + public BertBaseEmbedder(OnnxRuntime onnx, Embedder.Runtime runtime, BertBaseEmbedderConfig config) { + this.runtime = runtime; maxTokens = config.transformerMaxTokens(); startSequenceToken = config.transformerStartSequenceToken(); endSequenceToken = config.transformerEndSequenceToken(); @@ -87,11 +91,16 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder { @Override public List<Integer> embed(String text, Context context) { - return tokenizer.embed(text, context); + var start = Instant.now(); + var tokens = tokenize(text, context); + runtime.sampleSequenceLength(tokens.size(), context); + runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context); + return tokens; } @Override public Tensor embed(String text, Context context, TensorType type) { + var start = Instant.now(); if (type.dimensions().size() != 1) { throw new IllegalArgumentException("Error in embedding to type '" + type + "': should only have one dimension."); } @@ -99,11 +108,16 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder { throw new IllegalArgumentException("Error in embedding to type '" + type + "': dimension should be indexed."); } List<Integer> tokens = embedWithSeparatorTokens(text, context, maxTokens); - return embedTokens(tokens, type); + runtime.sampleSequenceLength(tokens.size(), context); + var embedding = embedTokens(tokens, type); + runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context); + return embedding; } @Override public void deconstruct() { evaluator.close(); } + private List<Integer> tokenize(String text, Context ctx) { return tokenizer.embed(text, ctx); } + Tensor embedTokens(List<Integer> tokens, TensorType type) { Tensor inputSequence = createTensorRepresentation(tokens, "d1"); Tensor attentionMask = createAttentionMask(inputSequence); @@ -129,7 +143,7 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder { private List<Integer> embedWithSeparatorTokens(String text, Context context, int maxLength) { List<Integer> tokens = new ArrayList<>(); tokens.add(startSequenceToken); - tokens.addAll(embed(text, context)); + tokens.addAll(tokenize(text, context)); tokens.add(endSequenceToken); if (tokens.size() > maxLength) { tokens = tokens.subList(0, maxLength-1); diff --git a/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java b/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java new file mode 100644 index 00000000000..905dd720ef0 --- /dev/null +++ b/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java @@ -0,0 +1,52 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package ai.vespa.embedding; + +import ai.vespa.metrics.ContainerMetrics; +import com.yahoo.component.annotation.Inject; +import com.yahoo.language.Language; +import com.yahoo.language.process.Embedder; +import com.yahoo.metrics.simple.Gauge; +import com.yahoo.metrics.simple.MetricReceiver; +import com.yahoo.metrics.simple.Point; + +import java.time.Duration; +import java.util.HashMap; +import java.util.Map; + +/** + * @author bjorncs + */ +public class EmbedderRuntime implements Embedder.Runtime { + + private final Gauge embedLatency; + private final Gauge sequenceLength; + private final Map<MetricDimensions, Point> metricPointCache = new HashMap<>(); + + @Inject + public EmbedderRuntime(MetricReceiver metrics) { + embedLatency = metrics.declareGauge(ContainerMetrics.EMBEDDER_LATENCY.baseName()); + sequenceLength = metrics.declareGauge(ContainerMetrics.EMBEDDER_SEQUENCE_LENGTH.baseName()); + } + + @Override + public void sampleEmbeddingLatency(Duration latency, Embedder.Context ctx) { + embedLatency.sample(latency.toMillis(), metricPoint(ctx)); + } + + @Override + public void sampleSequenceLength(long length, Embedder.Context ctx) { + sequenceLength.sample(length, metricPoint(ctx)); + } + + private Point metricPoint(Embedder.Context ctx) { + var dimensions = new MetricDimensions(ctx.getEmbedderId(), ctx.getLanguage(), ctx.getDestination()); + return metricPointCache.computeIfAbsent( + dimensions, d -> new Point(Map.of("embedder", d.embedderId(), + "language", d.language().languageCode(), + "destination", d.destination()))); + } + + private record MetricDimensions(String embedderId, Language language, String destination) {} + +} diff --git a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java index b035541bb0f..73d6424739e 100644 --- a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java +++ b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java @@ -16,6 +16,8 @@ import com.yahoo.tensor.TensorAddress; import com.yahoo.tensor.TensorType; import java.nio.file.Paths; +import java.time.Duration; +import java.time.Instant; import java.util.List; import java.util.Map; import java.util.logging.Logger; @@ -27,6 +29,7 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder { private static final Logger log = Logger.getLogger(HuggingFaceEmbedder.class.getName()); + private final Embedder.Runtime runtime; private final String inputIdsName; private final String attentionMaskName; private final String tokenTypeIdsName; @@ -37,7 +40,8 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder { private final PoolingStrategy poolingStrategy; @Inject - public HuggingFaceEmbedder(OnnxRuntime onnx, HuggingFaceEmbedderConfig config) { + public HuggingFaceEmbedder(OnnxRuntime onnx, Embedder.Runtime runtime, HuggingFaceEmbedderConfig config) { + this.runtime = runtime; inputIdsName = config.transformerInputIds(); attentionMaskName = config.transformerAttentionMask(); tokenTypeIdsName = config.transformerTokenTypeIds(); @@ -87,7 +91,11 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder { @Override public List<Integer> embed(String s, Context context) { - return tokenizer.embed(s, context); + var start = Instant.now(); + var tokens = tokenizer.embed(s, context); + runtime.sampleSequenceLength(tokens.size(), context); + runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context); + return tokens; } @Override @@ -98,7 +106,9 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder { @Override public Tensor embed(String s, Context context, TensorType tensorType) { + var start = Instant.now(); var encoding = tokenizer.encode(s, context.getLanguage()); + runtime.sampleSequenceLength(encoding.ids().size(), context); Tensor inputSequence = createTensorRepresentation(encoding.ids(), "d1"); Tensor attentionMask = createTensorRepresentation(encoding.attentionMask(), "d1"); Tensor tokenTypeIds = tokenTypeIdsName.isEmpty() ? null : createTensorRepresentation(encoding.typeIds(), "d1"); @@ -117,7 +127,9 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder { Map<String, Tensor> outputs = evaluator.evaluate(inputs); Tensor tokenEmbeddings = outputs.get(outputName); var result = poolingStrategy.toSentenceEmbedding(tensorType, tokenEmbeddings, attentionMask); - return normalize ? normalize(result, tensorType) : result; + var normalized = normalize ? normalize(result, tensorType) : result; + runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context); + return normalized; } Tensor normalize(Tensor embedding, TensorType tensorType) { diff --git a/model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java b/model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java index 329b87cacd1..a0964eb5812 100644 --- a/model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java +++ b/model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java @@ -3,6 +3,7 @@ package ai.vespa.embedding; import ai.vespa.modelintegration.evaluator.OnnxRuntime; import com.yahoo.config.ModelReference; import com.yahoo.embedding.BertBaseEmbedderConfig; +import com.yahoo.language.process.Embedder; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorType; import org.junit.Test; @@ -69,7 +70,7 @@ public class BertBaseEmbedderTest { } private static BertBaseEmbedder newBertBaseEmbedder(BertBaseEmbedderConfig cfg) { - return new BertBaseEmbedder(new OnnxRuntime(), cfg); + return new BertBaseEmbedder(new OnnxRuntime(), Embedder.Runtime.testInstance(), cfg); } } |