diff options
5 files changed, 13 insertions, 19 deletions
diff --git a/linguistics/abi-spec.json b/linguistics/abi-spec.json index 680aec3ff57..1ffb879e57e 100644 --- a/linguistics/abi-spec.json +++ b/linguistics/abi-spec.json @@ -373,7 +373,7 @@ "abstract" ], "methods" : [ - "public abstract void sampleEmbeddingLatency(java.time.Duration, com.yahoo.language.process.Embedder$Context)", + "public abstract void sampleEmbeddingLatency(double, com.yahoo.language.process.Embedder$Context)", "public abstract void sampleSequenceLength(long, com.yahoo.language.process.Embedder$Context)", "public static com.yahoo.language.process.Embedder$Runtime testInstance()" ], diff --git a/linguistics/src/main/java/com/yahoo/language/process/Embedder.java b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java index 1d2e1bcc847..98030a4f054 100644 --- a/linguistics/src/main/java/com/yahoo/language/process/Embedder.java +++ b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java @@ -6,7 +6,6 @@ import com.yahoo.language.Language; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorType; -import java.time.Duration; import java.util.List; import java.util.Map; @@ -72,13 +71,13 @@ public interface Embedder { @Beta interface Runtime { /** Sample latency metric for embedding */ - void sampleEmbeddingLatency(Duration latency, Context ctx); + void sampleEmbeddingLatency(double millis, Context ctx); /** Sample sequence length metric for embedding */ void sampleSequenceLength(long length, Context ctx); static Runtime testInstance() { return new Runtime() { - @Override public void sampleEmbeddingLatency(Duration latency, Context ctx) { } + @Override public void sampleEmbeddingLatency(double millis, Context ctx) { } @Override public void sampleSequenceLength(long length, Context ctx) { } }; } diff --git a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java index 8b5b0f23b99..2c4f09b3821 100644 --- 
a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java +++ b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java @@ -12,8 +12,6 @@ import com.yahoo.tensor.IndexedTensor; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorType; -import java.time.Duration; -import java.time.Instant; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -91,16 +89,16 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder { @Override public List<Integer> embed(String text, Context context) { - var start = Instant.now(); + var start = System.nanoTime(); var tokens = tokenize(text, context); runtime.sampleSequenceLength(tokens.size(), context); - runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context); + runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context); return tokens; } @Override public Tensor embed(String text, Context context, TensorType type) { - var start = Instant.now(); + var start = System.nanoTime(); if (type.dimensions().size() != 1) { throw new IllegalArgumentException("Error in embedding to type '" + type + "': should only have one dimension."); } @@ -110,7 +108,7 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder { List<Integer> tokens = embedWithSeparatorTokens(text, context, maxTokens); runtime.sampleSequenceLength(tokens.size(), context); var embedding = embedTokens(tokens, type); - runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context); + runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context); return embedding; } diff --git a/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java b/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java index 905dd720ef0..45068db67f4 100644 --- a/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java +++ 
b/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java @@ -10,7 +10,6 @@ import com.yahoo.metrics.simple.Gauge; import com.yahoo.metrics.simple.MetricReceiver; import com.yahoo.metrics.simple.Point; -import java.time.Duration; import java.util.HashMap; import java.util.Map; @@ -30,8 +29,8 @@ public class EmbedderRuntime implements Embedder.Runtime { } @Override - public void sampleEmbeddingLatency(Duration latency, Embedder.Context ctx) { - embedLatency.sample(latency.toMillis(), metricPoint(ctx)); + public void sampleEmbeddingLatency(double millis, Embedder.Context ctx) { + embedLatency.sample(millis, metricPoint(ctx)); } @Override diff --git a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java index 73d6424739e..ab8d33dbf17 100644 --- a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java +++ b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java @@ -16,8 +16,6 @@ import com.yahoo.tensor.TensorAddress; import com.yahoo.tensor.TensorType; import java.nio.file.Paths; -import java.time.Duration; -import java.time.Instant; import java.util.List; import java.util.Map; import java.util.logging.Logger; @@ -91,10 +89,10 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder { @Override public List<Integer> embed(String s, Context context) { - var start = Instant.now(); + var start = System.nanoTime(); var tokens = tokenizer.embed(s, context); runtime.sampleSequenceLength(tokens.size(), context); - runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context); + runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context); return tokens; } @@ -106,7 +104,7 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder { @Override public Tensor embed(String s, Context context, 
TensorType tensorType) { - var start = Instant.now(); + var start = System.nanoTime(); var encoding = tokenizer.encode(s, context.getLanguage()); runtime.sampleSequenceLength(encoding.ids().size(), context); Tensor inputSequence = createTensorRepresentation(encoding.ids(), "d1"); @@ -128,7 +126,7 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder { Tensor tokenEmbeddings = outputs.get(outputName); var result = poolingStrategy.toSentenceEmbedding(tensorType, tokenEmbeddings, attentionMask); var normalized = normalize ? normalize(result, tensorType) : result; - runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context); + runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context); return normalized; }