From ae674d6d002ca0f99b401e1215d45d188ba81e12 Mon Sep 17 00:00:00 2001 From: Bjørn Christian Seime Date: Fri, 25 Aug 2023 09:49:50 +0200 Subject: Allow sampling of fractional millis --- .../src/main/java/ai/vespa/embedding/BertBaseEmbedder.java | 10 ++++------ .../src/main/java/ai/vespa/embedding/EmbedderRuntime.java | 5 ++--- .../ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java | 10 ++++------ 3 files changed, 10 insertions(+), 15 deletions(-) (limited to 'model-integration/src/main/java') diff --git a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java index 8b5b0f23b99..2c4f09b3821 100644 --- a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java +++ b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java @@ -12,8 +12,6 @@ import com.yahoo.tensor.IndexedTensor; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorType; -import java.time.Duration; -import java.time.Instant; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -91,16 +89,16 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder { @Override public List embed(String text, Context context) { - var start = Instant.now(); + var start = System.nanoTime(); var tokens = tokenize(text, context); runtime.sampleSequenceLength(tokens.size(), context); - runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context); + runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context); return tokens; } @Override public Tensor embed(String text, Context context, TensorType type) { - var start = Instant.now(); + var start = System.nanoTime(); if (type.dimensions().size() != 1) { throw new IllegalArgumentException("Error in embedding to type '" + type + "': should only have one dimension."); } @@ -110,7 +108,7 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder { List tokens = embedWithSeparatorTokens(text, context, maxTokens); runtime.sampleSequenceLength(tokens.size(), context); var embedding = embedTokens(tokens, type); - runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context); + runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context); return embedding; } diff --git a/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java b/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java index 905dd720ef0..45068db67f4 100644 --- a/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java +++ b/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java @@ -10,7 +10,6 @@ import com.yahoo.metrics.simple.Gauge; import com.yahoo.metrics.simple.MetricReceiver; import com.yahoo.metrics.simple.Point; -import java.time.Duration; import java.util.HashMap; import java.util.Map; @@ -30,8 +29,8 @@ public class EmbedderRuntime implements Embedder.Runtime { } @Override - public void sampleEmbeddingLatency(Duration latency, Embedder.Context ctx) { - embedLatency.sample(latency.toMillis(), metricPoint(ctx)); + public void sampleEmbeddingLatency(double millis, Embedder.Context ctx) { + embedLatency.sample(millis, metricPoint(ctx)); } @Override diff --git a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java index 73d6424739e..ab8d33dbf17 100644 --- a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java +++ b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java @@ -16,8 +16,6 @@ import com.yahoo.tensor.TensorAddress; import com.yahoo.tensor.TensorType; import java.nio.file.Paths; -import java.time.Duration; -import java.time.Instant; import java.util.List; import java.util.Map; import java.util.logging.Logger; @@ -91,10 +89,10 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder { @Override public List embed(String s, Context context) { - var start = Instant.now(); + var start = System.nanoTime(); var tokens = tokenizer.embed(s, context); runtime.sampleSequenceLength(tokens.size(), context); - runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context); + runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context); return tokens; } @@ -106,7 +104,7 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder { @Override public Tensor embed(String s, Context context, TensorType tensorType) { - var start = Instant.now(); + var start = System.nanoTime(); var encoding = tokenizer.encode(s, context.getLanguage()); runtime.sampleSequenceLength(encoding.ids().size(), context); Tensor inputSequence = createTensorRepresentation(encoding.ids(), "d1"); @@ -128,7 +126,7 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder { Tensor tokenEmbeddings = outputs.get(outputName); var result = poolingStrategy.toSentenceEmbedding(tensorType, tokenEmbeddings, attentionMask); var normalized = normalize ? normalize(result, tensorType) : result; - runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context); + runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context); return normalized; } -- cgit v1.2.3