about | summary | refs | log | tree | commit | diff | stats
path: root/model-integration
diff options
context:
space:
mode:
author: Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-08-25 09:49:50 +0200
committer: Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-08-25 09:49:50 +0200
commit: ae674d6d002ca0f99b401e1215d45d188ba81e12 (patch)
tree: 2906e6970527db4401cbd1165ad75ceecde0a254 /model-integration
parent: 3ddce3c158941cb8eabdb391d207fe004095c434 (diff)
Allow sampling of fractional millis
Diffstat (limited to 'model-integration')
-rw-r--r-- model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java 10
-rw-r--r-- model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java 5
-rw-r--r-- model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java 10
3 files changed, 10 insertions, 15 deletions
diff --git a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java
index 8b5b0f23b99..2c4f09b3821 100644
--- a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java
+++ b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java
@@ -12,8 +12,6 @@ import com.yahoo.tensor.IndexedTensor;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.TensorType;
-import java.time.Duration;
-import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@@ -91,16 +89,16 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder {
@Override
public List<Integer> embed(String text, Context context) {
- var start = Instant.now();
+ var start = System.nanoTime();
var tokens = tokenize(text, context);
runtime.sampleSequenceLength(tokens.size(), context);
- runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context);
+ runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context);
return tokens;
}
@Override
public Tensor embed(String text, Context context, TensorType type) {
- var start = Instant.now();
+ var start = System.nanoTime();
if (type.dimensions().size() != 1) {
throw new IllegalArgumentException("Error in embedding to type '" + type + "': should only have one dimension.");
}
@@ -110,7 +108,7 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder {
List<Integer> tokens = embedWithSeparatorTokens(text, context, maxTokens);
runtime.sampleSequenceLength(tokens.size(), context);
var embedding = embedTokens(tokens, type);
- runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context);
+ runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context);
return embedding;
}
diff --git a/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java b/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java
index 905dd720ef0..45068db67f4 100644
--- a/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java
+++ b/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java
@@ -10,7 +10,6 @@ import com.yahoo.metrics.simple.Gauge;
import com.yahoo.metrics.simple.MetricReceiver;
import com.yahoo.metrics.simple.Point;
-import java.time.Duration;
import java.util.HashMap;
import java.util.Map;
@@ -30,8 +29,8 @@ public class EmbedderRuntime implements Embedder.Runtime {
}
@Override
- public void sampleEmbeddingLatency(Duration latency, Embedder.Context ctx) {
- embedLatency.sample(latency.toMillis(), metricPoint(ctx));
+ public void sampleEmbeddingLatency(double millis, Embedder.Context ctx) {
+ embedLatency.sample(millis, metricPoint(ctx));
}
@Override
diff --git a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
index 73d6424739e..ab8d33dbf17 100644
--- a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
+++ b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
@@ -16,8 +16,6 @@ import com.yahoo.tensor.TensorAddress;
import com.yahoo.tensor.TensorType;
import java.nio.file.Paths;
-import java.time.Duration;
-import java.time.Instant;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
@@ -91,10 +89,10 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder {
@Override
public List<Integer> embed(String s, Context context) {
- var start = Instant.now();
+ var start = System.nanoTime();
var tokens = tokenizer.embed(s, context);
runtime.sampleSequenceLength(tokens.size(), context);
- runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context);
+ runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context);
return tokens;
}
@@ -106,7 +104,7 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder {
@Override
public Tensor embed(String s, Context context, TensorType tensorType) {
- var start = Instant.now();
+ var start = System.nanoTime();
var encoding = tokenizer.encode(s, context.getLanguage());
runtime.sampleSequenceLength(encoding.ids().size(), context);
Tensor inputSequence = createTensorRepresentation(encoding.ids(), "d1");
@@ -128,7 +126,7 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder {
Tensor tokenEmbeddings = outputs.get(outputName);
var result = poolingStrategy.toSentenceEmbedding(tensorType, tokenEmbeddings, attentionMask);
var normalized = normalize ? normalize(result, tensorType) : result;
- runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context);
+ runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context);
return normalized;
}