about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-08-25 09:49:50 +0200
committer Bjørn Christian Seime <bjorncs@yahooinc.com> 2023-08-25 09:49:50 +0200
commit ae674d6d002ca0f99b401e1215d45d188ba81e12 (patch)
tree 2906e6970527db4401cbd1165ad75ceecde0a254
parent 3ddce3c158941cb8eabdb391d207fe004095c434 (diff)
Allow sampling of fractional millis
-rw-r--r-- linguistics/abi-spec.json 2
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/process/Embedder.java 5
-rw-r--r-- model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java 10
-rw-r--r-- model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java 5
-rw-r--r-- model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java 10
5 files changed, 13 insertions, 19 deletions
diff --git a/linguistics/abi-spec.json b/linguistics/abi-spec.json
index 680aec3ff57..1ffb879e57e 100644
--- a/linguistics/abi-spec.json
+++ b/linguistics/abi-spec.json
@@ -373,7 +373,7 @@
"abstract"
],
"methods" : [
- "public abstract void sampleEmbeddingLatency(java.time.Duration, com.yahoo.language.process.Embedder$Context)",
+ "public abstract void sampleEmbeddingLatency(double, com.yahoo.language.process.Embedder$Context)",
"public abstract void sampleSequenceLength(long, com.yahoo.language.process.Embedder$Context)",
"public static com.yahoo.language.process.Embedder$Runtime testInstance()"
],
diff --git a/linguistics/src/main/java/com/yahoo/language/process/Embedder.java b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java
index 1d2e1bcc847..98030a4f054 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/Embedder.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/Embedder.java
@@ -6,7 +6,6 @@ import com.yahoo.language.Language;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.TensorType;
-import java.time.Duration;
import java.util.List;
import java.util.Map;
@@ -72,13 +71,13 @@ public interface Embedder {
@Beta
interface Runtime {
/** Sample latency metric for embedding */
- void sampleEmbeddingLatency(Duration latency, Context ctx);
+ void sampleEmbeddingLatency(double millis, Context ctx);
/** Sample sequence length metric for embedding */
void sampleSequenceLength(long length, Context ctx);
static Runtime testInstance() {
return new Runtime() {
- @Override public void sampleEmbeddingLatency(Duration latency, Context ctx) { }
+ @Override public void sampleEmbeddingLatency(double millis, Context ctx) { }
@Override public void sampleSequenceLength(long length, Context ctx) { }
};
}
diff --git a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java
index 8b5b0f23b99..2c4f09b3821 100644
--- a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java
+++ b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java
@@ -12,8 +12,6 @@ import com.yahoo.tensor.IndexedTensor;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.TensorType;
-import java.time.Duration;
-import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@@ -91,16 +89,16 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder {
@Override
public List<Integer> embed(String text, Context context) {
- var start = Instant.now();
+ var start = System.nanoTime();
var tokens = tokenize(text, context);
runtime.sampleSequenceLength(tokens.size(), context);
- runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context);
+ runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context);
return tokens;
}
@Override
public Tensor embed(String text, Context context, TensorType type) {
- var start = Instant.now();
+ var start = System.nanoTime();
if (type.dimensions().size() != 1) {
throw new IllegalArgumentException("Error in embedding to type '" + type + "': should only have one dimension.");
}
@@ -110,7 +108,7 @@ public class BertBaseEmbedder extends AbstractComponent implements Embedder {
List<Integer> tokens = embedWithSeparatorTokens(text, context, maxTokens);
runtime.sampleSequenceLength(tokens.size(), context);
var embedding = embedTokens(tokens, type);
- runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context);
+ runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context);
return embedding;
}
diff --git a/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java b/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java
index 905dd720ef0..45068db67f4 100644
--- a/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java
+++ b/model-integration/src/main/java/ai/vespa/embedding/EmbedderRuntime.java
@@ -10,7 +10,6 @@ import com.yahoo.metrics.simple.Gauge;
import com.yahoo.metrics.simple.MetricReceiver;
import com.yahoo.metrics.simple.Point;
-import java.time.Duration;
import java.util.HashMap;
import java.util.Map;
@@ -30,8 +29,8 @@ public class EmbedderRuntime implements Embedder.Runtime {
}
@Override
- public void sampleEmbeddingLatency(Duration latency, Embedder.Context ctx) {
- embedLatency.sample(latency.toMillis(), metricPoint(ctx));
+ public void sampleEmbeddingLatency(double millis, Embedder.Context ctx) {
+ embedLatency.sample(millis, metricPoint(ctx));
}
@Override
diff --git a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
index 73d6424739e..ab8d33dbf17 100644
--- a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
+++ b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceEmbedder.java
@@ -16,8 +16,6 @@ import com.yahoo.tensor.TensorAddress;
import com.yahoo.tensor.TensorType;
import java.nio.file.Paths;
-import java.time.Duration;
-import java.time.Instant;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
@@ -91,10 +89,10 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder {
@Override
public List<Integer> embed(String s, Context context) {
- var start = Instant.now();
+ var start = System.nanoTime();
var tokens = tokenizer.embed(s, context);
runtime.sampleSequenceLength(tokens.size(), context);
- runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context);
+ runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context);
return tokens;
}
@@ -106,7 +104,7 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder {
@Override
public Tensor embed(String s, Context context, TensorType tensorType) {
- var start = Instant.now();
+ var start = System.nanoTime();
var encoding = tokenizer.encode(s, context.getLanguage());
runtime.sampleSequenceLength(encoding.ids().size(), context);
Tensor inputSequence = createTensorRepresentation(encoding.ids(), "d1");
@@ -128,7 +126,7 @@ public class HuggingFaceEmbedder extends AbstractComponent implements Embedder {
Tensor tokenEmbeddings = outputs.get(outputName);
var result = poolingStrategy.toSentenceEmbedding(tensorType, tokenEmbeddings, attentionMask);
var normalized = normalize ? normalize(result, tensorType) : result;
- runtime.sampleEmbeddingLatency(Duration.between(start, Instant.now()), context);
+ runtime.sampleEmbeddingLatency((System.nanoTime() - start)/1_000_000d, context);
return normalized;
}