1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package ai.vespa.embedding;
import ai.vespa.metrics.ContainerMetrics;
import com.yahoo.component.annotation.Inject;
import com.yahoo.language.Language;
import com.yahoo.language.process.Embedder;
import com.yahoo.metrics.simple.Gauge;
import com.yahoo.metrics.simple.MetricReceiver;
import com.yahoo.metrics.simple.Point;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/**
 * {@link Embedder.Runtime} implementation that samples embedding latency and sequence length
 * into gauges, tagging each sample with the embedder id, language code and destination
 * read from the supplied {@link Embedder.Context}.
 *
 * @author bjorncs
 */
public class EmbedderRuntime implements Embedder.Runtime {

    /** Holds one {@link Point} per distinct combination of dimension values seen so far. */
    private final Map<MetricDimensions, Point> pointsByDimensions = new ConcurrentHashMap<>();

    private final Gauge latencyGauge;
    private final Gauge sequenceLengthGauge;

    /**
     * Declares the latency and sequence length gauges on the given receiver.
     *
     * @param metrics the receiver the gauges are declared on
     */
    @Inject
    public EmbedderRuntime(MetricReceiver metrics) {
        this.latencyGauge = metrics.declareGauge(ContainerMetrics.EMBEDDER_LATENCY.baseName());
        this.sequenceLengthGauge = metrics.declareGauge(ContainerMetrics.EMBEDDER_SEQUENCE_LENGTH.baseName());
    }

    /**
     * Samples an embedding latency value.
     *
     * @param millis the latency value to sample
     * @param ctx    the context supplying the metric dimensions
     */
    @Override
    public void sampleEmbeddingLatency(double millis, Embedder.Context ctx) {
        Point point = metricPoint(ctx);
        latencyGauge.sample(millis, point);
    }

    /**
     * Samples a sequence length value.
     *
     * @param length the sequence length to sample
     * @param ctx    the context supplying the metric dimensions
     */
    @Override
    public void sampleSequenceLength(long length, Embedder.Context ctx) {
        Point point = metricPoint(ctx);
        sequenceLengthGauge.sample(length, point);
    }

    /**
     * Returns the metric point for the dimensions of the given context, creating and
     * caching it the first time that combination of dimensions is seen.
     */
    private Point metricPoint(Embedder.Context ctx) {
        var key = new MetricDimensions(ctx.getEmbedderId(), ctx.getLanguage(), ctx.getDestination());
        return pointsByDimensions.computeIfAbsent(key, MetricDimensions::toPoint);
    }

    /** Cache key: the set of values that together identify one metric point. */
    private record MetricDimensions(String embedderId, Language language, String destination) {

        /** Builds the metric point carrying these dimension values. */
        Point toPoint() {
            return new Point(Map.of("embedder", embedderId,
                                    "language", language.languageCode(),
                                    "destination", destination));
        }
    }
}
|