about | summary | refs | log | tree | commit | diff | stats
path: root/metrics-proxy
diff options
context:
space:
mode:
Diffstat (limited to 'metrics-proxy')
-rw-r--r-- metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java | 40
-rw-r--r-- metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPollerProvider.java | 7
2 files changed, 30 insertions, 17 deletions
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java
index c548d187569..27f86b0d503 100644
--- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java
@@ -5,6 +5,9 @@ import ai.vespa.metricsproxy.metric.Metric;
import ai.vespa.metricsproxy.metric.Metrics;
import ai.vespa.metricsproxy.metric.model.MetricId;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.HashMap;
import java.util.logging.Level;
import java.io.BufferedReader;
@@ -35,7 +38,7 @@ public class SystemPoller {
private static final MetricId MEMORY_VIRT = MetricId.toMetricId("memory_virt");
private static final MetricId MEMORY_RSS = MetricId.toMetricId("memory_rss");
- private final int pollingIntervalSecs;
+ private final Duration interval;
private final List<VespaService> services;
private final Map<VespaService, Long> lastCpuJiffiesMetrics = new ConcurrentHashMap<>();
private final Timer systemPollTimer;
@@ -70,9 +73,9 @@ public class SystemPoller {
long getJiffies(VespaService service);
}
- public SystemPoller(List<VespaService> services, int pollingIntervalSecs) {
+ public SystemPoller(List<VespaService> services, Duration interval) {
this.services = services;
- this.pollingIntervalSecs = pollingIntervalSecs;
+ this.interval = interval;
systemPollTimer = new Timer("systemPollTimer", true);
jiffiesInterface = new GetJiffies() {
@Override
@@ -138,7 +141,7 @@ public class SystemPoller {
* Poll services for system metrics
*/
void poll() {
- long startTime = System.currentTimeMillis();
+ Instant startTime = Instant.now();
/* Don't do any work if there are no known services */
if (services.isEmpty()) {
@@ -149,11 +152,11 @@ public class SystemPoller {
log.log(Level.FINE, () -> "Monitoring system metrics for " + services.size() + " services");
boolean someAlive = services.stream().anyMatch(VespaService::isAlive);
- lastTotalCpuJiffies = updateMetrics(lastTotalCpuJiffies, startTime/1000, jiffiesInterface, services, lastCpuJiffiesMetrics);
+ lastTotalCpuJiffies = updateMetrics(lastTotalCpuJiffies, startTime.getEpochSecond(), jiffiesInterface, services, lastCpuJiffiesMetrics);
// If none of the services were alive, reschedule in a short time
if (!someAlive) {
- reschedule(System.currentTimeMillis() - startTime);
+ reschedule(Duration.between(startTime, Instant.now()));
} else {
schedule();
}
@@ -161,6 +164,10 @@ public class SystemPoller {
static JiffiesAndCpus updateMetrics(JiffiesAndCpus prevTotalJiffies, long timeStamp, GetJiffies getJiffies,
List<VespaService> services, Map<VespaService, Long> lastCpuJiffiesMetrics) {
+ Map<VespaService, Long> currentServiceJiffies = new HashMap<>();
+ for (VespaService s : services) {
+ currentServiceJiffies.put(s, getJiffies.getJiffies(s));
+ }
JiffiesAndCpus sysJiffies = getJiffies.getTotalSystemJiffies();
JiffiesAndCpus sysJiffiesDiff = sysJiffies.diff(prevTotalJiffies);
log.log(Level.FINE, () -> "Total jiffies: " + sysJiffies.jiffies + " - " + prevTotalJiffies.jiffies + " = " + sysJiffiesDiff.jiffies);
@@ -173,7 +180,7 @@ public class SystemPoller {
metrics.add(new Metric(MEMORY_VIRT, size[memoryTypeVirtual], timeStamp));
metrics.add(new Metric(MEMORY_RSS, size[memoryTypeResident], timeStamp));
- long procJiffies = getJiffies.getJiffies(s);
+ long procJiffies = currentServiceJiffies.get(s);
long last = lastCpuJiffiesMetrics.get(s);
long diff = procJiffies - last;
@@ -253,24 +260,27 @@ public class SystemPoller {
: new JiffiesAndCpus();
}
- private void schedule(long time) {
+ void schedule(Duration time) {
try {
- systemPollTimer.schedule(new PollTask(this), time);
+ systemPollTimer.schedule(new PollTask(this), time.toMillis());
} catch(IllegalStateException e){
log.info("Tried to schedule task, but timer was already shut down.");
}
}
- public void schedule() {
- schedule(pollingIntervalSecs * 1000L);
+ void schedule() {
+ schedule(interval);
}
- private void reschedule(long skew) {
- long sleep = (pollingIntervalSecs * 1000L) - skew;
+ private void reschedule(Duration skew) {
+ Duration sleep = interval.minus(skew);
// Don't sleep less than 1 min
- sleep = Math.max(60 * 1000, sleep);
- schedule(sleep);
+ if ( sleep.compareTo(Duration.ofMinutes(1)) < 0) {
+ schedule(Duration.ofMinutes(1));
+ } else {
+ schedule(sleep);
+ }
}
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPollerProvider.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPollerProvider.java
index 38a0ea5ed2d..05914c40469 100644
--- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPollerProvider.java
+++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPollerProvider.java
@@ -4,6 +4,8 @@ package ai.vespa.metricsproxy.service;
import ai.vespa.metricsproxy.core.MonitoringConfig;
import com.yahoo.container.di.componentgraph.Provider;
+import java.time.Duration;
+
/**
* @author gjoranv
*/
@@ -17,8 +19,9 @@ public class SystemPollerProvider implements Provider<SystemPoller> {
*/
public SystemPollerProvider (VespaServices services, MonitoringConfig monitoringConfig) {
if (runningOnLinux()) {
- poller = new SystemPoller(services.getVespaServices(), 60 * monitoringConfig.intervalMinutes());
- poller.poll();
+ Duration interval = Duration.ofMinutes(monitoringConfig.intervalMinutes());
+ poller = new SystemPoller(services.getVespaServices(), interval);
+ poller.schedule(Duration.ofSeconds(5));
} else {
poller = null;
}