diff options
Diffstat (limited to 'metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java')
-rw-r--r-- | metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java | 105 |
1 files changed, 61 insertions, 44 deletions
diff --git a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java index 9a688364b38..3f0ea4743ad 100644 --- a/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java +++ b/metrics-proxy/src/main/java/ai/vespa/metricsproxy/service/SystemPoller.java @@ -28,16 +28,16 @@ import java.util.logging.Logger; public class SystemPoller { private static final Logger log = Logger.getLogger(SystemPoller.class.getName()); + private static final int memoryTypeVirtual = 0; + private static final int memoryTypeResident = 1; private final int pollingIntervalSecs; private final List<VespaService> services; - - private final int memoryTypeVirtual = 0; - private final int memoryTypeResident = 1; private final Map<VespaService, Long> lastCpuJiffiesMetrics = new ConcurrentHashMap<>(); private final Timer systemPollTimer; + private final GetJiffies jiffiesInterface; - private JiffiesAndCpus lastTotalCpuJiffies = null; + private JiffiesAndCpus lastTotalCpuJiffies; static class JiffiesAndCpus { final long jiffies; @@ -56,11 +56,30 @@ public class SystemPoller { : new JiffiesAndCpus(); } } + interface GetJiffies { + JiffiesAndCpus getTotalSystemJiffies(); + long getJiffies(VespaService service); + } public SystemPoller(List<VespaService> services, int pollingIntervalSecs) { this.services = services; this.pollingIntervalSecs = pollingIntervalSecs; systemPollTimer = new Timer("systemPollTimer", true); + jiffiesInterface = new GetJiffies() { + @Override + public JiffiesAndCpus getTotalSystemJiffies() { + return SystemPoller.getTotalSystemJiffies(); + } + + @Override + public long getJiffies(VespaService service) { + return SystemPoller.getPidJiffies(service); + } + }; + lastTotalCpuJiffies = jiffiesInterface.getTotalSystemJiffies(); + for (VespaService s : services) { + lastCpuJiffiesMetrics.put(s, jiffiesInterface.getJiffies(s)); + } } void stop() { @@ -74,7 +93,7 @@ public class SystemPoller { * @param service The instance to get memory usage for * @return array[0] = memoryResident, array[1] = memoryVirtual (kB units) */ - long[] getMemoryUsage(VespaService service) { + static long[] getMemoryUsage(VespaService service) { long[] size = new long[2]; BufferedReader br; int pid = service.getPid(); @@ -111,7 +130,6 @@ public class SystemPoller { */ void poll() { long startTime = System.currentTimeMillis(); - boolean someAlive = false; /* Don't do any work if there are no known services */ if (services.isEmpty()) { @@ -121,62 +139,59 @@ public class SystemPoller { log.log(Level.FINE, () -> "Monitoring system metrics for " + services.size() + " services"); - JiffiesAndCpus sysJiffies = getTotalSystemJiffies(); - for (VespaService s : services) { - + boolean someAlive = services.stream().anyMatch(VespaService::isAlive); + lastTotalCpuJiffies = updateMetrics(lastTotalCpuJiffies, startTime/1000, jiffiesInterface, services, lastCpuJiffiesMetrics); - if(s.isAlive()) { - someAlive = true; - } + // If none of the services were alive, reschedule in a short time + if (!someAlive) { + reschedule(System.currentTimeMillis() - startTime); + } else { + schedule(); + } + } + static JiffiesAndCpus updateMetrics(JiffiesAndCpus prevTotalJiffies, long timeStamp, GetJiffies getJiffies, + List<VespaService> services, Map<VespaService, Long> lastCpuJiffiesMetrics) { + JiffiesAndCpus sysJiffies = getJiffies.getTotalSystemJiffies(); + JiffiesAndCpus sysJiffiesDiff = sysJiffies.diff(prevTotalJiffies); + for (VespaService s : services) { Metrics metrics = new Metrics(); log.log(Level.FINE, () -> "Current size of system metrics for service " + s + " is " + metrics.size()); long[] size = getMemoryUsage(s); log.log(Level.FINE, () -> "Updating memory metric for service " + s); - long timeStamp = startTime / 1000; metrics.add(new Metric(MetricId.toMetricId("memory_virt"), size[memoryTypeVirtual], timeStamp)); metrics.add(new Metric(MetricId.toMetricId("memory_rss"), size[memoryTypeResident], timeStamp)); - long procJiffies = getPidJiffies(s); - if ((lastTotalCpuJiffies != null) && lastCpuJiffiesMetrics.containsKey(s)) { - long last = lastCpuJiffiesMetrics.get(s); - long diff = procJiffies - last; + long procJiffies = getJiffies.getJiffies(s); + long last = lastCpuJiffiesMetrics.get(s); + long diff = procJiffies - last; - if (diff >= 0) { - JiffiesAndCpus sysJiffiesDiff = sysJiffies.diff(lastTotalCpuJiffies); - metrics.add(new Metric(MetricId.toMetricId("cpu"), 100 * ((double) diff) / sysJiffiesDiff.normalizedJiffies(), timeStamp)); - metrics.add(new Metric(MetricId.toMetricId("cpu.util"), 100 * ((double) diff) / sysJiffiesDiff.jiffies, timeStamp)); - } + if (diff >= 0) { + metrics.add(new Metric(MetricId.toMetricId("cpu"), 100 * ((double) diff) / sysJiffiesDiff.normalizedJiffies(), timeStamp)); + metrics.add(new Metric(MetricId.toMetricId("cpu.util"), 100 * ((double) diff) / sysJiffiesDiff.jiffies, timeStamp)); } lastCpuJiffiesMetrics.put(s, procJiffies); s.setSystemMetrics(metrics); } - - lastTotalCpuJiffies = sysJiffies; - - // If none of the services were alive, reschedule in a short time - if (!someAlive) { - reschedule(System.currentTimeMillis() - startTime); - } else { - schedule(); - } + return sysJiffies; } - long getPidJiffies(VespaService service) { - BufferedReader in; - String line; - String[] elems; + static long getPidJiffies(VespaService service) { int pid = service.getPid(); - try { - in = new BufferedReader(new FileReader("/proc/" + pid + "/stat")); + BufferedReader in = new BufferedReader(new FileReader("/proc/" + pid + "/stat")); + return getPidJiffies(in); } catch (FileNotFoundException ex) { log.log(Level.FINE, () -> "Unable to find pid " + pid + " in proc directory, for service " + service.getInstanceName()); service.setAlive(false); return 0; } + } + static long getPidJiffies(BufferedReader in) { + String line; + String[] elems; try { line = in.readLine(); @@ -192,19 +207,21 @@ public class SystemPoller { return Long.parseLong(elems[13]) + Long.parseLong(elems[14]); } - private JiffiesAndCpus getTotalSystemJiffies() { - BufferedReader in; - String line; - ArrayList<CpuJiffies> jiffies = new ArrayList<>(); - CpuJiffies total = null; - + private static JiffiesAndCpus getTotalSystemJiffies() { try { - in = new BufferedReader(new FileReader("/proc/stat")); + BufferedReader in = new BufferedReader(new FileReader("/proc/stat")); + return getTotalSystemJiffies(in); } catch (FileNotFoundException ex) { log.log(Level.SEVERE, "Unable to open stat file", ex); return new JiffiesAndCpus(); } + } + static JiffiesAndCpus getTotalSystemJiffies(BufferedReader in) { + ArrayList<CpuJiffies> jiffies = new ArrayList<>(); + CpuJiffies total = null; + try { + String line; while ((line = in.readLine()) != null) { if (line.startsWith("cpu ")) { total = new CpuJiffies(line); |