aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Håkon Hallingstad <hakon.hallingstad@gmail.com> 2022-08-06 15:42:50 +0200
committer GitHub <noreply@github.com> 2022-08-06 15:42:50 +0200
commit e0744af00929a345a481a2f8a2e086b61bd53f55 (patch)
tree 66c90db4cd32dc70ffb21f63e106d2449d0fdc0f
parent 4b50e2dd9b56d08f3b6bf5a131c8b07c829d823f (diff)
parent e77c830792a9e6a452e6f632e90afb1c3f39ab88 (diff)
Merge pull request #23582 from vespa-engine/hakonhall/memory-overhead-metric [v8.30.50]
Memory overhead metric
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java 52
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ProcMeminfo.java 12
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ProcMeminfoReader.java 42
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java 8
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerTester.java 7
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java 4
6 files changed, 104 insertions, 21 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
index 6de4f8aede1..a371cdcde25 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.nodeadmin;
+import com.yahoo.vespa.hosted.node.admin.container.ContainerStats;
import com.yahoo.vespa.hosted.node.admin.container.metrics.Counter;
import com.yahoo.vespa.hosted.node.admin.container.metrics.Dimensions;
import com.yahoo.vespa.hosted.node.admin.container.metrics.Gauge;
@@ -11,12 +12,14 @@ import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextManager;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentFactory;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentScheduler;
+import java.nio.file.FileSystem;
import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
@@ -42,24 +45,28 @@ public class NodeAdminImpl implements NodeAdmin {
private Instant startOfFreezeConvergence;
private final Map<String, NodeAgentWithScheduler> nodeAgentWithSchedulerByHostname = new ConcurrentHashMap<>();
+ private final ProcMeminfoReader procMeminfoReader;
private final Gauge jvmHeapUsed;
private final Gauge jvmHeapFree;
private final Gauge jvmHeapTotal;
+ private final Gauge memoryOverhead;
+ private final Gauge containerCount;
private final Counter numberOfUnhandledExceptions;
- public NodeAdminImpl(NodeAgentFactory nodeAgentFactory, Metrics metrics, Clock clock) {
+ public NodeAdminImpl(NodeAgentFactory nodeAgentFactory, Metrics metrics, Clock clock, FileSystem fileSystem) {
this(nodeAgentContext -> create(clock, nodeAgentFactory, nodeAgentContext),
- metrics, clock, NODE_AGENT_FREEZE_TIMEOUT, NODE_AGENT_SPREAD);
+ metrics, clock, NODE_AGENT_FREEZE_TIMEOUT, NODE_AGENT_SPREAD, new ProcMeminfoReader(fileSystem));
}
public NodeAdminImpl(NodeAgentFactory nodeAgentFactory, Metrics metrics,
- Clock clock, Duration freezeTimeout, Duration spread) {
+ Clock clock, Duration freezeTimeout, Duration spread, ProcMeminfoReader procMeminfoReader) {
this(nodeAgentContext -> create(clock, nodeAgentFactory, nodeAgentContext),
- metrics, clock, freezeTimeout, spread);
+ metrics, clock, freezeTimeout, spread, procMeminfoReader);
}
NodeAdminImpl(NodeAgentWithSchedulerFactory nodeAgentWithSchedulerFactory,
- Metrics metrics, Clock clock, Duration freezeTimeout, Duration spread) {
+ Metrics metrics, Clock clock, Duration freezeTimeout, Duration spread,
+ ProcMeminfoReader procMeminfoReader) {
this.nodeAgentWithSchedulerFactory = nodeAgentWithSchedulerFactory;
this.clock = clock;
this.freezeTimeout = freezeTimeout;
@@ -71,9 +78,12 @@ public class NodeAdminImpl implements NodeAdmin {
this.numberOfUnhandledExceptions = metrics.declareCounter("unhandled_exceptions",
new Dimensions(Map.of("src", "node-agents")));
+ this.procMeminfoReader = procMeminfoReader;
this.jvmHeapUsed = metrics.declareGauge("mem.heap.used");
this.jvmHeapFree = metrics.declareGauge("mem.heap.free");
this.jvmHeapTotal = metrics.declareGauge("mem.heap.total");
+ this.memoryOverhead = metrics.declareGauge("mem.system.overhead");
+ this.containerCount = metrics.declareGauge("container.count");
}
@Override
@@ -103,21 +113,33 @@ public class NodeAdminImpl implements NodeAdmin {
@Override
public void updateMetrics(boolean isSuspended) { // isSuspended gates the exception counter and the container/overhead gauges below
+ long numContainers = 0; // number of node agents that returned container stats in this pass
+ long totalContainerMemoryBytes = 0; // sum of getMemoryStats().getUsage() over those containers (presumably bytes — confirm)
+
for (NodeAgentWithScheduler nodeAgentWithScheduler : nodeAgentWithSchedulerByHostname.values()) {
int count = nodeAgentWithScheduler.getAndResetNumberOfUnhandledExceptions(); // fetched (and reset) even when suspended; only the add below is skipped
if (!isSuspended) numberOfUnhandledExceptions.add(count);
- nodeAgentWithScheduler.updateContainerNodeMetrics(isSuspended);
+ Optional<ContainerStats> containerStats = nodeAgentWithScheduler.updateContainerNodeMetrics(isSuspended);
+ if (containerStats.isPresent()) { // empty when the node agent has no container stats to report
+ ++numContainers;
+ totalContainerMemoryBytes += containerStats.get().getMemoryStats().getUsage();
+ }
}
+ Runtime runtime = Runtime.getRuntime();
+ runtime.gc(); // NOTE(review): a GC is now requested on every call, including while suspended — confirm this is intended
+ long freeMemory = runtime.freeMemory();
+ long totalMemory = runtime.totalMemory();
+ long usedMemory = totalMemory - freeMemory; // heap in use = total heap - free heap
+ jvmHeapFree.sample(freeMemory);
+ jvmHeapUsed.sample(usedMemory);
+ jvmHeapTotal.sample(totalMemory);
+
+ // No container stats are found while suspended, so skip setting these if so.
if (!isSuspended) {
- Runtime runtime = Runtime.getRuntime();
- runtime.gc();
- long freeMemory = runtime.freeMemory();
- long totalMemory = runtime.totalMemory();
- long usedMemory = totalMemory - freeMemory;
- jvmHeapFree.sample(freeMemory);
- jvmHeapUsed.sample(usedMemory);
- jvmHeapTotal.sample(totalMemory);
+ containerCount.sample(numContainers);
+ ProcMeminfo meminfo = procMeminfoReader.read(); // MemTotal and MemAvailable from /proc/meminfo, in bytes
+ memoryOverhead.sample(meminfo.memTotalBytes() - meminfo.memAvailableBytes() - totalContainerMemoryBytes); // (MemTotal - MemAvailable) minus the summed container usage
}
}
@@ -206,7 +228,7 @@ public class NodeAdminImpl implements NodeAdmin {
void start() { nodeAgent.start(currentContext()); }
void stopForHostSuspension() { nodeAgent.stopForHostSuspension(currentContext()); }
void stopForRemoval() { nodeAgent.stopForRemoval(currentContext()); }
- void updateContainerNodeMetrics(boolean isSuspended) { nodeAgent.updateContainerNodeMetrics(currentContext(), isSuspended); }
+ Optional<ContainerStats> updateContainerNodeMetrics(boolean isSuspended) { return nodeAgent.updateContainerNodeMetrics(currentContext(), isSuspended); }
int getAndResetNumberOfUnhandledExceptions() { return nodeAgent.getAndResetNumberOfUnhandledExceptions(); }
@Override public void scheduleTickWith(NodeAgentContext context, Instant at) { nodeAgentScheduler.scheduleTickWith(context, at); }
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ProcMeminfo.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ProcMeminfo.java
new file mode 100644
index 00000000000..a1f750a34e3
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ProcMeminfo.java
@@ -0,0 +1,12 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeadmin;
+
+/**
+ * Represents the MemTotal and MemAvailable fields of /proc/meminfo, see proc(5). Both values are held as byte counts.
+ *
+ * @param memTotalBytes MemTotal: Total usable RAM (i.e., physical RAM minus a few reserved bits and the kernel binary code).
+ * @param memAvailableBytes MemAvailable: An estimate of how much memory is available for starting new applications, without swapping.
+ *
+ * @author hakon
+ */
+public record ProcMeminfo(long memTotalBytes, long memAvailableBytes) { }
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ProcMeminfoReader.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ProcMeminfoReader.java
new file mode 100644
index 00000000000..17abe6c7b46
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/ProcMeminfoReader.java
@@ -0,0 +1,42 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.nodeadmin;
+
+import com.yahoo.yolean.Exceptions;
+
+import java.nio.file.FileSystem;
+import java.nio.file.Files;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Reads /proc/meminfo, see proc(5), and extracts the MemTotal and MemAvailable fields, converted from kB to bytes.
+ *
+ * @author hakon
+ */
+public class ProcMeminfoReader {
+ private static final String PROC_MEMINFO = "/proc/meminfo"; // resolved against the injected FileSystem in read()
+ private static final Pattern MEM_TOTAL_PATTERN = Pattern.compile("MemTotal: *([0-9]+) kB"); // group 1: the value in kB
+ private static final Pattern MEM_AVAILABLE_PATTERN = Pattern.compile("MemAvailable: *([0-9]+) kB"); // group 1: the value in kB
+
+ private final FileSystem fileSystem; // file system that PROC_MEMINFO is read from
+
+ public ProcMeminfoReader(FileSystem fileSystem) {
+ this.fileSystem = fileSystem;
+ }
+
+ public ProcMeminfo read() { // reads PROC_MEMINFO and parses it; Exceptions.uncheck presumably rethrows IOException unchecked — confirm
+ return read(Exceptions.uncheck(() -> Files.readString(fileSystem.getPath(PROC_MEMINFO))));
+ }
+
+ static ProcMeminfo read(String meminfoContent) { // parses already-loaded meminfo text; does no file access itself
+ return new ProcMeminfo(readKbGroup(meminfoContent, MEM_TOTAL_PATTERN),
+ readKbGroup(meminfoContent, MEM_AVAILABLE_PATTERN));
+ }
+
+ private static long readKbGroup(String string, Pattern pattern) { // first match of pattern in string, returned as bytes
+ Matcher matcher = pattern.matcher(string);
+ if (!matcher.find()) // field missing, or not in the expected "<digits> kB" form
+ throw new IllegalArgumentException(pattern + " did not match anything in " + PROC_MEMINFO);
+ return Long.parseLong(matcher.group(1)) * 1024; // kB -> bytes
+ }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
index 5b9bcee83bf..18c981fdf17 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
@@ -1,6 +1,10 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.nodeagent;
+import com.yahoo.vespa.hosted.node.admin.container.ContainerStats;
+
+import java.util.Optional;
+
/**
* Responsible for management of a single node over its lifecycle.
* May own its own resources, threads etc. Runs independently, but receives signals
@@ -28,9 +32,9 @@ public interface NodeAgent {
void stopForRemoval(NodeAgentContext context);
/**
- * Updates metric receiver with the latest node-agent stats
+ * Updates metric receiver with the latest node-agent stats, and returns the container stats if available.
*/
- default void updateContainerNodeMetrics(NodeAgentContext context, boolean isSuspended) {}
+ default Optional<ContainerStats> updateContainerNodeMetrics(NodeAgentContext context, boolean isSuspended) { return Optional.empty(); }
/**
* Returns and resets number of unhandled exceptions
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerTester.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerTester.java
index a2312a23925..1773eb4be25 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerTester.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integration/ContainerTester.java
@@ -17,6 +17,8 @@ import com.yahoo.vespa.hosted.node.admin.maintenance.StorageMaintainer;
import com.yahoo.vespa.hosted.node.admin.maintenance.servicedump.VespaServiceDumper;
import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminImpl;
import com.yahoo.vespa.hosted.node.admin.nodeadmin.NodeAdminStateUpdater;
+import com.yahoo.vespa.hosted.node.admin.nodeadmin.ProcMeminfo;
+import com.yahoo.vespa.hosted.node.admin.nodeadmin.ProcMeminfoReader;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextFactory;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextImpl;
@@ -36,7 +38,6 @@ import java.util.Optional;
import java.util.concurrent.Phaser;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
-import java.util.concurrent.atomic.AtomicBoolean;
import java.util.logging.Logger;
import static org.mockito.ArgumentMatchers.any;
@@ -87,6 +88,8 @@ public class ContainerTester implements AutoCloseable {
Clock clock = Clock.systemUTC();
Metrics metrics = new Metrics();
FileSystem fileSystem = TestFileSystem.create();
+ ProcMeminfoReader procMeminfoReader = mock(ProcMeminfoReader.class);
+ when(procMeminfoReader.read()).thenReturn(new ProcMeminfo(1, 2));
NodeAgentFactory nodeAgentFactory = (contextSupplier, nodeContext) ->
new NodeAgentImpl(contextSupplier, nodeRepository, orchestrator, containerOperations, () -> RegistryCredentials.none,
@@ -106,7 +109,7 @@ public class ContainerTester implements AutoCloseable {
phaser.arriveAndDeregister();
}
};
- nodeAdmin = new NodeAdminImpl(nodeAgentFactory, metrics, clock, Duration.ofMillis(10), Duration.ZERO);
+ nodeAdmin = new NodeAdminImpl(nodeAgentFactory, metrics, clock, Duration.ofMillis(10), Duration.ZERO, procMeminfoReader);
NodeAgentContextFactory nodeAgentContextFactory = (nodeSpec, acl) ->
NodeAgentContextImpl.builder(nodeSpec).acl(acl).fileSystem(fileSystem).build();
nodeAdminStateUpdater = new NodeAdminStateUpdater(nodeAgentContextFactory, nodeRepository, orchestrator,
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java
index 8504a724417..96c18517bfe 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java
@@ -35,9 +35,9 @@ public class NodeAdminImplTest {
private final NodeAgentWithSchedulerFactory nodeAgentWithSchedulerFactory = mock(NodeAgentWithSchedulerFactory.class);
private final ManualClock clock = new ManualClock();
-
+ private final ProcMeminfoReader procMeminfoReader = mock(ProcMeminfoReader.class);
private final NodeAdminImpl nodeAdmin = new NodeAdminImpl(nodeAgentWithSchedulerFactory,
- new Metrics(), clock, Duration.ZERO, Duration.ZERO);
+ new Metrics(), clock, Duration.ZERO, Duration.ZERO, procMeminfoReader);
@Test
void nodeAgentsAreProperlyLifeCycleManaged() {