summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/metrics/MetricReceiverWrapper.java 10
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java 11
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java 7
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java 32
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java 29
-rw-r--r-- node-admin/src/test/resources/docker.stats.json 8
-rw-r--r-- node-admin/src/test/resources/docker.stats.metrics.active.expected.json 130
-rw-r--r-- node-admin/src/test/resources/docker.stats.metrics.expected.json 121
-rw-r--r-- node-admin/src/test/resources/docker.stats.metrics.ready.expected.json 21
9 files changed, 202 insertions, 167 deletions
diff --git a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/metrics/MetricReceiverWrapper.java b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/metrics/MetricReceiverWrapper.java
index c641c5bfab1..94940601f80 100644
--- a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/metrics/MetricReceiverWrapper.java
+++ b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/metrics/MetricReceiverWrapper.java
@@ -80,16 +80,6 @@ public class MetricReceiverWrapper {
}
}
- public List<DimensionMetrics> getMetrics(String application) {
- synchronized (monitor) {
- Map<Dimensions, Map<String, MetricValue>> metricsByDimensions = getOrCreateApplicationMetrics(application);
- return metricsByDimensions.entrySet()
- .stream()
- .map(entry -> new DimensionMetrics(application, entry.getKey(), entry.getValue()))
- .collect(Collectors.toList());
- }
- }
-
public List<DimensionMetrics> getAllMetrics() {
synchronized (monitor) {
List<DimensionMetrics> dimensionMetrics = new ArrayList<>();
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
index f1b2a1a434f..a53dbb5b8b6 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
@@ -15,7 +15,6 @@ import com.yahoo.vespa.hosted.node.admin.maintenance.acl.AclMaintainer;
import com.yahoo.vespa.hosted.node.admin.maintenance.StorageMaintainer;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgent;
import com.yahoo.vespa.hosted.node.admin.util.PrefixLogger;
-import com.yahoo.vespa.hosted.provision.Node;
import java.util.HashSet;
import java.util.LinkedHashMap;
@@ -51,9 +50,8 @@ public class NodeAdminImpl implements NodeAdmin {
private final int nodeAgentScanIntervalMillis;
- private GaugeWrapper numberOfContainersInActiveState;
- private GaugeWrapper numberOfContainersInLoadImageState;
- private CounterWrapper numberOfUnhandledExceptionsInNodeAgent;
+ private final GaugeWrapper numberOfContainersInLoadImageState;
+ private final CounterWrapper numberOfUnhandledExceptionsInNodeAgent;
public NodeAdminImpl(final DockerOperations dockerOperations, final Function<String, NodeAgent> nodeAgentFactory,
final Optional<StorageMaintainer> storageMaintainer, final int nodeAgentScanIntervalMillis,
@@ -67,7 +65,6 @@ public class NodeAdminImpl implements NodeAdmin {
.add("host", HostName.getLocalhost())
.add("role", "docker").build();
- this.numberOfContainersInActiveState = metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, "nodes.state.active");
this.numberOfContainersInLoadImageState = metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, "nodes.image.loading");
this.numberOfUnhandledExceptionsInNodeAgent = metricReceiver.declareCounter(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, "nodes.unhandled_exceptions");
@@ -93,18 +90,14 @@ public class NodeAdminImpl implements NodeAdmin {
}
private void updateNodeAgentMetrics() {
- int numberContainersInActive = 0;
int numberContainersWaitingImage = 0;
int numberOfNewUnhandledExceptions = 0;
for (NodeAgent nodeAgent : nodeAgents.values()) {
- Optional<ContainerNodeSpec> nodeSpec = nodeAgent.getContainerNodeSpec();
- if (nodeSpec.isPresent() && nodeSpec.get().nodeState == Node.State.active) numberContainersInActive++;
if (nodeAgent.isDownloadingImage()) numberContainersWaitingImage++;
numberOfNewUnhandledExceptions += nodeAgent.getAndResetNumberOfUnhandledExceptions();
}
- numberOfContainersInActiveState.sample(numberContainersInActive);
numberOfContainersInLoadImageState.sample(numberContainersWaitingImage);
numberOfUnhandledExceptionsInNodeAgent.add(numberOfNewUnhandledExceptions);
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
index dcaa82db81f..66367f46521 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
@@ -2,10 +2,8 @@
package com.yahoo.vespa.hosted.node.admin.nodeagent;
import com.yahoo.vespa.hosted.dockerapi.ContainerName;
-import com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec;
import java.util.Map;
-import java.util.Optional;
/**
* Responsible for management of a single node over its lifecycle.
@@ -57,11 +55,6 @@ public interface NodeAgent {
void stop();
/**
- * Returns the {@link ContainerNodeSpec} for this node agent.
- */
- Optional<ContainerNodeSpec> getContainerNodeSpec();
-
- /**
* Updates metric receiver with the latest node-agent stats
*/
void updateContainerNodeMetrics(int numAllocatedContainersOnHost);
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index be2bcf70b1c..280e2765899 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -509,12 +509,8 @@ public class NodeAgentImpl implements NodeAgent {
synchronized (monitor) {
nodeSpec = lastNodeSpec;
}
+ if (nodeSpec == null) return;
- if (nodeSpec == null || !vespaVersion.isPresent()) return;
- Optional<Docker.ContainerStats> containerStats = dockerOperations.getContainerStats(containerName);
- if ( ! containerStats.isPresent()) return;
-
- Docker.ContainerStats stats = containerStats.get();
Dimensions.Builder dimensionsBuilder = new Dimensions.Builder()
.add("host", hostname)
.add("role", "tenants")
@@ -522,6 +518,7 @@ public class NodeAgentImpl implements NodeAgent {
.add("state", nodeSpec.nodeState.toString())
.add("zone", environment.getZone())
.add("parentHostname", environment.getParentHostHostname());
+ vespaVersion.ifPresent(version -> dimensionsBuilder.add("vespaVersion", version));
nodeSpec.owner.ifPresent(owner ->
dimensionsBuilder
@@ -535,10 +532,17 @@ public class NodeAgentImpl implements NodeAgent {
dimensionsBuilder
.add("clustertype", membership.clusterType)
.add("clusterid", membership.clusterId));
+ Dimensions dimensions = dimensionsBuilder.build();
+ metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, "node.alive").sample(1);
- vespaVersion.ifPresent(version -> dimensionsBuilder.add("vespaVersion", version));
- Dimensions dimensions = dimensionsBuilder.build();
+ // The remaining metrics require the container to exist and be running
+ if (containerState == ABSENT) return;
+ Optional<Docker.ContainerStats> containerStats = dockerOperations.getContainerStats(containerName);
+ if ( ! containerStats.isPresent()) return;
+
+ Docker.ContainerStats stats = containerStats.get();
+
long currentCpuContainerTotalTime = ((Number) ((Map) stats.getCpuStats().get("cpu_usage")).get("total_usage")).longValue();
long currentCpuSystemTotalTime = ((Number) stats.getCpuStats().get("system_cpu_usage")).longValue();
@@ -560,8 +564,12 @@ public class NodeAgentImpl implements NodeAgent {
stats.getNetworks().forEach((interfaceName, interfaceStats) -> {
Dimensions netDims = dimensionsBuilder.add("interface", interfaceName).build();
- addIfNotNull(netDims, "node.network.bytes_rcvd", interfaceStats, "rx_bytes");
- addIfNotNull(netDims, "node.network.bytes_sent", interfaceStats, "tx_bytes");
+ addIfNotNull(netDims, "node.net.in.bytes", interfaceStats, "rx_bytes");
+ addIfNotNull(netDims, "node.net.in.errors", interfaceStats, "rx_errors");
+ addIfNotNull(netDims, "node.net.in.dropped", interfaceStats, "rx_dropped");
+ addIfNotNull(netDims, "node.net.out.bytes", interfaceStats, "tx_bytes");
+ addIfNotNull(netDims, "node.net.out.errors", interfaceStats, "tx_errors");
+ addIfNotNull(netDims, "node.net.out.dropped", interfaceStats, "tx_dropped");
});
long bytesInGB = 1 << 30;
@@ -589,12 +597,6 @@ public class NodeAgentImpl implements NodeAgent {
}
}
- public Optional<ContainerNodeSpec> getContainerNodeSpec() {
- synchronized (monitor) {
- return Optional.ofNullable(lastNodeSpec);
- }
- }
-
public String getHostname() {
return hostname;
}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
index a4676db29e7..9273555994d 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
@@ -438,7 +438,34 @@ public class NodeAgentImplTest {
nodeAgent.updateContainerNodeMetrics(5);
- File expectedMetricsFile = new File(classLoader.getResource("docker.stats.metrics.expected.json").getFile());
+ File expectedMetricsFile = new File(classLoader.getResource("docker.stats.metrics.active.expected.json").getFile());
+ Set<Map<String, Object>> expectedMetrics = objectMapper.readValue(expectedMetricsFile, Set.class);
+ Set<Map<String, Object>> actualMetrics = metricReceiver.getAllMetricsRaw();
+
+ assertEquals(expectedMetrics, actualMetrics);
+ }
+
+ @Test
+ @SuppressWarnings("unchecked")
+ public void testGetRelevantMetricsForReadyNode() throws Exception {
+ final ObjectMapper objectMapper = new ObjectMapper();
+ ClassLoader classLoader = getClass().getClassLoader();
+
+ final ContainerNodeSpec nodeSpec = nodeSpecBuilder
+ .nodeState(Node.State.ready)
+ .build();
+
+ NodeAgentImpl nodeAgent = makeNodeAgent(null, false);
+
+ when(nodeRepository.getContainerNodeSpec(eq(hostName))).thenReturn(Optional.of(nodeSpec));
+ when(dockerOperations.shouldScheduleDownloadOfImage(eq(dockerImage))).thenReturn(false);
+ when(dockerOperations.getContainerStats(eq(containerName))).thenReturn(Optional.empty());
+
+ nodeAgent.tick(); // Run the tick loop once to initialize lastNodeSpec
+
+ nodeAgent.updateContainerNodeMetrics(5);
+
+ File expectedMetricsFile = new File(classLoader.getResource("docker.stats.metrics.ready.expected.json").getFile());
Set<Map<String, Object>> expectedMetrics = objectMapper.readValue(expectedMetricsFile, Set.class);
Set<Map<String, Object>> actualMetrics = metricReceiver.getAllMetricsRaw();
diff --git a/node-admin/src/test/resources/docker.stats.json b/node-admin/src/test/resources/docker.stats.json
index c17a29ae038..d266b01f12d 100644
--- a/node-admin/src/test/resources/docker.stats.json
+++ b/node-admin/src/test/resources/docker.stats.json
@@ -355,12 +355,12 @@
"eth0":{
"rx_bytes":19499270,
"rx_packets":58913,
- "rx_errors":0,
- "rx_dropped":0,
+ "rx_errors":55,
+ "rx_dropped":4,
"tx_bytes":20303455,
"tx_packets":62319,
- "tx_errors":0,
- "tx_dropped":0
+ "tx_errors":3,
+ "tx_dropped":13
},
"eth1":{
"rx_bytes":3245766,
diff --git a/node-admin/src/test/resources/docker.stats.metrics.active.expected.json b/node-admin/src/test/resources/docker.stats.metrics.active.expected.json
new file mode 100644
index 00000000000..2ed42cdb7c4
--- /dev/null
+++ b/node-admin/src/test/resources/docker.stats.metrics.active.expected.json
@@ -0,0 +1,130 @@
+[
+ {
+ "application": "host_life",
+ "dimensions": {
+ "flavor": "docker",
+ "instanceName": "testinstance",
+ "applicationId": "tester.testapp.testinstance",
+ "applicationName": "testapp",
+ "app": "testapp.testinstance",
+ "clustertype": "clustType",
+ "role": "tenants",
+ "tenantName": "tester",
+ "zone": "dev.us-east-1",
+ "host": "host1.test.yahoo.com",
+ "vespaVersion": "1.2.3",
+ "state": "active",
+ "clusterid": "clustId",
+ "parentHostname": "parent.host.name.yahoo.com"
+ },
+ "metrics": {
+ "alive": 1.0,
+ "uptime": 1234.0
+ },
+ "routing": {
+ "yamas": {
+ "namespaces": [
+ "Vespa"
+ ]
+ }
+ }
+ },
+ {
+ "application": "docker",
+ "dimensions": {
+ "flavor": "docker",
+ "applicationName": "testapp",
+ "instanceName": "testinstance",
+ "applicationId": "tester.testapp.testinstance",
+ "app": "testapp.testinstance",
+ "clustertype": "clustType",
+ "role": "tenants",
+ "tenantName": "tester",
+ "host": "host1.test.yahoo.com",
+ "vespaVersion": "1.2.3",
+ "state": "active",
+ "clusterid": "clustId",
+ "parentHostname": "parent.host.name.yahoo.com",
+ "zone": "dev.us-east-1",
+ "interface": "eth1"
+ },
+ "metrics": {
+ "node.net.out.bytes": 5.4246745E7,
+ "node.net.out.errors": 0.0,
+ "node.net.out.dropped": 0.0,
+ "node.net.in.bytes": 3245766.0,
+ "node.net.in.errors": 0.0,
+ "node.net.in.dropped": 0.0
+ },
+ "routing": {
+ "yamas": {
+ "namespaces": ["Vespa"]
+ }
+ }
+ },
+ {
+ "application": "docker",
+ "dimensions": {
+ "flavor": "docker",
+ "applicationName": "testapp",
+ "instanceName": "testinstance",
+ "applicationId": "tester.testapp.testinstance",
+ "app": "testapp.testinstance",
+ "clustertype": "clustType",
+ "role": "tenants",
+ "tenantName": "tester",
+ "host": "host1.test.yahoo.com",
+ "vespaVersion": "1.2.3",
+ "state": "active",
+ "clusterid": "clustId",
+ "parentHostname": "parent.host.name.yahoo.com",
+ "zone": "dev.us-east-1"
+ },
+ "metrics": {
+ "node.alive": 1.0,
+ "node.cpu.busy.pct": 6.75,
+ "node.cpu.throttled_time": 4523.0,
+ "node.memory.usage": 1.326026752E9,
+ "node.memory.limit": 4.294967296E9,
+ "node.disk.limit": 2.68435456E11
+ },
+ "routing": {
+ "yamas": {
+ "namespaces": ["Vespa"]
+ }
+ }
+ },
+ {
+ "application": "docker",
+ "dimensions": {
+ "flavor": "docker",
+ "applicationName": "testapp",
+ "instanceName": "testinstance",
+ "applicationId": "tester.testapp.testinstance",
+ "app": "testapp.testinstance",
+ "clustertype": "clustType",
+ "role": "tenants",
+ "tenantName": "tester",
+ "host": "host1.test.yahoo.com",
+ "vespaVersion": "1.2.3",
+ "state": "active",
+ "clusterid": "clustId",
+ "parentHostname": "parent.host.name.yahoo.com",
+ "zone": "dev.us-east-1",
+ "interface": "eth0"
+ },
+ "metrics": {
+ "node.net.out.bytes": 2.0303455E7,
+ "node.net.out.errors": 3.0,
+ "node.net.out.dropped": 13.0,
+ "node.net.in.bytes": 1.949927E7,
+ "node.net.in.errors": 55.0,
+ "node.net.in.dropped": 4.0
+ },
+ "routing": {
+ "yamas": {
+ "namespaces": ["Vespa"]
+ }
+ }
+ }
+]
diff --git a/node-admin/src/test/resources/docker.stats.metrics.expected.json b/node-admin/src/test/resources/docker.stats.metrics.expected.json
deleted file mode 100644
index f4de239fdce..00000000000
--- a/node-admin/src/test/resources/docker.stats.metrics.expected.json
+++ /dev/null
@@ -1,121 +0,0 @@
-[
- {
- "application": "host_life",
- "dimensions": {
- "flavor": "docker",
- "instanceName": "testinstance",
- "applicationId": "tester.testapp.testinstance",
- "applicationName": "testapp",
- "app": "testapp.testinstance",
- "clustertype": "clustType",
- "role": "tenants",
- "tenantName": "tester",
- "zone": "dev.us-east-1",
- "host": "host1.test.yahoo.com",
- "vespaVersion": "1.2.3",
- "state": "active",
- "clusterid": "clustId",
- "parentHostname": "parent.host.name.yahoo.com"
- },
- "metrics": {
- "alive": 1.0,
- "uptime": 1234.0
- },
- "routing": {
- "yamas": {
- "namespaces": [
- "Vespa"
- ]
- }
- }
- },
- {
- "application":"docker",
- "dimensions":{
- "flavor":"docker",
- "applicationName": "testapp",
- "instanceName": "testinstance",
- "applicationId": "tester.testapp.testinstance",
- "app":"testapp.testinstance",
- "clustertype":"clustType",
- "role":"tenants",
- "tenantName":"tester",
- "host":"host1.test.yahoo.com",
- "vespaVersion":"1.2.3",
- "state":"active",
- "clusterid":"clustId",
- "parentHostname":"parent.host.name.yahoo.com",
- "zone":"dev.us-east-1",
- "interface":"eth1"
- },
- "metrics":{
- "node.network.bytes_sent":5.4246745E7,
- "node.network.bytes_rcvd":3245766.0
- },
- "routing":{
- "yamas":{
- "namespaces": ["Vespa"]
- }
- }
- },
- {
- "application":"docker",
- "dimensions":{
- "flavor":"docker",
- "applicationName": "testapp",
- "instanceName": "testinstance",
- "applicationId": "tester.testapp.testinstance",
- "app":"testapp.testinstance",
- "clustertype":"clustType",
- "role":"tenants",
- "tenantName":"tester",
- "host":"host1.test.yahoo.com",
- "vespaVersion":"1.2.3",
- "state":"active",
- "clusterid":"clustId",
- "parentHostname":"parent.host.name.yahoo.com",
- "zone":"dev.us-east-1"
- },
- "metrics":{
- "node.cpu.busy.pct": 6.75,
- "node.cpu.throttled_time": 4523.0,
- "node.memory.usage":1.326026752E9,
- "node.memory.limit":4.294967296E9,
- "node.disk.limit":2.68435456E11
- },
- "routing":{
- "yamas":{
- "namespaces": ["Vespa"]
- }
- }
- },
- {
- "application":"docker",
- "dimensions":{
- "flavor":"docker",
- "applicationName": "testapp",
- "instanceName": "testinstance",
- "applicationId": "tester.testapp.testinstance",
- "app":"testapp.testinstance",
- "clustertype":"clustType",
- "role":"tenants",
- "tenantName":"tester",
- "host":"host1.test.yahoo.com",
- "vespaVersion":"1.2.3",
- "state":"active",
- "clusterid":"clustId",
- "parentHostname":"parent.host.name.yahoo.com",
- "zone":"dev.us-east-1",
- "interface":"eth0"
- },
- "metrics":{
- "node.network.bytes_sent":2.0303455E7,
- "node.network.bytes_rcvd":1.949927E7
- },
- "routing":{
- "yamas":{
- "namespaces": ["Vespa"]
- }
- }
- }
-]
diff --git a/node-admin/src/test/resources/docker.stats.metrics.ready.expected.json b/node-admin/src/test/resources/docker.stats.metrics.ready.expected.json
new file mode 100644
index 00000000000..de3e26c8e13
--- /dev/null
+++ b/node-admin/src/test/resources/docker.stats.metrics.ready.expected.json
@@ -0,0 +1,21 @@
+[
+ {
+ "application": "docker",
+ "dimensions": {
+ "flavor": "docker",
+ "role": "tenants",
+ "host": "host1.test.yahoo.com",
+ "state": "ready",
+ "parentHostname": "parent.host.name.yahoo.com",
+ "zone": "dev.us-east-1"
+ },
+ "metrics": {
+ "node.alive": 1.0
+ },
+ "routing": {
+ "yamas": {
+ "namespaces": ["Vespa"]
+ }
+ }
+ }
+]