diff options
author | valerijf <valerijf@yahoo-inc.com> | 2017-03-24 09:12:49 +0100 |
---|---|---|
committer | valerijf <valerijf@yahoo-inc.com> | 2017-03-24 09:12:49 +0100 |
commit | 3c77553a4f244ea24be55a53beb9480946f3e0ba (patch) | |
tree | f2b5d2c56f76d4bda46fc8dfddf40e99545ea307 /node-admin | |
parent | 968d43a69c3f55a09bbbf62827a65b645241f8ee (diff) |
Added network metrics for docker containers
Diffstat (limited to 'node-admin')
8 files changed, 202 insertions, 157 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java index f1b2a1a434f..a53dbb5b8b6 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java @@ -15,7 +15,6 @@ import com.yahoo.vespa.hosted.node.admin.maintenance.acl.AclMaintainer; import com.yahoo.vespa.hosted.node.admin.maintenance.StorageMaintainer; import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgent; import com.yahoo.vespa.hosted.node.admin.util.PrefixLogger; -import com.yahoo.vespa.hosted.provision.Node; import java.util.HashSet; import java.util.LinkedHashMap; @@ -51,9 +50,8 @@ public class NodeAdminImpl implements NodeAdmin { private final int nodeAgentScanIntervalMillis; - private GaugeWrapper numberOfContainersInActiveState; - private GaugeWrapper numberOfContainersInLoadImageState; - private CounterWrapper numberOfUnhandledExceptionsInNodeAgent; + private final GaugeWrapper numberOfContainersInLoadImageState; + private final CounterWrapper numberOfUnhandledExceptionsInNodeAgent; public NodeAdminImpl(final DockerOperations dockerOperations, final Function<String, NodeAgent> nodeAgentFactory, final Optional<StorageMaintainer> storageMaintainer, final int nodeAgentScanIntervalMillis, @@ -67,7 +65,6 @@ public class NodeAdminImpl implements NodeAdmin { .add("host", HostName.getLocalhost()) .add("role", "docker").build(); - this.numberOfContainersInActiveState = metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, "nodes.state.active"); this.numberOfContainersInLoadImageState = metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, "nodes.image.loading"); this.numberOfUnhandledExceptionsInNodeAgent = metricReceiver.declareCounter(MetricReceiverWrapper.APPLICATION_DOCKER, 
dimensions, "nodes.unhandled_exceptions"); @@ -93,18 +90,14 @@ public class NodeAdminImpl implements NodeAdmin { } private void updateNodeAgentMetrics() { - int numberContainersInActive = 0; int numberContainersWaitingImage = 0; int numberOfNewUnhandledExceptions = 0; for (NodeAgent nodeAgent : nodeAgents.values()) { - Optional<ContainerNodeSpec> nodeSpec = nodeAgent.getContainerNodeSpec(); - if (nodeSpec.isPresent() && nodeSpec.get().nodeState == Node.State.active) numberContainersInActive++; if (nodeAgent.isDownloadingImage()) numberContainersWaitingImage++; numberOfNewUnhandledExceptions += nodeAgent.getAndResetNumberOfUnhandledExceptions(); } - numberOfContainersInActiveState.sample(numberContainersInActive); numberOfContainersInLoadImageState.sample(numberContainersWaitingImage); numberOfUnhandledExceptionsInNodeAgent.add(numberOfNewUnhandledExceptions); } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java index dcaa82db81f..66367f46521 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java @@ -2,10 +2,8 @@ package com.yahoo.vespa.hosted.node.admin.nodeagent; import com.yahoo.vespa.hosted.dockerapi.ContainerName; -import com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec; import java.util.Map; -import java.util.Optional; /** * Responsible for management of a single node over its lifecycle. @@ -57,11 +55,6 @@ public interface NodeAgent { void stop(); /** - * Returns the {@link ContainerNodeSpec} for this node agent. 
- */ - Optional<ContainerNodeSpec> getContainerNodeSpec(); - - /** * Updates metric receiver with the latest node-agent stats */ void updateContainerNodeMetrics(int numAllocatedContainersOnHost); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index be2bcf70b1c..280e2765899 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -509,12 +509,8 @@ public class NodeAgentImpl implements NodeAgent { synchronized (monitor) { nodeSpec = lastNodeSpec; } + if (nodeSpec == null) return; - if (nodeSpec == null || !vespaVersion.isPresent()) return; - Optional<Docker.ContainerStats> containerStats = dockerOperations.getContainerStats(containerName); - if ( ! containerStats.isPresent()) return; - - Docker.ContainerStats stats = containerStats.get(); Dimensions.Builder dimensionsBuilder = new Dimensions.Builder() .add("host", hostname) .add("role", "tenants") @@ -522,6 +518,7 @@ public class NodeAgentImpl implements NodeAgent { .add("state", nodeSpec.nodeState.toString()) .add("zone", environment.getZone()) .add("parentHostname", environment.getParentHostHostname()); + vespaVersion.ifPresent(version -> dimensionsBuilder.add("vespaVersion", version)); nodeSpec.owner.ifPresent(owner -> dimensionsBuilder @@ -535,10 +532,17 @@ public class NodeAgentImpl implements NodeAgent { dimensionsBuilder .add("clustertype", membership.clusterType) .add("clusterid", membership.clusterId)); + Dimensions dimensions = dimensionsBuilder.build(); + metricReceiver.declareGauge(MetricReceiverWrapper.APPLICATION_DOCKER, dimensions, "node.alive").sample(1); - vespaVersion.ifPresent(version -> dimensionsBuilder.add("vespaVersion", version)); - Dimensions dimensions = dimensionsBuilder.build(); + // The remaining metrics 
require container to exist and be running + if (containerState == ABSENT) return; + Optional<Docker.ContainerStats> containerStats = dockerOperations.getContainerStats(containerName); + if ( ! containerStats.isPresent()) return; + + Docker.ContainerStats stats = containerStats.get(); + long currentCpuContainerTotalTime = ((Number) ((Map) stats.getCpuStats().get("cpu_usage")).get("total_usage")).longValue(); long currentCpuSystemTotalTime = ((Number) stats.getCpuStats().get("system_cpu_usage")).longValue(); @@ -560,8 +564,12 @@ public class NodeAgentImpl implements NodeAgent { stats.getNetworks().forEach((interfaceName, interfaceStats) -> { Dimensions netDims = dimensionsBuilder.add("interface", interfaceName).build(); - addIfNotNull(netDims, "node.network.bytes_rcvd", interfaceStats, "rx_bytes"); - addIfNotNull(netDims, "node.network.bytes_sent", interfaceStats, "tx_bytes"); + addIfNotNull(netDims, "node.net.in.bytes", interfaceStats, "rx_bytes"); + addIfNotNull(netDims, "node.net.in.errors", interfaceStats, "rx_errors"); + addIfNotNull(netDims, "node.net.in.dropped", interfaceStats, "rx_dropped"); + addIfNotNull(netDims, "node.net.out.bytes", interfaceStats, "tx_bytes"); + addIfNotNull(netDims, "node.net.out.errors", interfaceStats, "tx_errors"); + addIfNotNull(netDims, "node.net.out.dropped", interfaceStats, "tx_dropped"); }); long bytesInGB = 1 << 30; @@ -589,12 +597,6 @@ public class NodeAgentImpl implements NodeAgent { } } - public Optional<ContainerNodeSpec> getContainerNodeSpec() { - synchronized (monitor) { - return Optional.ofNullable(lastNodeSpec); - } - } - public String getHostname() { return hostname; } diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java index a4676db29e7..9273555994d 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java +++ 
b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java @@ -438,7 +438,34 @@ public class NodeAgentImplTest { nodeAgent.updateContainerNodeMetrics(5); - File expectedMetricsFile = new File(classLoader.getResource("docker.stats.metrics.expected.json").getFile()); + File expectedMetricsFile = new File(classLoader.getResource("docker.stats.metrics.active.expected.json").getFile()); + Set<Map<String, Object>> expectedMetrics = objectMapper.readValue(expectedMetricsFile, Set.class); + Set<Map<String, Object>> actualMetrics = metricReceiver.getAllMetricsRaw(); + + assertEquals(expectedMetrics, actualMetrics); + } + + @Test + @SuppressWarnings("unchecked") + public void testGetRelevantMetricsForReadyNode() throws Exception { + final ObjectMapper objectMapper = new ObjectMapper(); + ClassLoader classLoader = getClass().getClassLoader(); + + final ContainerNodeSpec nodeSpec = nodeSpecBuilder + .nodeState(Node.State.ready) + .build(); + + NodeAgentImpl nodeAgent = makeNodeAgent(null, false); + + when(nodeRepository.getContainerNodeSpec(eq(hostName))).thenReturn(Optional.of(nodeSpec)); + when(dockerOperations.shouldScheduleDownloadOfImage(eq(dockerImage))).thenReturn(false); + when(dockerOperations.getContainerStats(eq(containerName))).thenReturn(Optional.empty()); + + nodeAgent.tick(); // Run the tick loop once to initialize lastNodeSpec + + nodeAgent.updateContainerNodeMetrics(5); + + File expectedMetricsFile = new File(classLoader.getResource("docker.stats.metrics.ready.expected.json").getFile()); Set<Map<String, Object>> expectedMetrics = objectMapper.readValue(expectedMetricsFile, Set.class); Set<Map<String, Object>> actualMetrics = metricReceiver.getAllMetricsRaw(); diff --git a/node-admin/src/test/resources/docker.stats.json b/node-admin/src/test/resources/docker.stats.json index c17a29ae038..d266b01f12d 100644 --- a/node-admin/src/test/resources/docker.stats.json +++ b/node-admin/src/test/resources/docker.stats.json @@ -355,12 
+355,12 @@ "eth0":{ "rx_bytes":19499270, "rx_packets":58913, - "rx_errors":0, - "rx_dropped":0, + "rx_errors":55, + "rx_dropped":4, "tx_bytes":20303455, "tx_packets":62319, - "tx_errors":0, - "tx_dropped":0 + "tx_errors":3, + "tx_dropped":13 }, "eth1":{ "rx_bytes":3245766, diff --git a/node-admin/src/test/resources/docker.stats.metrics.active.expected.json b/node-admin/src/test/resources/docker.stats.metrics.active.expected.json new file mode 100644 index 00000000000..2ed42cdb7c4 --- /dev/null +++ b/node-admin/src/test/resources/docker.stats.metrics.active.expected.json @@ -0,0 +1,130 @@ +[ + { + "application": "host_life", + "dimensions": { + "flavor": "docker", + "instanceName": "testinstance", + "applicationId": "tester.testapp.testinstance", + "applicationName": "testapp", + "app": "testapp.testinstance", + "clustertype": "clustType", + "role": "tenants", + "tenantName": "tester", + "zone": "dev.us-east-1", + "host": "host1.test.yahoo.com", + "vespaVersion": "1.2.3", + "state": "active", + "clusterid": "clustId", + "parentHostname": "parent.host.name.yahoo.com" + }, + "metrics": { + "alive": 1.0, + "uptime": 1234.0 + }, + "routing": { + "yamas": { + "namespaces": [ + "Vespa" + ] + } + } + }, + { + "application": "docker", + "dimensions": { + "flavor": "docker", + "applicationName": "testapp", + "instanceName": "testinstance", + "applicationId": "tester.testapp.testinstance", + "app": "testapp.testinstance", + "clustertype": "clustType", + "role": "tenants", + "tenantName": "tester", + "host": "host1.test.yahoo.com", + "vespaVersion": "1.2.3", + "state": "active", + "clusterid": "clustId", + "parentHostname": "parent.host.name.yahoo.com", + "zone": "dev.us-east-1", + "interface": "eth1" + }, + "metrics": { + "node.net.out.bytes": 5.4246745E7, + "node.net.out.errors": 0.0, + "node.net.out.dropped": 0.0, + "node.net.in.bytes": 3245766.0, + "node.net.in.errors": 0.0, + "node.net.in.dropped": 0.0 + }, + "routing": { + "yamas": { + "namespaces": ["Vespa"] + } + } + 
}, + { + "application": "docker", + "dimensions": { + "flavor": "docker", + "applicationName": "testapp", + "instanceName": "testinstance", + "applicationId": "tester.testapp.testinstance", + "app": "testapp.testinstance", + "clustertype": "clustType", + "role": "tenants", + "tenantName": "tester", + "host": "host1.test.yahoo.com", + "vespaVersion": "1.2.3", + "state": "active", + "clusterid": "clustId", + "parentHostname": "parent.host.name.yahoo.com", + "zone": "dev.us-east-1" + }, + "metrics": { + "node.alive": 1.0, + "node.cpu.busy.pct": 6.75, + "node.cpu.throttled_time": 4523.0, + "node.memory.usage": 1.326026752E9, + "node.memory.limit": 4.294967296E9, + "node.disk.limit": 2.68435456E11 + }, + "routing": { + "yamas": { + "namespaces": ["Vespa"] + } + } + }, + { + "application": "docker", + "dimensions": { + "flavor": "docker", + "applicationName": "testapp", + "instanceName": "testinstance", + "applicationId": "tester.testapp.testinstance", + "app": "testapp.testinstance", + "clustertype": "clustType", + "role": "tenants", + "tenantName": "tester", + "host": "host1.test.yahoo.com", + "vespaVersion": "1.2.3", + "state": "active", + "clusterid": "clustId", + "parentHostname": "parent.host.name.yahoo.com", + "zone": "dev.us-east-1", + "interface": "eth0" + }, + "metrics": { + "node.net.out.bytes": 2.0303455E7, + "node.net.out.errors": 3.0, + "node.net.out.dropped": 13.0, + "node.net.in.bytes": 1.949927E7, + "node.net.in.errors": 55.0, + "node.net.in.dropped": 4.0 + }, + "routing": { + "yamas": { + "namespaces": ["Vespa"] + } + } + } +] diff --git a/node-admin/src/test/resources/docker.stats.metrics.expected.json b/node-admin/src/test/resources/docker.stats.metrics.expected.json deleted file mode 100644 index f4de239fdce..00000000000 --- a/node-admin/src/test/resources/docker.stats.metrics.expected.json +++ /dev/null @@ -1,121 +0,0 @@ -[ - { - "application": "host_life", - "dimensions": { - "flavor": "docker", - "instanceName": "testinstance", - "applicationId": 
"tester.testapp.testinstance", - "applicationName": "testapp", - "app": "testapp.testinstance", - "clustertype": "clustType", - "role": "tenants", - "tenantName": "tester", - "zone": "dev.us-east-1", - "host": "host1.test.yahoo.com", - "vespaVersion": "1.2.3", - "state": "active", - "clusterid": "clustId", - "parentHostname": "parent.host.name.yahoo.com" - }, - "metrics": { - "alive": 1.0, - "uptime": 1234.0 - }, - "routing": { - "yamas": { - "namespaces": [ - "Vespa" - ] - } - } - }, - { - "application":"docker", - "dimensions":{ - "flavor":"docker", - "applicationName": "testapp", - "instanceName": "testinstance", - "applicationId": "tester.testapp.testinstance", - "app":"testapp.testinstance", - "clustertype":"clustType", - "role":"tenants", - "tenantName":"tester", - "host":"host1.test.yahoo.com", - "vespaVersion":"1.2.3", - "state":"active", - "clusterid":"clustId", - "parentHostname":"parent.host.name.yahoo.com", - "zone":"dev.us-east-1", - "interface":"eth1" - }, - "metrics":{ - "node.network.bytes_sent":5.4246745E7, - "node.network.bytes_rcvd":3245766.0 - }, - "routing":{ - "yamas":{ - "namespaces": ["Vespa"] - } - } - }, - { - "application":"docker", - "dimensions":{ - "flavor":"docker", - "applicationName": "testapp", - "instanceName": "testinstance", - "applicationId": "tester.testapp.testinstance", - "app":"testapp.testinstance", - "clustertype":"clustType", - "role":"tenants", - "tenantName":"tester", - "host":"host1.test.yahoo.com", - "vespaVersion":"1.2.3", - "state":"active", - "clusterid":"clustId", - "parentHostname":"parent.host.name.yahoo.com", - "zone":"dev.us-east-1" - }, - "metrics":{ - "node.cpu.busy.pct": 6.75, - "node.cpu.throttled_time": 4523.0, - "node.memory.usage":1.326026752E9, - "node.memory.limit":4.294967296E9, - "node.disk.limit":2.68435456E11 - }, - "routing":{ - "yamas":{ - "namespaces": ["Vespa"] - } - } - }, - { - "application":"docker", - "dimensions":{ - "flavor":"docker", - "applicationName": "testapp", - "instanceName": 
"testinstance", - "applicationId": "tester.testapp.testinstance", - "app":"testapp.testinstance", - "clustertype":"clustType", - "role":"tenants", - "tenantName":"tester", - "host":"host1.test.yahoo.com", - "vespaVersion":"1.2.3", - "state":"active", - "clusterid":"clustId", - "parentHostname":"parent.host.name.yahoo.com", - "zone":"dev.us-east-1", - "interface":"eth0" - }, - "metrics":{ - "node.network.bytes_sent":2.0303455E7, - "node.network.bytes_rcvd":1.949927E7 - }, - "routing":{ - "yamas":{ - "namespaces": ["Vespa"] - } - } - } -] diff --git a/node-admin/src/test/resources/docker.stats.metrics.ready.expected.json b/node-admin/src/test/resources/docker.stats.metrics.ready.expected.json new file mode 100644 index 00000000000..de3e26c8e13 --- /dev/null +++ b/node-admin/src/test/resources/docker.stats.metrics.ready.expected.json @@ -0,0 +1,21 @@ +[ + { + "application": "docker", + "dimensions": { + "flavor": "docker", + "role": "tenants", + "host": "host1.test.yahoo.com", + "state": "ready", + "parentHostname": "parent.host.name.yahoo.com", + "zone": "dev.us-east-1" + }, + "metrics": { + "node.alive": 1.0 + }, + "routing": { + "yamas": { + "namespaces": ["Vespa"] + } + } + } +] |