summary refs log tree commit diff stats
path: root/node-repository/src/test
diff options
context:
space:
mode:
author Martin Polden <mpolden@mpolden.no> 2018-12-06 11:42:56 +0100
committer Martin Polden <mpolden@mpolden.no> 2018-12-06 11:42:56 +0100
commit 08c2324f8da503c24db3ea44e7a78b59e4417582 (patch)
tree d3c38ca5f0bb9e4ca1076b7db63bde3c3b3b09e4 /node-repository/src/test
parent 5101119fcf57ff624169fd728ab92a3fef5530b3 (diff)
Emit metric for throttled node failures
Diffstat (limited to 'node-repository/src/test')
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java 21
1 file changed, 15 insertions, 6 deletions
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
index 08cf8e7dc20..6147543c102 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
@@ -487,7 +487,8 @@ public class NodeFailerTest {
// 2 nodes are failed (the minimum amount that are always allowed to fail)
tester.failer.run();
assertEquals(2, tester.nodeRepository.getNodes(Node.State.failed).size());
- assertEquals("Throttling is indicated by the metric.", 1, tester.metric.values.get("nodeFailThrottling"));
+ assertEquals("Throttling is indicated by the metric", 1, tester.metric.values.get("nodeFailThrottling"));
+ assertEquals("Throttled node failures", 2, tester.metric.values.get(NodeFailer.throttledNodeFailuresMetric));
// 6 more hours pass, no more nodes are failed
for (int minutes = 0, interval = 30; minutes < 6 * 60; minutes += interval) {
@@ -496,7 +497,8 @@ public class NodeFailerTest {
}
tester.failer.run();
assertEquals(2, tester.nodeRepository.getNodes(Node.State.failed).size());
- assertEquals("Throttling is indicated by the metric.", 1, tester.metric.values.get("nodeFailThrottling"));
+ assertEquals("Throttling is indicated by the metric", 1, tester.metric.values.get("nodeFailThrottling"));
+ assertEquals("Throttled node failures", 2, tester.metric.values.get(NodeFailer.throttledNodeFailuresMetric));
// 2 docker hosts now fail, 1 of them (with all its children) is allowed to fail
hosts.subList(0, 2).forEach(host -> {
@@ -509,7 +511,8 @@ public class NodeFailerTest {
tester.failer.run();
assertEquals(6, tester.nodeRepository.getNodes(Node.State.failed).size());
- assertEquals("Throttling is indicated by the metric.", 1, tester.metric.values.get("nodeFailThrottling"));
+ assertEquals("Throttling is indicated by the metric", 1, tester.metric.values.get("nodeFailThrottling"));
+ assertEquals("Throttled node failures", 2, tester.metric.values.get(NodeFailer.throttledNodeFailuresMetric));
// 24 more hours pass without any other nodes being failed out
for (int minutes = 0, interval = 30; minutes <= 23 * 60; minutes += interval) {
@@ -518,21 +521,24 @@ public class NodeFailerTest {
}
tester.failer.run();
assertEquals(6, tester.nodeRepository.getNodes(Node.State.failed).size());
- assertEquals("Throttling is indicated by the metric.", 1, tester.metric.values.get("nodeFailThrottling"));
+ assertEquals("Throttling is indicated by the metric", 1, tester.metric.values.get("nodeFailThrottling"));
+ assertEquals("Throttled node failures", 2, tester.metric.values.get(NodeFailer.throttledNodeFailuresMetric));
// Next, the 2 ready nodes that were dead from the start are failed out, and finally
// the second host and all its children are failed
tester.clock.advance(Duration.ofMinutes(30));
tester.failer.run();
assertEquals(12, tester.nodeRepository.getNodes(Node.State.failed).size());
- assertEquals("Throttling is not indicated by the metric, as no throttled attempt is made.", 0, tester.metric.values.get("nodeFailThrottling"));
+ assertEquals("Throttling is not indicated by the metric, as no throttled attempt is made", 0, tester.metric.values.get("nodeFailThrottling"));
+ assertEquals("No throttled node failures", 0, tester.metric.values.get(NodeFailer.throttledNodeFailuresMetric));
// Nothing else to fail
tester.clock.advance(Duration.ofHours(25));
tester.allNodesMakeAConfigRequestExcept(deadNodes);
tester.failer.run();
assertEquals(12, tester.nodeRepository.getNodes(Node.State.failed).size());
- assertEquals("Throttling is not indicated by the metric.", 0, tester.metric.values.get("nodeFailThrottling"));
+ assertEquals("Throttling is not indicated by the metric", 0, tester.metric.values.get("nodeFailThrottling"));
+ assertEquals("No throttled node failures", 0, tester.metric.values.get(NodeFailer.throttledNodeFailuresMetric));
}
// Throttles based on percentage in large zone
@@ -550,6 +556,7 @@ public class NodeFailerTest {
// 1% are allowed to fail
assertEquals(5, tester.nodeRepository.getNodes(Node.State.failed).size());
assertEquals("Throttling is indicated by the metric.", 1, tester.metric.values.get("nodeFailThrottling"));
+ assertEquals("Throttled node failures", 5, tester.metric.values.get(NodeFailer.throttledNodeFailuresMetric));
// 6 more hours pass, no more nodes are failed
for (int minutes = 0, interval = 30; minutes < 6 * 60; minutes += interval) {
@@ -559,6 +566,7 @@ public class NodeFailerTest {
tester.failer.run();
assertEquals(5, tester.nodeRepository.getNodes(Node.State.failed).size());
assertEquals("Throttling is indicated by the metric.", 1, tester.metric.values.get("nodeFailThrottling"));
+ assertEquals("Throttled node failures", 5, tester.metric.values.get(NodeFailer.throttledNodeFailuresMetric));
// 18 more hours pass, 24 hours since the first 5 nodes were failed. The remaining 5 are failed
for (int minutes = 0, interval = 30; minutes < 18 * 60; minutes += interval) {
@@ -568,6 +576,7 @@ public class NodeFailerTest {
tester.failer.run();
assertEquals(10, tester.nodeRepository.getNodes(Node.State.failed).size());
assertEquals("Throttling is not indicated by the metric, as no throttled attempt is made.", 0, tester.metric.values.get("nodeFailThrottling"));
+ assertEquals("No throttled node failures", 0, tester.metric.values.get(NodeFailer.throttledNodeFailuresMetric));
}
}