diff options
author | Harald Musum <musum@verizonmedia.com> | 2021-09-14 06:36:47 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-09-14 06:36:47 +0200 |
commit | c12ca01e6796f882ff76fe946bbb3de342881a32 (patch) | |
tree | c909a8ecc6a3db4ad1f25935b5e4ebce16e38f2d /node-repository | |
parent | bb08873b35c620d242614658ded6e371cb1fa303 (diff) | |
parent | 7088870382b4f78ef3c0375e845b587e7211cd95 (diff) |
Merge pull request #19097 from vespa-engine/hakon/support-starting-with-unknown-service-status
Support having services in status UNKNOWN at cfg boot
Diffstat (limited to 'node-repository')
3 files changed, 23 insertions, 10 deletions
```diff
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
index d8bbf305b57..c0b635cf764 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
@@ -248,6 +248,8 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
         metric.set("someServicesDown", (numberOfServicesDown > 0 ? 1 : 0), context);
+        metric.set("numberOfServicesUnknown", servicesCount.getOrDefault(ServiceStatus.UNKNOWN, 0L), context);
+
         boolean down = NodeHealthTracker.allDown(services);
         metric.set("nodeFailerBadNode", (down ? 1 : 0), context);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java
index 37969a30b81..693d8b6be9c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java
@@ -116,7 +116,8 @@ public class NodeHealthTracker extends NodeRepositoryMaintainer {
                 .collect(Collectors.groupingBy(ServiceInstance::serviceStatus, counting()));
         return countsByStatus.getOrDefault(ServiceStatus.UP, 0L) <= 0L &&
-               countsByStatus.getOrDefault(ServiceStatus.DOWN, 0L) > 0L;
+               countsByStatus.getOrDefault(ServiceStatus.DOWN, 0L) > 0L &&
+               countsByStatus.getOrDefault(ServiceStatus.UNKNOWN, 0L) == 0L;
     }
     /** Get node by given hostname and application. The applicationLock must be held when calling this */
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
index 2bd0c91f4a1..ad887212a05 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
@@ -672,14 +672,23 @@ public class NodeFailerTest {
     @Test
     public void testUpness() {
-        assertFalse(badNode(0, 0, 0));
-        assertFalse(badNode(0, 0, 2));
-        assertFalse(badNode(0, 3, 0));
-        assertFalse(badNode(0, 3, 2));
-        assertTrue(badNode(1, 0, 0));
-        assertTrue(badNode(1, 0, 2));
-        assertFalse(badNode(1, 3, 0));
-        assertFalse(badNode(1, 3, 2));
+        assertFalse(badNode(0, 0, 0, 0));
+        assertFalse(badNode(0, 0, 0, 2));
+        assertFalse(badNode(0, 3, 0, 0));
+        assertFalse(badNode(0, 3, 0, 2));
+        assertTrue(badNode(1, 0, 0, 0));
+        assertTrue(badNode(1, 0, 0, 2));
+        assertFalse(badNode(1, 3, 0, 0));
+        assertFalse(badNode(1, 3, 0, 2));
+
+        assertFalse(badNode(0, 0, 1, 0));
+        assertFalse(badNode(0, 0, 1, 2));
+        assertFalse(badNode(0, 3, 1, 0));
+        assertFalse(badNode(0, 3, 1, 2));
+        assertFalse(badNode(1, 0, 1, 0));
+        assertFalse(badNode(1, 0, 1, 2));
+        assertFalse(badNode(1, 3, 1, 0));
+        assertFalse(badNode(1, 3, 1, 2));
     }
     private void addServiceInstances(List<ServiceInstance> list, ServiceStatus status, int num) {
@@ -690,10 +699,11 @@ public class NodeFailerTest {
         }
     }
-    private boolean badNode(int numDown, int numUp, int numNotChecked) {
+    private boolean badNode(int numDown, int numUp, int numUnknown, int numNotChecked) {
         List<ServiceInstance> services = new ArrayList<>();
         addServiceInstances(services, ServiceStatus.DOWN, numDown);
         addServiceInstances(services, ServiceStatus.UP, numUp);
+        addServiceInstances(services, ServiceStatus.UNKNOWN, numUnknown);
         addServiceInstances(services, ServiceStatus.NOT_CHECKED, numNotChecked);
         Collections.shuffle(services);
```