summary refs log tree commit diff stats
path: root/node-repository/src
diff options
context:
space:
mode:
author Harald Musum <musum@verizonmedia.com> 2021-09-14 06:36:47 +0200
committer GitHub <noreply@github.com> 2021-09-14 06:36:47 +0200
commit c12ca01e6796f882ff76fe946bbb3de342881a32 (patch)
tree c909a8ecc6a3db4ad1f25935b5e4ebce16e38f2d /node-repository/src
parent bb08873b35c620d242614658ded6e371cb1fa303 (diff)
parent 7088870382b4f78ef3c0375e845b587e7211cd95 (diff)
Merge pull request #19097 from vespa-engine/hakon/support-starting-with-unknown-service-status
Support having services in status UNKNOWN at cfg boot
Diffstat (limited to 'node-repository/src')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java 3
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java 28
3 files changed, 23 insertions, 10 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
index d8bbf305b57..c0b635cf764 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
@@ -248,6 +248,8 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
metric.set("someServicesDown", (numberOfServicesDown > 0 ? 1 : 0), context);
+ metric.set("numberOfServicesUnknown", servicesCount.getOrDefault(ServiceStatus.UNKNOWN, 0L), context);
+
boolean down = NodeHealthTracker.allDown(services);
metric.set("nodeFailerBadNode", (down ? 1 : 0), context);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java
index 37969a30b81..693d8b6be9c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java
@@ -116,7 +116,8 @@ public class NodeHealthTracker extends NodeRepositoryMaintainer {
.collect(Collectors.groupingBy(ServiceInstance::serviceStatus, counting()));
return countsByStatus.getOrDefault(ServiceStatus.UP, 0L) <= 0L &&
- countsByStatus.getOrDefault(ServiceStatus.DOWN, 0L) > 0L;
+ countsByStatus.getOrDefault(ServiceStatus.DOWN, 0L) > 0L &&
+ countsByStatus.getOrDefault(ServiceStatus.UNKNOWN, 0L) == 0L;
}
/** Get node by given hostname and application. The applicationLock must be held when calling this */
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
index 2bd0c91f4a1..ad887212a05 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
@@ -672,14 +672,23 @@ public class NodeFailerTest {
@Test
public void testUpness() {
- assertFalse(badNode(0, 0, 0));
- assertFalse(badNode(0, 0, 2));
- assertFalse(badNode(0, 3, 0));
- assertFalse(badNode(0, 3, 2));
- assertTrue(badNode(1, 0, 0));
- assertTrue(badNode(1, 0, 2));
- assertFalse(badNode(1, 3, 0));
- assertFalse(badNode(1, 3, 2));
+ assertFalse(badNode(0, 0, 0, 0));
+ assertFalse(badNode(0, 0, 0, 2));
+ assertFalse(badNode(0, 3, 0, 0));
+ assertFalse(badNode(0, 3, 0, 2));
+ assertTrue(badNode(1, 0, 0, 0));
+ assertTrue(badNode(1, 0, 0, 2));
+ assertFalse(badNode(1, 3, 0, 0));
+ assertFalse(badNode(1, 3, 0, 2));
+
+ assertFalse(badNode(0, 0, 1, 0));
+ assertFalse(badNode(0, 0, 1, 2));
+ assertFalse(badNode(0, 3, 1, 0));
+ assertFalse(badNode(0, 3, 1, 2));
+ assertFalse(badNode(1, 0, 1, 0));
+ assertFalse(badNode(1, 0, 1, 2));
+ assertFalse(badNode(1, 3, 1, 0));
+ assertFalse(badNode(1, 3, 1, 2));
}
private void addServiceInstances(List<ServiceInstance> list, ServiceStatus status, int num) {
@@ -690,10 +699,11 @@ public class NodeFailerTest {
}
}
- private boolean badNode(int numDown, int numUp, int numNotChecked) {
+ private boolean badNode(int numDown, int numUp, int numUnknown, int numNotChecked) {
List<ServiceInstance> services = new ArrayList<>();
addServiceInstances(services, ServiceStatus.DOWN, numDown);
addServiceInstances(services, ServiceStatus.UP, numUp);
+ addServiceInstances(services, ServiceStatus.UNKNOWN, numUnknown);
addServiceInstances(services, ServiceStatus.NOT_CHECKED, numNotChecked);
Collections.shuffle(services);