diff options
author | Håkon Hallingstad <hakon@yahooinc.com> | 2021-09-13 16:41:21 +0200 |
---|---|---|
committer | Håkon Hallingstad <hakon@yahooinc.com> | 2021-09-13 16:41:21 +0200 |
commit | a3fcef7201b804437becb7773821bb1457a1c8ce (patch) | |
tree | 42db280ea8d99bcb80378e907db8aa3ba6262cc6 | |
parent | dc4fd3d892d799cee3016f04f134bd1baa5c9ce6 (diff) |
Add ServiceStatus.UNKNOWN
17 files changed, 76 insertions, 27 deletions
diff --git a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceStatus.java b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceStatus.java index a30ae29e1b6..4eb4c7cc990 100644 --- a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceStatus.java +++ b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceStatus.java @@ -7,5 +7,10 @@ package com.yahoo.vespa.applicationmodel; public enum ServiceStatus { UP, DOWN, - NOT_CHECKED; + + /** The status has not yet been probed or has expired. A status of UP or DOWN is expected shortly. */ + UNKNOWN, + + /** The service is not monitored for health, and will never get any other status than NOT_CHECKED. */ + NOT_CHECKED } diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 11cfbab66f7..0555fa6eb73 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -155,6 +155,12 @@ public class Flags { "Use a new algorithm to calculate the spare disks of a host.", "Takes effect on first run of DiskTask, typically after host-admin restart/upgrade."); + public static final UnboundBooleanFlag USE_UNKNOWN_SERVICE_STATUS = defineFeatureFlag( + "use-unknown-service-status", false, + List.of("hakonhall"), "2021-09-13", "2021-09-13", + "Whether to use the UNKNOWN ServiceStatus for services that have not yet been probed by service monitor.", + "Takes effect on first (re)start of config server."); + public static final UnboundBooleanFlag ENABLE_FEED_BLOCK_IN_DISTRIBUTOR = defineFeatureFlag( "enable-feed-block-in-distributor", true, List.of("geirst"), "2021-01-27", "2021-11-01", diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java index d8bbf305b57..c0b635cf764 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java @@ -248,6 +248,8 @@ public class MetricsReporter extends NodeRepositoryMaintainer { metric.set("someServicesDown", (numberOfServicesDown > 0 ? 1 : 0), context); + metric.set("numberOfServicesUnknown", servicesCount.getOrDefault(ServiceStatus.UNKNOWN, 0L), context); + boolean down = NodeHealthTracker.allDown(services); metric.set("nodeFailerBadNode", (down ? 1 : 0), context); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java index 37969a30b81..693d8b6be9c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java @@ -116,7 +116,8 @@ public class NodeHealthTracker extends NodeRepositoryMaintainer { .collect(Collectors.groupingBy(ServiceInstance::serviceStatus, counting())); return countsByStatus.getOrDefault(ServiceStatus.UP, 0L) <= 0L && - countsByStatus.getOrDefault(ServiceStatus.DOWN, 0L) > 0L; + countsByStatus.getOrDefault(ServiceStatus.DOWN, 0L) > 0L && + countsByStatus.getOrDefault(ServiceStatus.UNKNOWN, 0L) == 0L; } /** Get node by given hostname and application. The applicationLock must be held when calling this */ diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java index 2bd0c91f4a1..ad887212a05 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java @@ -672,14 +672,23 @@ public class NodeFailerTest { @Test public void testUpness() { - assertFalse(badNode(0, 0, 0)); - assertFalse(badNode(0, 0, 2)); - assertFalse(badNode(0, 3, 0)); - assertFalse(badNode(0, 3, 2)); - assertTrue(badNode(1, 0, 0)); - assertTrue(badNode(1, 0, 2)); - assertFalse(badNode(1, 3, 0)); - assertFalse(badNode(1, 3, 2)); + assertFalse(badNode(0, 0, 0, 0)); + assertFalse(badNode(0, 0, 0, 2)); + assertFalse(badNode(0, 3, 0, 0)); + assertFalse(badNode(0, 3, 0, 2)); + assertTrue(badNode(1, 0, 0, 0)); + assertTrue(badNode(1, 0, 0, 2)); + assertFalse(badNode(1, 3, 0, 0)); + assertFalse(badNode(1, 3, 0, 2)); + + assertFalse(badNode(0, 0, 1, 0)); + assertFalse(badNode(0, 0, 1, 2)); + assertFalse(badNode(0, 3, 1, 0)); + assertFalse(badNode(0, 3, 1, 2)); + assertFalse(badNode(1, 0, 1, 0)); + assertFalse(badNode(1, 0, 1, 2)); + assertFalse(badNode(1, 3, 1, 0)); + assertFalse(badNode(1, 3, 1, 2)); } private void addServiceInstances(List<ServiceInstance> list, ServiceStatus status, int num) { @@ -690,10 +699,11 @@ public class NodeFailerTest { } } - private boolean badNode(int numDown, int numUp, int numNotChecked) { + private boolean badNode(int numDown, int numUp, int numUnknown, int numNotChecked) { List<ServiceInstance> services = new ArrayList<>(); addServiceInstances(services, ServiceStatus.DOWN, numDown); addServiceInstances(services, ServiceStatus.UP, numUp); + addServiceInstances(services, ServiceStatus.UNKNOWN, numUnknown); addServiceInstances(services, ServiceStatus.NOT_CHECKED, numNotChecked); Collections.shuffle(services); diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java index 6e88b227e61..b8538079194 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java @@ -126,7 +126,10 @@ class ClusterApiImpl implements ClusterApi { continue; } - if (service.serviceStatus() == ServiceStatus.DOWN) { + if (service.serviceStatus() == ServiceStatus.UNKNOWN) { + reasons.mergeWith(SuspensionReasons.unknownStatus(service)); + continue; + } else if (service.serviceStatus() == ServiceStatus.DOWN) { Optional<Instant> since = service.serviceStatusInfo().since(); if (since.isEmpty()) { reasons.mergeWith(SuspensionReasons.isDown(service)); diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/SuspensionReasons.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/SuspensionReasons.java index c043396497b..4a1b4528d02 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/SuspensionReasons.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/SuspensionReasons.java @@ -23,12 +23,19 @@ public class SuspensionReasons { private final Map<HostName, List<String>> reasons = new HashMap<>(); public static SuspensionReasons nothingNoteworthy() { return new SuspensionReasons(); } + public static SuspensionReasons isDown(ServiceInstance service) { return new SuspensionReasons().addReason( service.hostName(), service.descriptiveName() + " is down"); } + public static SuspensionReasons unknownStatus(ServiceInstance service) { + return new SuspensionReasons().addReason( + service.hostName(), + service.descriptiveName() + " has not yet been probed for health"); + } + public static SuspensionReasons downSince(ServiceInstance service, Instant instant, Duration downDuration) { return new SuspensionReasons().addReason( service.hostName(), diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java index c3e53b2f340..1138dd3f738 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java @@ -165,9 +165,11 @@ public class ApplicationApiImplTest { verifyNoRemarksConditionWith(HostStatus.NO_REMARKS, ServiceStatus.UP, true); verifyNoRemarksConditionWith(HostStatus.NO_REMARKS, ServiceStatus.NOT_CHECKED, true); verifyNoRemarksConditionWith(HostStatus.NO_REMARKS, ServiceStatus.DOWN, true); + verifyNoRemarksConditionWith(HostStatus.NO_REMARKS, ServiceStatus.UNKNOWN, true); verifyNoRemarksConditionWith(HostStatus.ALLOWED_TO_BE_DOWN, ServiceStatus.UP, false); verifyNoRemarksConditionWith(HostStatus.ALLOWED_TO_BE_DOWN, ServiceStatus.NOT_CHECKED, false); verifyNoRemarksConditionWith(HostStatus.ALLOWED_TO_BE_DOWN, ServiceStatus.DOWN, false); + verifyNoRemarksConditionWith(HostStatus.ALLOWED_TO_BE_DOWN, ServiceStatus.UNKNOWN, false); } private void verifyNoRemarksConditionWith(HostStatus hostStatus, ServiceStatus serviceStatus, boolean expectUp) { diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java index c61c8eb34ec..2946c82eab8 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java @@ -192,7 +192,7 @@ public class ClusterApiImplTest { @Test public void testSingleConfigServerCanSuspend() { - for (var status : EnumSet.of(ServiceStatus.UP, ServiceStatus.DOWN)) { + for (var status : EnumSet.of(ServiceStatus.UP, ServiceStatus.DOWN, ServiceStatus.UNKNOWN)) { var clusterApi = makeConfigClusterApi(1, status); var policy = new HostedVespaClusterPolicy(flagSource, zone); try { diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/health/HealthMonitorManager.java b/service-monitor/src/main/java/com/yahoo/vespa/service/health/HealthMonitorManager.java index d6e15f6af4e..fd7c77e2a20 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/health/HealthMonitorManager.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/health/HealthMonitorManager.java @@ -9,6 +9,8 @@ import com.yahoo.vespa.applicationmodel.ConfigId; import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.applicationmodel.ServiceStatusInfo; import com.yahoo.vespa.applicationmodel.ServiceType; +import com.yahoo.vespa.flags.FlagSource; +import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.service.duper.DuperModelManager; import com.yahoo.vespa.service.executor.RunletExecutorImpl; import com.yahoo.vespa.service.manager.HealthMonitorApi; @@ -54,13 +56,14 @@ public class HealthMonitorManager implements MonitorManager, HealthMonitorApi { private final ApplicationHealthMonitorFactory applicationHealthMonitorFactory; @Inject - public HealthMonitorManager(DuperModelManager duperModel) { + public HealthMonitorManager(DuperModelManager duperModel, FlagSource flagSource) { this(duperModel, new StateV1HealthModel( TARGET_HEALTH_STALENESS, HEALTH_REQUEST_TIMEOUT, KEEP_ALIVE, - new RunletExecutorImpl(THREAD_POOL_SIZE))); + new RunletExecutorImpl(THREAD_POOL_SIZE), + Flags.USE_UNKNOWN_SERVICE_STATUS.bindTo(flagSource).value())); } private HealthMonitorManager(DuperModelManager duperModel, StateV1HealthModel healthModel) { diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/health/StateV1HealthEndpoint.java b/service-monitor/src/main/java/com/yahoo/vespa/service/health/StateV1HealthEndpoint.java index 73f0480bf96..2128d2b77c3 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/health/StateV1HealthEndpoint.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/health/StateV1HealthEndpoint.java @@ -20,6 +20,7 @@ class StateV1HealthEndpoint implements HealthEndpoint { private final Duration connectionKeepAlive; private final Duration delay; private final RunletExecutor executor; + private boolean useUnknownServiceStatus; StateV1HealthEndpoint(ServiceId serviceId, HostName hostname, @@ -27,10 +28,12 @@ class StateV1HealthEndpoint implements HealthEndpoint { Duration delay, Duration requestTimeout, Duration connectionKeepAlive, - RunletExecutor executor) { + RunletExecutor executor, + boolean useUnknownServiceStatus) { this.serviceId = serviceId; this.delay = delay; this.executor = executor; + this.useUnknownServiceStatus = useUnknownServiceStatus; this.url = uncheck(() -> new URL("http", hostname.value(), port, "/state/v1/health")); this.requestTimeout = requestTimeout; this.connectionKeepAlive = connectionKeepAlive; @@ -43,7 +46,7 @@ class StateV1HealthEndpoint implements HealthEndpoint { @Override public HealthMonitor startMonitoring() { - StateV1HealthUpdater updater = new StateV1HealthUpdater(url, requestTimeout, connectionKeepAlive); + var updater = new StateV1HealthUpdater(url, requestTimeout, connectionKeepAlive, useUnknownServiceStatus); return new StateV1HealthMonitor(updater, executor, delay); } diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/health/StateV1HealthModel.java b/service-monitor/src/main/java/com/yahoo/vespa/service/health/StateV1HealthModel.java index 0408e0134ea..81a6d6d6013 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/health/StateV1HealthModel.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/health/StateV1HealthModel.java @@ -30,15 +30,18 @@ public class StateV1HealthModel implements AutoCloseable { private final Duration requestTimeout; private final Duration connectionKeepAlive; private final RunletExecutor executor; + private final boolean useUnknownServiceStatus; StateV1HealthModel(Duration targetHealthStaleness, Duration requestTimeout, Duration connectionKeepAlive, - RunletExecutor executor) { + RunletExecutor executor, + boolean useUnknownServiceStatus) { this.targetHealthStaleness = targetHealthStaleness; this.requestTimeout = requestTimeout; this.connectionKeepAlive = connectionKeepAlive; this.executor = executor; + this.useUnknownServiceStatus = useUnknownServiceStatus; } Map<ServiceId, HealthEndpoint> extractHealthEndpoints(ApplicationInfo application) { @@ -57,7 +60,8 @@ public class StateV1HealthModel implements AutoCloseable { targetHealthStaleness, requestTimeout, connectionKeepAlive, - executor); + executor, + useUnknownServiceStatus); endpoints.put(serviceId, endpoint); break; // Avoid >1 endpoints per serviceId } diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/health/StateV1HealthUpdater.java b/service-monitor/src/main/java/com/yahoo/vespa/service/health/StateV1HealthUpdater.java index 5813e2cef39..c544ea4b8b4 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/health/StateV1HealthUpdater.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/health/StateV1HealthUpdater.java @@ -24,13 +24,16 @@ class StateV1HealthUpdater implements HealthUpdater { private final String endpoint; private final StateV1HealthClient healthClient; - private volatile ServiceStatusInfo serviceStatusInfo = new ServiceStatusInfo(ServiceStatus.NOT_CHECKED); + private volatile ServiceStatusInfo serviceStatusInfo; - StateV1HealthUpdater(URL url, Duration requestTimeout, Duration connectionKeepAlive) { - this(url.toString(), new StateV1HealthClient(url, requestTimeout, connectionKeepAlive)); + StateV1HealthUpdater(URL url, Duration requestTimeout, Duration connectionKeepAlive, + boolean useUnknownServiceStatus) { + this(url.toString(), new StateV1HealthClient(url, requestTimeout, connectionKeepAlive), useUnknownServiceStatus); } - StateV1HealthUpdater(String endpoint, StateV1HealthClient healthClient) { + StateV1HealthUpdater(String endpoint, StateV1HealthClient healthClient, boolean useUnknownServiceStatus) { + var serviceStatus = useUnknownServiceStatus ? ServiceStatus.UNKNOWN : ServiceStatus.NOT_CHECKED; + this.serviceStatusInfo = new ServiceStatusInfo(serviceStatus); this.endpoint = endpoint; this.healthClient = healthClient; } diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/ServiceStatusProvider.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/ServiceStatusProvider.java index 9486bf505ab..891fbbd1885 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/ServiceStatusProvider.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/ServiceStatusProvider.java @@ -17,7 +17,7 @@ public interface ServiceStatusProvider { * Get the {@link ServiceStatus} of a particular service. * * <p>{@link ServiceStatus#NOT_CHECKED NOT_CHECKED} must be returned if the - * service status provider does does not monitor the service status for + * service status provider does not monitor the service status for * the particular application, cluster, service type, and config id. */ ServiceStatusInfo getStatus(ApplicationId applicationId, diff --git a/service-monitor/src/test/java/com/yahoo/vespa/service/health/StateV1HealthModelTest.java b/service-monitor/src/test/java/com/yahoo/vespa/service/health/StateV1HealthModelTest.java index 94f9e8e0999..70454ffdd58 100644 --- a/service-monitor/src/test/java/com/yahoo/vespa/service/health/StateV1HealthModelTest.java +++ b/service-monitor/src/test/java/com/yahoo/vespa/service/health/StateV1HealthModelTest.java @@ -41,7 +41,7 @@ public class StateV1HealthModelTest { private final List<HostName> hostnames = Stream.of("host1", "host2").map(HostName::from).collect(Collectors.toList()); private final ApplicationInfo proxyHostApplicationInfo = proxyHostApplication.makeApplicationInfo(hostnames); - private final StateV1HealthModel model = new StateV1HealthModel(healthStaleness, requestTimeout, keepAlive, executor); + private final StateV1HealthModel model = new StateV1HealthModel(healthStaleness, requestTimeout, keepAlive, executor, false); @Test public void test() { diff --git a/service-monitor/src/test/java/com/yahoo/vespa/service/health/StateV1HealthMonitorTest.java b/service-monitor/src/test/java/com/yahoo/vespa/service/health/StateV1HealthMonitorTest.java index e3fb7d08d93..bec33a7dbfd 100644 --- a/service-monitor/src/test/java/com/yahoo/vespa/service/health/StateV1HealthMonitorTest.java +++ b/service-monitor/src/test/java/com/yahoo/vespa/service/health/StateV1HealthMonitorTest.java @@ -17,7 +17,7 @@ public class StateV1HealthMonitorTest { public void downThenUpThenDown() throws Exception { StateV1HealthClient client = mock(StateV1HealthClient.class); - StateV1HealthUpdater updater = new StateV1HealthUpdater("https://foo/state/v1/health", client); + StateV1HealthUpdater updater = new StateV1HealthUpdater("https://foo/state/v1/health", client, false); RunletExecutor executor = new RunletExecutorImpl(2); try (StateV1HealthMonitor monitor = new StateV1HealthMonitor(updater, executor, Duration.ofMillis(10))) { assertEquals(ServiceStatus.NOT_CHECKED, monitor.getStatus().serviceStatus()); diff --git a/service-monitor/src/test/java/com/yahoo/vespa/service/health/StateV1HealthUpdaterTest.java b/service-monitor/src/test/java/com/yahoo/vespa/service/health/StateV1HealthUpdaterTest.java index bb60d049aa8..833507dfb6e 100644 --- a/service-monitor/src/test/java/com/yahoo/vespa/service/health/StateV1HealthUpdaterTest.java +++ b/service-monitor/src/test/java/com/yahoo/vespa/service/health/StateV1HealthUpdaterTest.java @@ -165,6 +165,6 @@ public class StateV1HealthUpdaterTest { private StateV1HealthUpdater makeUpdater(CloseableHttpClient client, Function<HttpEntity, String> getContentFunction) { ApacheHttpClient apacheHttpClient = new ApacheHttpClient(url, client); StateV1HealthClient healthClient = new StateV1HealthClient(apacheHttpClient, getContentFunction); - return new StateV1HealthUpdater(url.toString(), healthClient); + return new StateV1HealthUpdater(url.toString(), healthClient, false); } }
\ No newline at end of file |