diff options
author | Håkon Hallingstad <hakon@oath.com> | 2019-01-16 08:14:14 +0100 |
---|---|---|
committer | Håkon Hallingstad <hakon@oath.com> | 2019-01-16 08:14:14 +0100 |
commit | c8aa5a5ba53c24206f390a1eee39ec3e704a5fc4 (patch) | |
tree | a1b5cbbda6b79263db725454fea7e750f75e2600 /service-monitor/src/main | |
parent | 7b210b2bda57003de6fff1dbe28348fe6685fa0c (diff) |
Support monitoring health of tenant hosts
Diffstat (limited to 'service-monitor/src/main')
10 files changed, 147 insertions, 38 deletions
diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/duper/ConfigServerApplication.java b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/ConfigServerApplication.java index 91759b32086..25479b7b03a 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/duper/ConfigServerApplication.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/ConfigServerApplication.java @@ -9,9 +9,6 @@ import com.yahoo.vespa.applicationmodel.ServiceType; * A service/application model of the config server with health status. */ public class ConfigServerApplication extends ConfigServerLikeApplication { - - public static final ConfigServerApplication CONFIG_SERVER_APPLICATION = new ConfigServerApplication(); - public ConfigServerApplication() { super("zone-config-servers", NodeType.config, ClusterSpec.Type.admin, ServiceType.CONFIG_SERVER); } diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/duper/HostAdminApplication.java b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/HostAdminApplication.java index 5e6cb23e9c1..7772f989746 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/duper/HostAdminApplication.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/HostAdminApplication.java @@ -9,8 +9,10 @@ import com.yahoo.vespa.applicationmodel.ServiceType; * @author hakonhall */ public abstract class HostAdminApplication extends InfraApplication { + public static final int HOST_ADMIN_HEALT_PORT = 8080; + protected HostAdminApplication(String applicationName, NodeType nodeType) { super(applicationName, nodeType, ClusterSpec.Type.container, ClusterSpec.Id.from(applicationName), - ServiceType.HOST_ADMIN, 8080); + ServiceType.HOST_ADMIN, HOST_ADMIN_HEALT_PORT); } } diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/duper/ZoneApplication.java b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/ZoneApplication.java index 65198e72c89..70354c0f16d 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/duper/ZoneApplication.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/ZoneApplication.java @@ -1,9 +1,15 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.service.duper; +import com.yahoo.config.model.api.ServiceInfo; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationName; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.TenantName; import com.yahoo.vespa.applicationmodel.ClusterId; import com.yahoo.vespa.applicationmodel.ServiceType; +import com.yahoo.vespa.service.model.ApplicationInstanceGenerator; import java.util.Objects; @@ -17,14 +23,84 @@ import java.util.Objects; public class ZoneApplication { private ZoneApplication() {} - public static final ApplicationId ZONE_APPLICATION_ID = InfraApplication + private static final ApplicationId ZONE_APPLICATION_ID = InfraApplication .createHostedVespaApplicationId("routing"); + private static final ClusterId NODE_ADMIN_CLUSTER_ID = new ClusterId("node-admin"); + private static final ClusterId ROUTING_CLUSTER_ID = new ClusterId("routing"); + + public static ApplicationId getApplicationId() { + return ZONE_APPLICATION_ID; + } + + public static TenantName getTenantName() { + return ZONE_APPLICATION_ID.tenant(); + } + + public static ApplicationName getApplicationName() { + return ZONE_APPLICATION_ID.application(); + } + + public static NodeType getNodeAdminNodeType() { + return NodeType.host; + } + + public static ClusterId getNodeAdminClusterId() { + return NODE_ADMIN_CLUSTER_ID; + } + + public static ClusterSpec.Type getNodeAdminClusterSpecType() { + return ClusterSpec.Type.container; + } + + public static ClusterSpec.Id getNodeAdminClusterSpecId() { + return new ClusterSpec.Id(getNodeAdminClusterId().s()); + } + + public static ServiceType getNodeAdminServiceType() { + return ServiceType.CONTAINER; + } + + public static int getNodeAdminHealthPort() { + return HostAdminApplication.HOST_ADMIN_HEALT_PORT; + } + + public static NodeType getRoutingNodeType() { + return NodeType.proxy; + } + + public static ClusterId getRoutingClusterId() { + return ROUTING_CLUSTER_ID; + } + + public static ClusterSpec.Type getRoutingClusterSpecType() { + return ClusterSpec.Type.container; + } + + public static ClusterSpec.Id getRoutingClusterSpecId() { + return new ClusterSpec.Id(getRoutingClusterId().s()); + } + + public static ServiceType getRoutingServiceType() { + return ServiceType.CONTAINER; + } + + public static int getRoutingHealthPort() { + return 4088; + } public static boolean isNodeAdminService(ApplicationId applicationId, ClusterId clusterId, ServiceType serviceType) { - return Objects.equals(applicationId, ZONE_APPLICATION_ID) && - Objects.equals(serviceType, ServiceType.CONTAINER) && - Objects.equals(clusterId, ClusterId.NODE_ADMIN); + return Objects.equals(applicationId, getApplicationId()) && + Objects.equals(serviceType, getNodeAdminServiceType()) && + Objects.equals(clusterId, getNodeAdminClusterId()); + } + + /** Whether a {@link ServiceInfo} belongs to the zone application's node-admin cluster. */ + public static boolean isNodeAdminServiceInfo(ApplicationId applicationId, ServiceInfo serviceInfo) { + return isNodeAdminService( + applicationId, + ApplicationInstanceGenerator.getClusterId(serviceInfo), + ApplicationInstanceGenerator.toServiceType(serviceInfo)); } } diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/health/ApplicationHealthMonitor.java b/service-monitor/src/main/java/com/yahoo/vespa/service/health/ApplicationHealthMonitor.java index 5fab8ac8591..5eac6fbb000 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/health/ApplicationHealthMonitor.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/health/ApplicationHealthMonitor.java @@ -47,11 +47,6 @@ class ApplicationHealthMonitor implements ServiceStatusProvider, AutoCloseable { } @Override - public boolean wouldMonitor(ApplicationId applicationId) { - return true; - } - - @Override public ServiceStatus getStatus(ApplicationId applicationId, ClusterId clusterId, ServiceType serviceType, diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/health/HealthMonitorManager.java b/service-monitor/src/main/java/com/yahoo/vespa/service/health/HealthMonitorManager.java index 71938a9f1dc..ddfbdf59b3c 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/health/HealthMonitorManager.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/health/HealthMonitorManager.java @@ -9,6 +9,7 @@ import com.yahoo.vespa.applicationmodel.ConfigId; import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.flags.FlagSource; +import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.service.duper.DuperModelManager; import com.yahoo.vespa.service.duper.ZoneApplication; import com.yahoo.vespa.service.executor.RunletExecutorImpl; @@ -47,20 +48,34 @@ public class HealthMonitorManager implements MonitorManager { private final ConcurrentHashMap<ApplicationId, ApplicationHealthMonitor> healthMonitors = new ConcurrentHashMap<>(); private final DuperModelManager duperModel; + private final boolean monitorTenantHostHealth; private final ApplicationHealthMonitorFactory applicationHealthMonitorFactory; @Inject public HealthMonitorManager(DuperModelManager duperModel, FlagSource flagSource) { - this(duperModel, new StateV1HealthModel( - TARGET_HEALTH_STALENESS, HEALTH_REQUEST_TIMEOUT, KEEP_ALIVE, new RunletExecutorImpl(THREAD_POOL_SIZE))); + this(duperModel, Flags.MONITOR_TENANT_HOST_HEALTH.bindTo(flagSource).value()); } - private HealthMonitorManager(DuperModelManager duperModel, StateV1HealthModel healthModel) { - this(duperModel, id -> new ApplicationHealthMonitor(id, healthModel)); + private HealthMonitorManager(DuperModelManager duperModel, boolean monitorTenantHostHealth) { + this(duperModel, monitorTenantHostHealth, + new StateV1HealthModel( + TARGET_HEALTH_STALENESS, + HEALTH_REQUEST_TIMEOUT, + KEEP_ALIVE, + new RunletExecutorImpl(THREAD_POOL_SIZE), + monitorTenantHostHealth)); } - HealthMonitorManager(DuperModelManager duperModel, ApplicationHealthMonitorFactory applicationHealthMonitorFactory) { + private HealthMonitorManager(DuperModelManager duperModel, boolean monitorTenantHostHealth, StateV1HealthModel healthModel) { + this(duperModel, monitorTenantHostHealth, id -> new ApplicationHealthMonitor(id, healthModel)); + } + + /** Default access due to testing. */ + HealthMonitorManager(DuperModelManager duperModel, + boolean monitorTenantHostHealth, + ApplicationHealthMonitorFactory applicationHealthMonitorFactory) { this.duperModel = duperModel; + this.monitorTenantHostHealth = monitorTenantHostHealth; this.applicationHealthMonitorFactory = applicationHealthMonitorFactory; } @@ -86,22 +101,41 @@ public class HealthMonitorManager implements MonitorManager { ClusterId clusterId, ServiceType serviceType, ConfigId configId) { - if (ZoneApplication.isNodeAdminService(applicationId, clusterId, serviceType)) { - // If node admin doesn't run in a JDisc container, it must be monitored with health. - // TODO: Do proper health check + ApplicationHealthMonitor monitor = healthMonitors.get(applicationId); + + if (!monitorTenantHostHealth && ZoneApplication.isNodeAdminService(applicationId, clusterId, serviceType)) { + // Legacy: The zone app is not health monitored (monitor == null), but the node-admin cluster's services + // are hard-coded to be UP return ServiceStatus.UP; } - ApplicationHealthMonitor monitor = healthMonitors.get(applicationId); if (monitor == null) { return ServiceStatus.NOT_CHECKED; } + if (monitorTenantHostHealth && applicationId.equals(ZoneApplication.getApplicationId())) { + // New: The zone app is health monitored (monitor != null), possibly even the routing cluster + // which is a normal jdisc container (unnecessary but harmless), but the node-admin cluster + // are tenant Docker hosts running host admin that are monitored via /state/v1/health. + if (ZoneApplication.isNodeAdminService(applicationId, clusterId, serviceType)) { + return monitor.getStatus(applicationId, clusterId, serviceType, configId); + } else { + return ServiceStatus.NOT_CHECKED; + } + } + return monitor.getStatus(applicationId, clusterId, serviceType, configId); } - @Override - public boolean wouldMonitor(ApplicationId id) { - return duperModel.isSupportedInfraApplication(id); + private boolean wouldMonitor(ApplicationId id) { + if (duperModel.isSupportedInfraApplication(id)) { + return true; + } + + if (monitorTenantHostHealth && id.equals(ZoneApplication.getApplicationId())) { + return true; + } + + return false; } } diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/health/StateV1HealthModel.java b/service-monitor/src/main/java/com/yahoo/vespa/service/health/StateV1HealthModel.java index 5e8979deb9f..04943f81478 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/health/StateV1HealthModel.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/health/StateV1HealthModel.java @@ -6,6 +6,7 @@ import com.yahoo.config.model.api.HostInfo; import com.yahoo.config.model.api.PortInfo; import com.yahoo.config.model.api.ServiceInfo; import com.yahoo.config.provision.HostName; +import com.yahoo.vespa.service.duper.ZoneApplication; import com.yahoo.vespa.service.executor.RunletExecutor; import com.yahoo.vespa.service.model.ApplicationInstanceGenerator; import com.yahoo.vespa.service.model.ServiceId; @@ -29,23 +30,36 @@ public class StateV1HealthModel implements AutoCloseable { private final Duration requestTimeout; private final Duration connectionKeepAlive; private final RunletExecutor executor; + private final boolean monitorTenantHostHealth; StateV1HealthModel(Duration targetHealthStaleness, Duration requestTimeout, Duration connectionKeepAlive, - RunletExecutor executor) { + RunletExecutor executor, + boolean monitorTenantHostHealth) { this.targetHealthStaleness = targetHealthStaleness; this.requestTimeout = requestTimeout; this.connectionKeepAlive = connectionKeepAlive; this.executor = executor; + this.monitorTenantHostHealth = monitorTenantHostHealth; } Map<ServiceId, HealthEndpoint> extractHealthEndpoints(ApplicationInfo application) { Map<ServiceId, HealthEndpoint> endpoints = new HashMap<>(); + boolean isZoneApplication = application.getApplicationId().equals(ZoneApplication.getApplicationId()); + for (HostInfo hostInfo : application.getModel().getHosts()) { HostName hostname = HostName.from(hostInfo.getHostname()); for (ServiceInfo serviceInfo : hostInfo.getServices()) { + + if (monitorTenantHostHealth && isZoneApplication && + !ZoneApplication.isNodeAdminServiceInfo(application.getApplicationId(), serviceInfo)) { + // Only the node admin/host admin cluster of the zone application should be monitored + // TODO: Move the node admin cluster out to a separate infrastructure application + continue; + } + ServiceId serviceId = ApplicationInstanceGenerator.getServiceId(application, serviceInfo); for (PortInfo portInfo : serviceInfo.getPorts()) { if (portInfo.getTags().containsAll(HTTP_HEALTH_PORT_TAGS)) { diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/manager/UnionMonitorManager.java b/service-monitor/src/main/java/com/yahoo/vespa/service/manager/UnionMonitorManager.java index eacaf820f3d..cfd9269d9c4 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/manager/UnionMonitorManager.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/manager/UnionMonitorManager.java @@ -38,11 +38,6 @@ public class UnionMonitorManager implements MonitorManager { } @Override - public boolean wouldMonitor(ApplicationId id) { - return healthMonitorManager.wouldMonitor(id) || slobrokMonitorManager.wouldMonitor(id); - } - - @Override public void applicationActivated(ApplicationInfo application) { slobrokMonitorManager.applicationActivated(application); healthMonitorManager.applicationActivated(application); diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/model/ApplicationInstanceGenerator.java b/service-monitor/src/main/java/com/yahoo/vespa/service/model/ApplicationInstanceGenerator.java index f15101d4439..3ca9446df26 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/model/ApplicationInstanceGenerator.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/model/ApplicationInstanceGenerator.java @@ -131,7 +131,7 @@ public class ApplicationInstanceGenerator { toConfigId(serviceInfo)); } - private static ClusterId getClusterId(ServiceInfo serviceInfo) { + public static ClusterId getClusterId(ServiceInfo serviceInfo) { return new ClusterId(serviceInfo.getProperty(CLUSTER_ID_PROPERTY_NAME).orElse("")); } @@ -141,7 +141,7 @@ public class ApplicationInstanceGenerator { return new ServiceClusterKey(clusterId, serviceType); } - private static ServiceType toServiceType(ServiceInfo serviceInfo) { + public static ServiceType toServiceType(ServiceInfo serviceInfo) { return new ServiceType(serviceInfo.getServiceType()); } diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/ServiceStatusProvider.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/ServiceStatusProvider.java index 848cf68c48b..88c72a7d47a 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/ServiceStatusProvider.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/ServiceStatusProvider.java @@ -22,7 +22,4 @@ public interface ServiceStatusProvider { ClusterId clusterId, ServiceType serviceType, ConfigId configId); - - /** Returns true if the status provider would start monitoring the application. */ - boolean wouldMonitor(ApplicationId applicationId); } diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/slobrok/SlobrokMonitorManagerImpl.java b/service-monitor/src/main/java/com/yahoo/vespa/service/slobrok/SlobrokMonitorManagerImpl.java index 13f24bc3694..0f3a3cfbe68 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/slobrok/SlobrokMonitorManagerImpl.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/slobrok/SlobrokMonitorManagerImpl.java @@ -108,8 +108,7 @@ public class SlobrokMonitorManagerImpl implements SlobrokApi, MonitorManager { } } - @Override - public boolean wouldMonitor(ApplicationId applicationId) { + private boolean wouldMonitor(ApplicationId applicationId) { if (duperModel.isSupportedInfraApplication(applicationId)) { return false; } |