From 7afd37ccba6960cf845710731d009f555672f6a2 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Wed, 19 Jan 2022 09:45:13 +0100 Subject: Handle services using restartOnDeploy when checking config convergence --- .../vespa/config/server/ApplicationRepository.java | 2 +- .../application/ConfigConvergenceChecker.java | 50 ++++++++++++++++++++-- .../vespa/config/server/deploy/Deployment.java | 16 +++---- .../server/MockConfigConvergenceChecker.java | 7 ++- .../application/ConfigConvergenceCheckerTest.java | 4 +- 5 files changed, 62 insertions(+), 17 deletions(-) diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java index 2ee3e6c0343..ccd9f0e7044 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java @@ -757,7 +757,7 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye public ServiceListResponse servicesToCheckForConfigConvergence(ApplicationId applicationId, Duration timeoutPerService, Optional vespaVersion) { - return convergeChecker.getConfigGenerationsForAllServices(getApplication(applicationId, vespaVersion), timeoutPerService); + return convergeChecker.checkConvergenceForAllServices(getApplication(applicationId, vespaVersion), timeoutPerService); } public ConfigConvergenceChecker configConvergenceChecker() { return convergeChecker; } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/application/ConfigConvergenceChecker.java b/configserver/src/main/java/com/yahoo/vespa/config/server/application/ConfigConvergenceChecker.java index c734b01aee3..ba0392ffc5a 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/application/ConfigConvergenceChecker.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/application/ConfigConvergenceChecker.java @@ -7,6 +7,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.inject.Inject; import com.yahoo.component.AbstractComponent; import com.yahoo.concurrent.DaemonThreadFactory; +import com.yahoo.config.model.api.ApplicationClusterInfo; import com.yahoo.config.model.api.HostInfo; import com.yahoo.config.model.api.PortInfo; import com.yahoo.config.model.api.ServiceInfo; @@ -44,6 +45,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.stream.Collectors; import static com.yahoo.config.model.api.container.ContainerServiceType.CLUSTERCONTROLLER_CONTAINER; import static com.yahoo.config.model.api.container.ContainerServiceType.CONTAINER; @@ -83,18 +85,40 @@ public class ConfigConvergenceChecker extends AbstractComponent { /** Fetches the active config generation for all services in the given application. */ public Map getServiceConfigGenerations(Application application, Duration timeoutPerService) { + return getServiceConfigGenerations(application, timeoutPerService, true); + } + + /** + * Fetches the active config generation for all services in the given application. Will not check services + * which defer config changes until restart if checkAll is false. + */ + private Map getServiceConfigGenerations(Application application, Duration timeoutPerService, boolean checkAll) { List servicesToCheck = new ArrayList<>(); application.getModel().getHosts() .forEach(host -> host.getServices().stream() .filter(service -> serviceTypesToCheck.contains(service.getServiceType())) + .filter(serviceInfo -> shouldCheckService(checkAll, application, serviceInfo)) .forEach(service -> getStatePort(service).ifPresent(port -> servicesToCheck.add(service)))); + log.log(Level.FINE, "Services to check for config convergence: " + servicesToCheck); return getServiceGenerations(servicesToCheck, timeoutPerService); } - /** Check all services in given application. Returns the minimum current generation of all services */ - public ServiceListResponse getConfigGenerationsForAllServices(Application application, Duration timeoutPerService) { - Map currentGenerations = getServiceConfigGenerations(application, timeoutPerService); + /** Checks all services in given application. Returns the minimum current generation of all services */ + public ServiceListResponse checkConvergenceForAllServices(Application application, Duration timeoutPerService) { + return checkConvergence(application, timeoutPerService, true); + } + + /** + * Checks services except those which defer config changes until restart in the given application. + * Returns the minimum current generation of those services. + */ + public ServiceListResponse checkConvergenceUnlessDeferringChangesUntilRestart(Application application, Duration timeoutPerService) { + return checkConvergence(application, timeoutPerService, false); + } + + private ServiceListResponse checkConvergence(Application application, Duration timeoutPerService, boolean checkAll) { + Map currentGenerations = getServiceConfigGenerations(application, timeoutPerService, checkAll); long currentGeneration = currentGenerations.values().stream().mapToLong(Long::longValue).min().orElse(-1); return new ServiceListResponse(currentGenerations, application.getApplicationGeneration(), currentGeneration); } @@ -116,6 +140,26 @@ public class ConfigConvergenceChecker extends AbstractComponent { } } + private boolean shouldCheckService(boolean checkServicesWithDeferChangesUntilRestart, Application application, ServiceInfo serviceInfo) { + if (checkServicesWithDeferChangesUntilRestart) return true; + if (isNotContainer(serviceInfo)) return true; + return serviceIsInClusterWhichShouldBeChecked(application, serviceInfo); + } + + private boolean isNotContainer(ServiceInfo serviceInfo) { + return ! List.of(CONTAINER.serviceName, QRSERVER.serviceName, METRICS_PROXY_CONTAINER).contains(serviceInfo.getServiceType()); + } + + // Don't check service in a cluster which uses restartOnDeploy (new config will not be used until service is restarted) + private boolean serviceIsInClusterWhichShouldBeChecked(Application application, ServiceInfo serviceInfo) { + Set excludeFromChecking = application.getModel().applicationClusterInfo() + .stream() + .filter(ApplicationClusterInfo::getDeferChangesUntilRestart) + .collect(Collectors.toSet()); + + return excludeFromChecking.stream().noneMatch(info -> info.name().equals(serviceInfo.getProperty("clustername").orElse(""))); + } + /** Gets service generation for a list of services (in parallel). */ private Map getServiceGenerations(List services, Duration timeout) { try (CloseableHttpAsyncClient client = createHttpClient()) { diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java index 7d59a912542..099f0be6fe9 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java @@ -193,20 +193,16 @@ public class Deployment implements com.yahoo.config.provision.Deployment { throw new ConfigNotConvergedException(e); } - Application app = applicationRepository.getActiveApplication(applicationId); - - // TODO: Don't wait for config convergence if restartOnDeploy is true for one o more container clusters - // (ideally wait for config convergence for all other services) - - log.info(session.logPre() + "Wait for services to converge on new generation before restarting"); + deployLogger.log(Level.INFO, "Wait for services to converge on new generation before restarting"); ConfigConvergenceChecker convergenceChecker = applicationRepository.configConvergenceChecker(); - ServiceListResponse response = convergenceChecker.getConfigGenerationsForAllServices(app, timeout); + Application app = applicationRepository.getActiveApplication(applicationId); + ServiceListResponse response = convergenceChecker.checkConvergenceUnlessDeferringChangesUntilRestart(app, timeout); if (response.converged) { - log.info(session.logPre() + "Services converged on new generation " + response.currentGeneration); + deployLogger.log(Level.INFO, "Services converged on new generation " + response.currentGeneration); return; } else { - log.info(session.logPre() + "Services not converged on new generation, wanted generation: " + response.wantedGeneration + - ", current generation: " + response.currentGeneration + ", will retry"); + deployLogger.log(Level.INFO, "Services not converged on new generation, wanted generation: " + + response.wantedGeneration + ", current generation: " + response.currentGeneration + ", will retry"); try { Thread.sleep(10_000); } catch (InterruptedException e) { /* ignore */ } } } diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/MockConfigConvergenceChecker.java b/configserver/src/test/java/com/yahoo/vespa/config/server/MockConfigConvergenceChecker.java index b21d89fa626..e63fdf9bf19 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/MockConfigConvergenceChecker.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/MockConfigConvergenceChecker.java @@ -21,7 +21,7 @@ public class MockConfigConvergenceChecker extends ConfigConvergenceChecker { } @Override - public ServiceListResponse getConfigGenerationsForAllServices(Application application, Duration timeoutPerService) { + public ServiceListResponse checkConvergenceForAllServices(Application application, Duration timeoutPerService) { return new ServiceListResponse(Map.of(), wantedGeneration, wantedGeneration); } @@ -30,4 +30,9 @@ public class MockConfigConvergenceChecker extends ConfigConvergenceChecker { return new ServiceResponse(ServiceResponse.Status.ok, wantedGeneration); } + @Override + public ServiceListResponse checkConvergenceUnlessDeferringChangesUntilRestart(Application application, Duration timeoutPerService) { + return new ServiceListResponse(Map.of(), wantedGeneration, wantedGeneration); + } + } diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/application/ConfigConvergenceCheckerTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/application/ConfigConvergenceCheckerTest.java index 148cd8ac24f..6016ce991d0 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/application/ConfigConvergenceCheckerTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/application/ConfigConvergenceCheckerTest.java @@ -91,7 +91,7 @@ public class ConfigConvergenceCheckerTest { { wireMock.stubFor(get(urlEqualTo("/state/v1/config")).willReturn(okJson("{\"config\":{\"generation\":3}}"))); - ServiceListResponse response = checker.getConfigGenerationsForAllServices(application, clientTimeout); + ServiceListResponse response = checker.checkConvergenceForAllServices(application, clientTimeout); assertEquals(3, response.wantedGeneration); assertEquals(3, response.currentGeneration); assertTrue(response.converged); @@ -114,7 +114,7 @@ public class ConfigConvergenceCheckerTest { URI requestUrl = testServer().resolve("/serviceconverge"); - ServiceListResponse response = checker.getConfigGenerationsForAllServices(application, clientTimeout); + ServiceListResponse response = checker.checkConvergenceForAllServices(application, clientTimeout); assertEquals(4, response.wantedGeneration); assertEquals(3, response.currentGeneration); assertFalse(response.converged); -- cgit v1.2.3