diff options
author | jonmv <venstad@gmail.com> | 2024-01-12 16:12:26 +0100 |
---|---|---|
committer | jonmv <venstad@gmail.com> | 2024-01-12 16:12:26 +0100 |
commit | e49f46a6e42d3cddc8dca363d90f27a24c483024 (patch) | |
tree | 41554882ac71d3416103d90f4b9d558bf5d97140 /configserver | |
parent | 469d7dc346f3c900953e9cd8110c6fa070ff5e74 (diff) |
Add a maintainer for triggering node restarts triggered by deployment
Diffstat (limited to 'configserver')
12 files changed, 364 insertions, 65 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java index fc60f1225a7..742e92500d9 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java @@ -56,6 +56,7 @@ import com.yahoo.vespa.config.server.application.ActiveTokenFingerprints; import com.yahoo.vespa.config.server.application.DefaultClusterReindexingStatusClient; import com.yahoo.vespa.config.server.application.FileDistributionStatus; import com.yahoo.vespa.config.server.application.HttpProxy; +import com.yahoo.vespa.config.server.application.PendingRestarts; import com.yahoo.vespa.config.server.application.TenantApplications; import com.yahoo.vespa.config.server.configchange.ConfigChangeActions; import com.yahoo.vespa.config.server.configchange.RefeedActions; @@ -1041,6 +1042,15 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye tenant.getApplicationRepo().database().modifyReindexing(id, ApplicationReindexing.empty(), modifications); } + public PendingRestarts getPendingRestarts(ApplicationId id) { + return requireDatabase(id).readPendingRestarts(id); + } + + public void modifyPendingRestarts(ApplicationId id, UnaryOperator<PendingRestarts> modifications) { + if (hostProvisioner.isEmpty()) return; + getTenant(id).getApplicationRepo().database().modifyPendingRestarts(id, modifications); + } + public ConfigserverConfig configserverConfig() { return configserverConfig; } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabase.java b/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabase.java index e17fda76a01..b2dbea569b6 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabase.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabase.java @@ -28,6 +28,8 @@ import java.util.concurrent.ExecutorService; import java.util.function.UnaryOperator; import static com.yahoo.vespa.curator.transaction.CuratorOperations.setData; +import static com.yahoo.yolean.Exceptions.uncheck; +import static java.util.stream.Collectors.toMap; import static java.util.stream.Collectors.toUnmodifiableMap; /** @@ -197,6 +199,25 @@ public class ApplicationCuratorDatabase { .toList(); } + public PendingRestarts readPendingRestarts(ApplicationId id) { + return curator.getData(pendingRestartsPath(id)) + .map(PendingRestartsSerializer::fromBytes) + .orElse(PendingRestarts.empty()); + } + + public void modifyPendingRestarts(ApplicationId id, UnaryOperator<PendingRestarts> modification) { + try (Lock lock = curator.lock(restartsLockPath(id), Duration.ofMinutes(1))) { + PendingRestarts original = readPendingRestarts(id); + PendingRestarts modified = modification.apply(original); + if (original != modified) { + if (modified.isEmpty()) + curator.delete(pendingRestartsPath(id)); + else + curator.set(pendingRestartsPath(id), PendingRestartsSerializer.toBytes(modified)); + } + } + } + public Optional<ApplicationReindexing> readReindexingStatus(ApplicationId id) { return curator.getData(reindexingDataPath(id)) .map(ReindexingStatusSerializer::fromBytes); @@ -211,6 +232,10 @@ public class ApplicationCuratorDatabase { return curator.createDirectoryCache(applicationsPath.getAbsolute(), false, false, zkCacheExecutor); } + private Path restartsLockPath(ApplicationId id) { + return locksPath.append(id.serializedForm() + "::restarts"); + } + private Path reindexingLockPath(ApplicationId id) { return locksPath.append(id.serializedForm()).append("reindexing"); } @@ -227,6 +252,38 @@ public class ApplicationCuratorDatabase { return applicationPath(id).append("reindexing"); } + private Path pendingRestartsPath(ApplicationId id) { + return applicationPath(id).append("restarts"); + } + + private static class PendingRestartsSerializer { + + private static final String GENERATIONS = "generations"; + private static final String GENERATION = "generation"; + private static final String HOSTNAMES = "hostnames"; + + private static byte[] toBytes(PendingRestarts pendingRestarts) { + Cursor root = new Slime().setObject(); + Cursor generationsArray = root.setArray(GENERATIONS); + pendingRestarts.generationsForRestarts().forEach((generation, hostnames) -> { + Cursor generationObject = generationsArray.addObject(); + generationObject.setLong(GENERATION, generation); + hostnames.forEach(generationObject.setArray(HOSTNAMES)::addString); + }); + return uncheck(() -> SlimeUtils.toJsonBytes(root)); + } + + private static PendingRestarts fromBytes(byte[] data) { + Cursor root = SlimeUtils.jsonToSlimeOrThrow(data).get(); + return new PendingRestarts(SlimeUtils.entriesStream(root.field(GENERATIONS)) + .collect(toMap(entry -> entry.field(GENERATION).asLong(), + entry -> SlimeUtils.entriesStream(entry.field(HOSTNAMES)) + .map(Inspector::asString) + .toList()))); + } + + } + private static class ReindexingStatusSerializer { private static final String ENABLED = "enabled"; @@ -263,7 +320,7 @@ public class ApplicationCuratorDatabase { setStatus(statusObject, status); }); }); - return Exceptions.uncheck(() -> SlimeUtils.toJsonBytes(root)); + return uncheck(() -> SlimeUtils.toJsonBytes(root)); } private static void setStatus(Cursor statusObject, Status status) { diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/application/PendingRestarts.java b/configserver/src/main/java/com/yahoo/vespa/config/server/application/PendingRestarts.java new file mode 100644 index 00000000000..e573effc8a9 --- /dev/null +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/application/PendingRestarts.java @@ -0,0 +1,58 @@ +package com.yahoo.vespa.config.server.application; + +import java.util.Collection; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Set; + +import static java.util.Collections.unmodifiableMap; +import static java.util.Collections.unmodifiableSet; + +/** + * @author jonmv + */ +public class PendingRestarts { + + private static final PendingRestarts empty = new PendingRestarts(Map.of()); + + private final Map<Long, Set<String>> generationsForRestarts; + + public static PendingRestarts empty() { + return empty; + } + + PendingRestarts(Map<Long, ? extends Collection<String>> generationsForRestarts) { + Map<Long, Set<String>> builder = new LinkedHashMap<>(); + generationsForRestarts.forEach((generation, hostnames) -> builder.put(generation, unmodifiableSet(new LinkedHashSet<>(hostnames)))); + this.generationsForRestarts = unmodifiableMap(builder); + } + + public Map<Long, Set<String>> generationsForRestarts() { return generationsForRestarts; } + + public boolean isEmpty() { return generationsForRestarts.isEmpty(); } + + public PendingRestarts withRestarts(long atGeneration, Collection<String> hostnames) { + Map<Long, Set<String>> newRestarts = new LinkedHashMap<>(generationsForRestarts); + newRestarts.put(atGeneration, new LinkedHashSet<>(newRestarts.getOrDefault(atGeneration, Set.of())) {{ addAll(hostnames); }}); + return new PendingRestarts(newRestarts); + } + + public PendingRestarts withoutPreviousGenerations(long generation) { + Map<Long, Set<String>> newRestarts = new LinkedHashMap<>(generationsForRestarts); + newRestarts.keySet().removeIf(g -> g <= generation); + return new PendingRestarts(newRestarts); + } + + public Set<String> hostnames() { + return restartsReadyAt(Long.MAX_VALUE); + } + + public Set<String> restartsReadyAt(long generation) { + LinkedHashSet<String> ready = new LinkedHashSet<>(); + generationsForRestarts.forEach((g, hosts) -> { if (g <= generation) ready.addAll(hosts); }); + return ready; + } + +} diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java index f10bea5da6e..7c7608e5e5c 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java @@ -45,6 +45,7 @@ import java.util.stream.Collectors; import static com.yahoo.vespa.config.server.application.ConfigConvergenceChecker.ServiceListResponse; import static com.yahoo.vespa.config.server.session.Session.Status.DELETE; +import static java.util.stream.Collectors.joining; import static java.util.stream.Collectors.toSet; /** @@ -171,60 +172,14 @@ public class Deployment implements com.yahoo.config.provision.Deployment { Set<String> nodesToRestart = session.getActivationTriggers().nodeRestarts().stream().map(NodeRestart::hostname).collect(toSet()); if (nodesToRestart.isEmpty()) return; - // TODO: replace this with a maintainer that waits for active config >= this session's config generation, - // and let config convergence waiter in controller also check _pending_ restarts, maintained by that. - // Here, we'll then instead hand these restarts over to that maintainer, with our session id. - waitForConfigToConverge(applicationId, nodesToRestart); - - provisioner.get().restart(applicationId, HostFilter.from(nodesToRestart)); - String restartsMessage = String.format("Scheduled service restart of %d nodes: %s", - nodesToRestart.size(), nodesToRestart.stream().sorted().collect(Collectors.joining(", "))); - deployLogger.log(Level.INFO, restartsMessage); - log.info(String.format("%sScheduled service restart of %d nodes: %s", - session.logPre(), nodesToRestart.size(), restartsMessage)); + applicationRepository.modifyPendingRestarts(applicationId, pendingRestarts -> pendingRestarts.withRestarts(session.getSessionId(), nodesToRestart)); + deployLogger.log(Level.INFO, String.format("Scheduled service restart of %d nodes: %s", + nodesToRestart.size(), nodesToRestart.stream().sorted().collect(joining(", ")))); + log.info(String.format("%sWill schedule service restart of %d nodes after convergence on generation %d: %s", + session.logPre(), nodesToRestart.size(), session.getSessionId(), nodesToRestart.stream().sorted().collect(joining(", ")))); this.configChangeActions = configChangeActions.withRestartActions(new RestartActions()); } - private void waitForConfigToConverge(ApplicationId applicationId, Set<String> hostnames) { - deployLogger.log(Level.INFO, "Wait for all services to use new config generation before restarting"); - var convergenceChecker = applicationRepository.configConvergenceChecker(); - var app = applicationRepository.getActiveApplication(applicationId); - - ServiceListResponse response = null; - while (timeLeft(applicationId, response)) { - response = convergenceChecker.checkConvergenceUnlessDeferringChangesUntilRestart(app, hostnames); - if (response.converged) { - deployLogger.log(Level.INFO, "Services converged on new config generation " + response.currentGeneration); - return; - } else { - deployLogger.log(Level.INFO, "Services that did not converge on new config generation " + - response.wantedGeneration + ": " + - servicesNotConvergedFormatted(response) + ". Will retry"); - try { Thread.sleep(5_000); } catch (InterruptedException e) { /* ignore */ } - } - } - } - - private boolean timeLeft(ApplicationId applicationId, ServiceListResponse response) { - try { - params.get().getTimeoutBudget().assertNotTimedOut( - () -> "Timeout exceeded while waiting for config convergence for " + applicationId + - ", wanted generation " + response.wantedGeneration + ", these services had another generation: " + - servicesNotConvergedFormatted(response)); - } catch (UncheckedTimeoutException e) { - throw new ConfigNotConvergedException(e); - } - return true; - } - - private String servicesNotConvergedFormatted(ServiceListResponse response) { - return response.services().stream() - .filter(service -> service.currentGeneration != response.wantedGeneration) - .map(service -> service.serviceInfo.getHostName() + ":" + service.serviceInfo.getServiceName() + - " on generation " + service.currentGeneration) - .collect(Collectors.joining(", ")); - } - private void storeReindexing(ApplicationId applicationId) { applicationRepository.modifyReindexing(applicationId, reindexing -> { for (Reindexing entry : session.getActivationTriggers().reindexings()) diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java index dbd30f72c24..a0e48f3066a 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java @@ -49,6 +49,8 @@ public class ConfigServerMaintenance { maintainers.add(new SessionsMaintainer(applicationRepository, curator, Duration.ofSeconds(30))); maintainers.add(new ReindexingMaintainer(applicationRepository, curator, Duration.ofMinutes(3), convergenceChecker, Clock.systemUTC())); + if (applicationRepository.configserverConfig().hostedVespa()) + maintainers.add(new PendingRestartsMaintainer(applicationRepository, curator, Clock.systemUTC(), Duration.ofSeconds(30))); } public void shutdown() { diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/PendingRestartsMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/PendingRestartsMaintainer.java new file mode 100644 index 00000000000..e228e0edcb6 --- /dev/null +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/PendingRestartsMaintainer.java @@ -0,0 +1,102 @@ +package com.yahoo.vespa.config.server.maintenance; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.HostFilter; +import com.yahoo.vespa.config.server.ApplicationRepository; +import com.yahoo.vespa.config.server.application.Application; +import com.yahoo.vespa.config.server.application.ApplicationCuratorDatabase; +import com.yahoo.vespa.config.server.application.ConfigConvergenceChecker.ServiceListResponse; +import com.yahoo.vespa.config.server.application.PendingRestarts; +import com.yahoo.vespa.config.server.tenant.Tenant; +import com.yahoo.vespa.curator.Curator; +import com.yahoo.yolean.Exceptions; + +import java.time.Clock; +import java.time.Duration; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.BiConsumer; +import java.util.function.Function; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +/** + * @author jonmv + */ +public class PendingRestartsMaintainer extends ConfigServerMaintainer { + + private final Clock clock; + + public PendingRestartsMaintainer(ApplicationRepository applicationRepository, Curator curator, Clock clock, Duration interval) { + super(applicationRepository, curator, applicationRepository.flagSource(), clock, interval, true); + this.clock = clock; + } + + @Override + protected double maintain() { + AtomicInteger attempts = new AtomicInteger(0); + AtomicInteger failures = new AtomicInteger(0); + for (Tenant tenant : applicationRepository.tenantRepository().getAllTenants()) { + ApplicationCuratorDatabase database = tenant.getApplicationRepo().database(); + for (ApplicationId id : database.activeApplications()) + applicationRepository.getActiveApplicationSet(id) + .map(application -> application.getForVersionOrLatest(Optional.empty(), clock.instant())) + .ifPresent(application -> { + try { + attempts.incrementAndGet(); + applicationRepository.modifyPendingRestarts(id, restarts -> + triggerPendingRestarts(restartingHosts -> convergenceOf(application, restartingHosts), + this::restart, + id, + restarts, + log)); + } + catch (RuntimeException e) { + log.log(Level.INFO, "Failed to update reindexing status for " + id + ": " + Exceptions.toMessageString(e)); + failures.incrementAndGet(); + } + }); + } + return asSuccessFactorDeviation(attempts.get(), failures.get()); + } + + private ServiceListResponse convergenceOf(Application application, Set<String> restartingHosts) { + return applicationRepository.configConvergenceChecker().checkConvergenceUnlessDeferringChangesUntilRestart(application, restartingHosts); + } + + private void restart(ApplicationId id, Set<String> nodesToRestart) { + applicationRepository.restart(id, HostFilter.from(nodesToRestart)); + } + + static PendingRestarts triggerPendingRestarts(Function<Set<String>, ServiceListResponse> convergenceChecker, + BiConsumer<ApplicationId, Set<String>> restarter, + ApplicationId id, + PendingRestarts restarts, + Logger log) { + Set<String> restartingHosts = restarts.hostnames(); + if (restartingHosts.isEmpty()) return restarts; + + ServiceListResponse convergence = convergenceChecker.apply(restartingHosts); + long lowestGeneration = convergence.currentGeneration; + Set<String> nodesToRestart = restarts.restartsReadyAt(lowestGeneration); + if (nodesToRestart.isEmpty()) { + log.info(String.format("Cannot yet restart nodes of %s, as some services are still on generation %d:\n\t%s", + id.toFullString(), + lowestGeneration, + convergence.services().stream() + .filter(service -> service.currentGeneration == lowestGeneration) + .map(service -> service.serviceInfo.getHostName() + ":" + service.serviceInfo.getServiceName()) + .collect(Collectors.joining("\n\t")))); + return restarts; + } + + restarter.accept(id, nodesToRestart); + log.info(String.format("Scheduled restart of %d nodes after observing generation %d: %s", + nodesToRestart.size(), lowestGeneration, nodesToRestart.stream().sorted().collect(Collectors.joining(", ")))); + + return restarts.withoutPreviousGenerations(lowestGeneration); + } + +} diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java index 8171d63ae37..0dc6cc004be 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java @@ -25,6 +25,8 @@ import java.util.function.Supplier; import java.util.logging.Level; import java.util.logging.Logger; +import static java.util.Comparator.naturalOrder; + /** * Watches pending reindexing, and sets these to ready when config convergence is observed. * Also removes data for clusters or document types which no longer exist. @@ -84,7 +86,7 @@ public class ReindexingMaintainer extends ConfigServerMaintainer { return () -> { if (oldest.get() == 0) oldest.set(convergence.getServiceConfigGenerations(application, timeout).values().stream() - .min(Comparator.naturalOrder()) + .min(naturalOrder()) .orElse(-1L)); return oldest.get(); diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/provision/HostProvisionerProvider.java b/configserver/src/main/java/com/yahoo/vespa/config/server/provision/HostProvisionerProvider.java index 8f733c1dd08..7c93004b557 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/provision/HostProvisionerProvider.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/provision/HostProvisionerProvider.java @@ -10,7 +10,7 @@ import java.util.Optional; /** * This class is necessary to support both having and not having a host provisioner. We inject - * a component registry here, which then enables us to check whether or not we have a provisioner available. + * a component registry here, which then enables us to check whether we have a provisioner available. * We only have a provisioner if we are running in hosted mode. * * @author Ulf Lilleengen diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java index d1877e1e057..801f599c2d8 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java @@ -84,7 +84,7 @@ public class ConfigServerBootstrapTest { assertTrue(rpcServer.isServingConfigRequests()); waitUntil(() -> bootstrap.status() == StateMonitor.Status.up, "failed waiting for status 'up'"); waitUntil(() -> bootstrap.vipStatus().isInRotation(), "failed waiting for server to be in rotation"); - assertEquals(List.of("ApplicationPackageMaintainer", "FileDistributionMaintainer", "ReindexingMaintainer", "SessionsMaintainer", "TenantsMaintainer"), + assertEquals(List.of("ApplicationPackageMaintainer", "FileDistributionMaintainer", "PendingRestartsMaintainer", "ReindexingMaintainer", "SessionsMaintainer", "TenantsMaintainer"), bootstrap.configServerMaintenance().maintainers().stream() .map(Maintainer::name) .sorted().toList()); diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabaseTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabaseTest.java index 41da444c1ec..c38f988bd82 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabaseTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabaseTest.java @@ -7,10 +7,14 @@ import com.yahoo.vespa.curator.transaction.CuratorTransaction; import org.junit.Test; import java.time.Instant; +import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.OptionalLong; +import java.util.Set; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertSame; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -41,6 +45,32 @@ public class ApplicationCuratorDatabaseTest { } @Test + public void testPendingRestartsSerialization() { + ApplicationId id = ApplicationId.defaultId(); + ApplicationCuratorDatabase db = new ApplicationCuratorDatabase(id.tenant(), curator); + + assertEquals(Map.of(), db.readPendingRestarts(id).generationsForRestarts()); + + db.modifyPendingRestarts(id, restarts -> { + assertSame(PendingRestarts.empty(), restarts); + return restarts.withRestarts(2, List.of("a", "b")) + .withRestarts(2, List.of("c", "d")) + .withRestarts(3, List.of("a, b")) + .withRestarts(2, List.of("a", "b")); + }); + assertEquals(Map.of(2L, Set.of("a", "b", "c", "d"), 3L, Set.of("a, b")), db.readPendingRestarts(id).generationsForRestarts()); + + db.modifyPendingRestarts(id, restarts -> restarts.withoutPreviousGenerations(1)); + assertEquals(Map.of(2L, Set.of("a", "b", "c", "d"), 3L, Set.of("a, b")), db.readPendingRestarts(id).generationsForRestarts()); + + db.modifyPendingRestarts(id, restarts -> restarts.withoutPreviousGenerations(2)); + assertEquals(Map.of(3L, Set.of("a, b")), db.readPendingRestarts(id).generationsForRestarts()); + + db.modifyPendingRestarts(id, restarts -> restarts.withoutPreviousGenerations(3)); + assertSame(PendingRestarts.empty(), db.readPendingRestarts(id)); + } + + @Test public void testReadingAndWritingApplicationData() { ApplicationId id = ApplicationId.defaultId(); ApplicationCuratorDatabase db = new ApplicationCuratorDatabase(id.tenant(), curator); diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java index 24c98edd9aa..a6f2eb38cc3 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java @@ -25,8 +25,6 @@ import com.yahoo.config.provision.Zone; import com.yahoo.container.ComponentsConfig; import com.yahoo.slime.SlimeUtils; import com.yahoo.test.ManualClock; -import com.yahoo.vespa.config.GetConfigRequest; -import com.yahoo.vespa.config.search.core.ProtonConfig; import com.yahoo.vespa.config.server.MockConfigConvergenceChecker; import com.yahoo.vespa.config.server.application.ApplicationReindexing; import com.yahoo.vespa.config.server.application.ConfigConvergenceChecker; @@ -34,6 +32,7 @@ import com.yahoo.vespa.config.server.http.InternalServerException; import com.yahoo.vespa.config.server.http.InvalidApplicationException; import com.yahoo.vespa.config.server.http.UnknownVespaVersionException; import com.yahoo.vespa.config.server.http.v2.PrepareResult; +import com.yahoo.vespa.config.server.maintenance.PendingRestartsMaintainer; import com.yahoo.vespa.config.server.model.TestModelFactory; import com.yahoo.vespa.config.server.session.PrepareParams; import com.yahoo.vespa.model.VespaModel; @@ -58,7 +57,6 @@ import java.util.stream.IntStream; import static com.yahoo.vespa.config.server.deploy.DeployTester.CountingModelFactory; import static com.yahoo.vespa.config.server.deploy.DeployTester.createFailingModelFactory; import static com.yahoo.vespa.config.server.deploy.DeployTester.createHostedModelFactory; -import static com.yahoo.yolean.Exceptions.findCause; import static com.yahoo.yolean.Exceptions.uncheck; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -468,28 +466,40 @@ public class HostedDeployTest { } @Test - public void testConfigConvergenceBeforeRestart() { + public void testPendingRestartsAreTriggered() { List<Host> hosts = createHosts(9, "6.1.0", "6.2.0"); List<ServiceInfo> services = createServices(1); - List<ServiceInfo> twoServices = createServices(2); List<ModelFactory> modelFactories = List.of( new ConfigChangeActionsModelFactory(Version.fromString("6.2.0"), new VespaRestartAction(ClusterSpec.Id.from("test"), "change", services))); + List<ServiceInfo> mutableServices = new ArrayList<>(services); DeployTester tester = createTester(hosts, modelFactories, prodZone, Clock.systemUTC(), - new MockConfigConvergenceChecker(2L, services)); + new MockConfigConvergenceChecker(2L, mutableServices)); var result = tester.deployApp("src/test/apps/hosted/", "6.2.0"); DeployHandlerLogger deployLogger = result.deployLogger(); assertLogContainsMessage(deployLogger, "Scheduled service restart of 1 nodes: hostName0"); - assertLogContainsMessage(deployLogger, "Wait for all services to use new config generation before restarting"); - // Should only check convergence on 1 of the nodes - assertLogContainsMessage(deployLogger, "Services that did not converge on new config generation 2: hostName0:serviceName0 on generation 1. Will retry"); - assertLogContainsMessage(deployLogger, "Services converged on new config generation 2"); + assertEquals(Set.of(), tester.applicationRepository().getPendingRestarts(tester.applicationId()).restartsReadyAt(1)); + assertEquals(Set.of("hostName0"), tester.applicationRepository().getPendingRestarts(tester.applicationId()).hostnames()); + + PendingRestartsMaintainer maintainer = new PendingRestartsMaintainer(tester.applicationRepository(), + tester.curator(), + tester.applicationRepository().clock(), + Duration.ofDays(1)); + // Maintainer does not trigger services before convergence has been reached + maintainer.run(); + assertEquals(Set.of(), tester.applicationRepository().getPendingRestarts(tester.applicationId()).restartsReadyAt(1)); + assertEquals(Set.of("hostName0"), tester.applicationRepository().getPendingRestarts(tester.applicationId()).hostnames()); + + // All services converge, and maintainer triggers restarts + mutableServices.clear(); + maintainer.run(); + assertEquals(Set.of(), tester.applicationRepository().getPendingRestarts(tester.applicationId()).hostnames()); } private void assertLogContainsMessage(DeployHandlerLogger log, String message) { diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/maintenance/PendingRestartsMaintainerTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/maintenance/PendingRestartsMaintainerTest.java new file mode 100644 index 00000000000..119753ab3b4 --- /dev/null +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/maintenance/PendingRestartsMaintainerTest.java @@ -0,0 +1,73 @@ +package com.yahoo.vespa.config.server.maintenance; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.config.server.application.ConfigConvergenceChecker.ServiceListResponse; +import com.yahoo.vespa.config.server.application.PendingRestarts; +import org.junit.jupiter.api.Test; + +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.logging.Logger; + +import static com.yahoo.vespa.config.server.maintenance.PendingRestartsMaintainer.triggerPendingRestarts; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.fail; + +class PendingRestartsMaintainerTest { + + private static final Logger log = Logger.getLogger(PendingRestartsMaintainerTest.class.getName()); + + @Test + void testMaintenance() { + // Nothing happens with no pending restarts. + assertSame(PendingRestarts.empty(), + triggerPendingRestarts(hosts -> { fail("Should not be called"); return null; }, + (id, hosts) -> { fail("Should not be called"); }, + ApplicationId.defaultId(), + PendingRestarts.empty(), + log)); + + // Nothing happens when services are on a too low generation. + assertEquals(Map.of(1L, Set.of("a", "b"), 2L, Set.of("c")), + triggerPendingRestarts(hosts -> new ServiceListResponse(Map.of(), 3, 0), + (id, hosts) -> { fail("Should not be called"); }, + ApplicationId.defaultId(), + PendingRestarts.empty() + .withRestarts(1, List.of("a", "b")) + .withRestarts(2, List.of("c")), + log) + .generationsForRestarts()); + + // Only the first hosts are restarted before the second generation is reached. + assertEquals(Map.of(2L, Set.of("c")), + triggerPendingRestarts(hosts -> new ServiceListResponse(Map.of(), 3, 1), + (id, hosts) -> { + assertEquals(ApplicationId.defaultId(), id); + assertEquals(Set.of("a", "b"), hosts); + }, + ApplicationId.defaultId(), + PendingRestarts.empty() + .withRestarts(1, List.of("a", "b")) + .withRestarts(2, List.of("c")), + log) + .generationsForRestarts()); + + // All hosts are restarted when the second generation is reached. + assertEquals(Map.of(), + triggerPendingRestarts(hosts -> new ServiceListResponse(Map.of(), 3, 2), + (id, hosts) -> { + assertEquals(ApplicationId.defaultId(), id); + assertEquals(Set.of("a", "b", "c"), hosts); + }, + ApplicationId.defaultId(), + PendingRestarts.empty() + .withRestarts(1, List.of("a", "b")) + .withRestarts(2, List.of("c")), + log) + .generationsForRestarts()); + + } + +} |