summaryrefslogtreecommitdiffstats
path: root/configserver
diff options
context:
space:
mode:
authorjonmv <venstad@gmail.com>2024-01-12 16:12:26 +0100
committerjonmv <venstad@gmail.com>2024-01-12 16:12:26 +0100
commite49f46a6e42d3cddc8dca363d90f27a24c483024 (patch)
tree41554882ac71d3416103d90f4b9d558bf5d97140 /configserver
parent469d7dc346f3c900953e9cd8110c6fa070ff5e74 (diff)
Add a maintainer for triggering node restarts triggered by deployment
Diffstat (limited to 'configserver')
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java10
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabase.java59
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/application/PendingRestarts.java58
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java57
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java2
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/PendingRestartsMaintainer.java102
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java4
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/provision/HostProvisionerProvider.java2
-rw-r--r--configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java2
-rw-r--r--configserver/src/test/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabaseTest.java30
-rw-r--r--configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java30
-rw-r--r--configserver/src/test/java/com/yahoo/vespa/config/server/maintenance/PendingRestartsMaintainerTest.java73
12 files changed, 364 insertions, 65 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java
index fc60f1225a7..742e92500d9 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java
@@ -56,6 +56,7 @@ import com.yahoo.vespa.config.server.application.ActiveTokenFingerprints;
import com.yahoo.vespa.config.server.application.DefaultClusterReindexingStatusClient;
import com.yahoo.vespa.config.server.application.FileDistributionStatus;
import com.yahoo.vespa.config.server.application.HttpProxy;
+import com.yahoo.vespa.config.server.application.PendingRestarts;
import com.yahoo.vespa.config.server.application.TenantApplications;
import com.yahoo.vespa.config.server.configchange.ConfigChangeActions;
import com.yahoo.vespa.config.server.configchange.RefeedActions;
@@ -1041,6 +1042,15 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye
tenant.getApplicationRepo().database().modifyReindexing(id, ApplicationReindexing.empty(), modifications);
}
+ public PendingRestarts getPendingRestarts(ApplicationId id) {
+ return requireDatabase(id).readPendingRestarts(id);
+ }
+
+ public void modifyPendingRestarts(ApplicationId id, UnaryOperator<PendingRestarts> modifications) {
+ if (hostProvisioner.isEmpty()) return;
+ getTenant(id).getApplicationRepo().database().modifyPendingRestarts(id, modifications);
+ }
+
public ConfigserverConfig configserverConfig() {
return configserverConfig;
}
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabase.java b/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabase.java
index e17fda76a01..b2dbea569b6 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabase.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabase.java
@@ -28,6 +28,8 @@ import java.util.concurrent.ExecutorService;
import java.util.function.UnaryOperator;
import static com.yahoo.vespa.curator.transaction.CuratorOperations.setData;
+import static com.yahoo.yolean.Exceptions.uncheck;
+import static java.util.stream.Collectors.toMap;
import static java.util.stream.Collectors.toUnmodifiableMap;
/**
@@ -197,6 +199,25 @@ public class ApplicationCuratorDatabase {
.toList();
}
+ public PendingRestarts readPendingRestarts(ApplicationId id) {
+ return curator.getData(pendingRestartsPath(id))
+ .map(PendingRestartsSerializer::fromBytes)
+ .orElse(PendingRestarts.empty());
+ }
+
+ public void modifyPendingRestarts(ApplicationId id, UnaryOperator<PendingRestarts> modification) {
+ try (Lock lock = curator.lock(restartsLockPath(id), Duration.ofMinutes(1))) {
+ PendingRestarts original = readPendingRestarts(id);
+ PendingRestarts modified = modification.apply(original);
+ if (original != modified) {
+ if (modified.isEmpty())
+ curator.delete(pendingRestartsPath(id));
+ else
+ curator.set(pendingRestartsPath(id), PendingRestartsSerializer.toBytes(modified));
+ }
+ }
+ }
+
public Optional<ApplicationReindexing> readReindexingStatus(ApplicationId id) {
return curator.getData(reindexingDataPath(id))
.map(ReindexingStatusSerializer::fromBytes);
@@ -211,6 +232,10 @@ public class ApplicationCuratorDatabase {
return curator.createDirectoryCache(applicationsPath.getAbsolute(), false, false, zkCacheExecutor);
}
+ private Path restartsLockPath(ApplicationId id) {
+ return locksPath.append(id.serializedForm() + "::restarts");
+ }
+
private Path reindexingLockPath(ApplicationId id) {
return locksPath.append(id.serializedForm()).append("reindexing");
}
@@ -227,6 +252,38 @@ public class ApplicationCuratorDatabase {
return applicationPath(id).append("reindexing");
}
+ private Path pendingRestartsPath(ApplicationId id) {
+ return applicationPath(id).append("restarts");
+ }
+
+ private static class PendingRestartsSerializer {
+
+ private static final String GENERATIONS = "generations";
+ private static final String GENERATION = "generation";
+ private static final String HOSTNAMES = "hostnames";
+
+ private static byte[] toBytes(PendingRestarts pendingRestarts) {
+ Cursor root = new Slime().setObject();
+ Cursor generationsArray = root.setArray(GENERATIONS);
+ pendingRestarts.generationsForRestarts().forEach((generation, hostnames) -> {
+ Cursor generationObject = generationsArray.addObject();
+ generationObject.setLong(GENERATION, generation);
+ hostnames.forEach(generationObject.setArray(HOSTNAMES)::addString);
+ });
+ return uncheck(() -> SlimeUtils.toJsonBytes(root));
+ }
+
+ private static PendingRestarts fromBytes(byte[] data) {
+ Cursor root = SlimeUtils.jsonToSlimeOrThrow(data).get();
+ return new PendingRestarts(SlimeUtils.entriesStream(root.field(GENERATIONS))
+ .collect(toMap(entry -> entry.field(GENERATION).asLong(),
+ entry -> SlimeUtils.entriesStream(entry.field(HOSTNAMES))
+ .map(Inspector::asString)
+ .toList())));
+ }
+
+ }
+
private static class ReindexingStatusSerializer {
private static final String ENABLED = "enabled";
@@ -263,7 +320,7 @@ public class ApplicationCuratorDatabase {
setStatus(statusObject, status);
});
});
- return Exceptions.uncheck(() -> SlimeUtils.toJsonBytes(root));
+ return uncheck(() -> SlimeUtils.toJsonBytes(root));
}
private static void setStatus(Cursor statusObject, Status status) {
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/application/PendingRestarts.java b/configserver/src/main/java/com/yahoo/vespa/config/server/application/PendingRestarts.java
new file mode 100644
index 00000000000..e573effc8a9
--- /dev/null
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/application/PendingRestarts.java
@@ -0,0 +1,58 @@
+package com.yahoo.vespa.config.server.application;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.Map;
+import java.util.Set;
+
+import static java.util.Collections.unmodifiableMap;
+import static java.util.Collections.unmodifiableSet;
+
+/**
+ * @author jonmv
+ */
+public class PendingRestarts {
+
+ private static final PendingRestarts empty = new PendingRestarts(Map.of());
+
+ private final Map<Long, Set<String>> generationsForRestarts;
+
+ public static PendingRestarts empty() {
+ return empty;
+ }
+
+ PendingRestarts(Map<Long, ? extends Collection<String>> generationsForRestarts) {
+ Map<Long, Set<String>> builder = new LinkedHashMap<>();
+ generationsForRestarts.forEach((generation, hostnames) -> builder.put(generation, unmodifiableSet(new LinkedHashSet<>(hostnames))));
+ this.generationsForRestarts = unmodifiableMap(builder);
+ }
+
+ public Map<Long, Set<String>> generationsForRestarts() { return generationsForRestarts; }
+
+ public boolean isEmpty() { return generationsForRestarts.isEmpty(); }
+
+ public PendingRestarts withRestarts(long atGeneration, Collection<String> hostnames) {
+ Map<Long, Set<String>> newRestarts = new LinkedHashMap<>(generationsForRestarts);
+ newRestarts.put(atGeneration, new LinkedHashSet<>(newRestarts.getOrDefault(atGeneration, Set.of())) {{ addAll(hostnames); }});
+ return new PendingRestarts(newRestarts);
+ }
+
+ public PendingRestarts withoutPreviousGenerations(long generation) {
+ Map<Long, Set<String>> newRestarts = new LinkedHashMap<>(generationsForRestarts);
+ newRestarts.keySet().removeIf(g -> g <= generation);
+ return new PendingRestarts(newRestarts);
+ }
+
+ public Set<String> hostnames() {
+ return restartsReadyAt(Long.MAX_VALUE);
+ }
+
+ public Set<String> restartsReadyAt(long generation) {
+ LinkedHashSet<String> ready = new LinkedHashSet<>();
+ generationsForRestarts.forEach((g, hosts) -> { if (g <= generation) ready.addAll(hosts); });
+ return ready;
+ }
+
+}
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java
index f10bea5da6e..7c7608e5e5c 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java
@@ -45,6 +45,7 @@ import java.util.stream.Collectors;
import static com.yahoo.vespa.config.server.application.ConfigConvergenceChecker.ServiceListResponse;
import static com.yahoo.vespa.config.server.session.Session.Status.DELETE;
+import static java.util.stream.Collectors.joining;
import static java.util.stream.Collectors.toSet;
/**
@@ -171,60 +172,14 @@ public class Deployment implements com.yahoo.config.provision.Deployment {
Set<String> nodesToRestart = session.getActivationTriggers().nodeRestarts().stream().map(NodeRestart::hostname).collect(toSet());
if (nodesToRestart.isEmpty()) return;
- // TODO: replace this with a maintainer that waits for active config >= this session's config generation,
- // and let config convergence waiter in controller also check _pending_ restarts, maintained by that.
- // Here, we'll then instead hand these restarts over to that maintainer, with our session id.
- waitForConfigToConverge(applicationId, nodesToRestart);
-
- provisioner.get().restart(applicationId, HostFilter.from(nodesToRestart));
- String restartsMessage = String.format("Scheduled service restart of %d nodes: %s",
- nodesToRestart.size(), nodesToRestart.stream().sorted().collect(Collectors.joining(", ")));
- deployLogger.log(Level.INFO, restartsMessage);
- log.info(String.format("%sScheduled service restart of %d nodes: %s",
- session.logPre(), nodesToRestart.size(), restartsMessage));
+ applicationRepository.modifyPendingRestarts(applicationId, pendingRestarts -> pendingRestarts.withRestarts(session.getSessionId(), nodesToRestart));
+ deployLogger.log(Level.INFO, String.format("Scheduled service restart of %d nodes: %s",
+ nodesToRestart.size(), nodesToRestart.stream().sorted().collect(joining(", "))));
+ log.info(String.format("%sWill schedule service restart of %d nodes after convergence on generation %d: %s",
+ session.logPre(), nodesToRestart.size(), session.getSessionId(), nodesToRestart.stream().sorted().collect(joining(", "))));
this.configChangeActions = configChangeActions.withRestartActions(new RestartActions());
}
- private void waitForConfigToConverge(ApplicationId applicationId, Set<String> hostnames) {
- deployLogger.log(Level.INFO, "Wait for all services to use new config generation before restarting");
- var convergenceChecker = applicationRepository.configConvergenceChecker();
- var app = applicationRepository.getActiveApplication(applicationId);
-
- ServiceListResponse response = null;
- while (timeLeft(applicationId, response)) {
- response = convergenceChecker.checkConvergenceUnlessDeferringChangesUntilRestart(app, hostnames);
- if (response.converged) {
- deployLogger.log(Level.INFO, "Services converged on new config generation " + response.currentGeneration);
- return;
- } else {
- deployLogger.log(Level.INFO, "Services that did not converge on new config generation " +
- response.wantedGeneration + ": " +
- servicesNotConvergedFormatted(response) + ". Will retry");
- try { Thread.sleep(5_000); } catch (InterruptedException e) { /* ignore */ }
- }
- }
- }
-
- private boolean timeLeft(ApplicationId applicationId, ServiceListResponse response) {
- try {
- params.get().getTimeoutBudget().assertNotTimedOut(
- () -> "Timeout exceeded while waiting for config convergence for " + applicationId +
- ", wanted generation " + response.wantedGeneration + ", these services had another generation: " +
- servicesNotConvergedFormatted(response));
- } catch (UncheckedTimeoutException e) {
- throw new ConfigNotConvergedException(e);
- }
- return true;
- }
-
- private String servicesNotConvergedFormatted(ServiceListResponse response) {
- return response.services().stream()
- .filter(service -> service.currentGeneration != response.wantedGeneration)
- .map(service -> service.serviceInfo.getHostName() + ":" + service.serviceInfo.getServiceName() +
- " on generation " + service.currentGeneration)
- .collect(Collectors.joining(", "));
- }
-
private void storeReindexing(ApplicationId applicationId) {
applicationRepository.modifyReindexing(applicationId, reindexing -> {
for (Reindexing entry : session.getActivationTriggers().reindexings())
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java
index dbd30f72c24..a0e48f3066a 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java
@@ -49,6 +49,8 @@ public class ConfigServerMaintenance {
maintainers.add(new SessionsMaintainer(applicationRepository, curator, Duration.ofSeconds(30)));
maintainers.add(new ReindexingMaintainer(applicationRepository, curator,
Duration.ofMinutes(3), convergenceChecker, Clock.systemUTC()));
+ if (applicationRepository.configserverConfig().hostedVespa())
+ maintainers.add(new PendingRestartsMaintainer(applicationRepository, curator, Clock.systemUTC(), Duration.ofSeconds(30)));
}
public void shutdown() {
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/PendingRestartsMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/PendingRestartsMaintainer.java
new file mode 100644
index 00000000000..e228e0edcb6
--- /dev/null
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/PendingRestartsMaintainer.java
@@ -0,0 +1,102 @@
+package com.yahoo.vespa.config.server.maintenance;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.HostFilter;
+import com.yahoo.vespa.config.server.ApplicationRepository;
+import com.yahoo.vespa.config.server.application.Application;
+import com.yahoo.vespa.config.server.application.ApplicationCuratorDatabase;
+import com.yahoo.vespa.config.server.application.ConfigConvergenceChecker.ServiceListResponse;
+import com.yahoo.vespa.config.server.application.PendingRestarts;
+import com.yahoo.vespa.config.server.tenant.Tenant;
+import com.yahoo.vespa.curator.Curator;
+import com.yahoo.yolean.Exceptions;
+
+import java.time.Clock;
+import java.time.Duration;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.BiConsumer;
+import java.util.function.Function;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.stream.Collectors;
+
+/**
+ * @author jonmv
+ */
+public class PendingRestartsMaintainer extends ConfigServerMaintainer {
+
+ private final Clock clock;
+
+ public PendingRestartsMaintainer(ApplicationRepository applicationRepository, Curator curator, Clock clock, Duration interval) {
+ super(applicationRepository, curator, applicationRepository.flagSource(), clock, interval, true);
+ this.clock = clock;
+ }
+
+ @Override
+ protected double maintain() {
+ AtomicInteger attempts = new AtomicInteger(0);
+ AtomicInteger failures = new AtomicInteger(0);
+ for (Tenant tenant : applicationRepository.tenantRepository().getAllTenants()) {
+ ApplicationCuratorDatabase database = tenant.getApplicationRepo().database();
+ for (ApplicationId id : database.activeApplications())
+ applicationRepository.getActiveApplicationSet(id)
+ .map(application -> application.getForVersionOrLatest(Optional.empty(), clock.instant()))
+ .ifPresent(application -> {
+ try {
+ attempts.incrementAndGet();
+ applicationRepository.modifyPendingRestarts(id, restarts ->
+ triggerPendingRestarts(restartingHosts -> convergenceOf(application, restartingHosts),
+ this::restart,
+ id,
+ restarts,
+ log));
+ }
+ catch (RuntimeException e) {
+ log.log(Level.INFO, "Failed to update reindexing status for " + id + ": " + Exceptions.toMessageString(e));
+ failures.incrementAndGet();
+ }
+ });
+ }
+ return asSuccessFactorDeviation(attempts.get(), failures.get());
+ }
+
+ private ServiceListResponse convergenceOf(Application application, Set<String> restartingHosts) {
+ return applicationRepository.configConvergenceChecker().checkConvergenceUnlessDeferringChangesUntilRestart(application, restartingHosts);
+ }
+
+ private void restart(ApplicationId id, Set<String> nodesToRestart) {
+ applicationRepository.restart(id, HostFilter.from(nodesToRestart));
+ }
+
+ static PendingRestarts triggerPendingRestarts(Function<Set<String>, ServiceListResponse> convergenceChecker,
+ BiConsumer<ApplicationId, Set<String>> restarter,
+ ApplicationId id,
+ PendingRestarts restarts,
+ Logger log) {
+ Set<String> restartingHosts = restarts.hostnames();
+ if (restartingHosts.isEmpty()) return restarts;
+
+ ServiceListResponse convergence = convergenceChecker.apply(restartingHosts);
+ long lowestGeneration = convergence.currentGeneration;
+ Set<String> nodesToRestart = restarts.restartsReadyAt(lowestGeneration);
+ if (nodesToRestart.isEmpty()) {
+ log.info(String.format("Cannot yet restart nodes of %s, as some services are still on generation %d:\n\t%s",
+ id.toFullString(),
+ lowestGeneration,
+ convergence.services().stream()
+ .filter(service -> service.currentGeneration == lowestGeneration)
+ .map(service -> service.serviceInfo.getHostName() + ":" + service.serviceInfo.getServiceName())
+ .collect(Collectors.joining("\n\t"))));
+ return restarts;
+ }
+
+ restarter.accept(id, nodesToRestart);
+ log.info(String.format("Scheduled restart of %d nodes after observing generation %d: %s",
+ nodesToRestart.size(), lowestGeneration, nodesToRestart.stream().sorted().collect(Collectors.joining(", "))));
+
+ return restarts.withoutPreviousGenerations(lowestGeneration);
+ }
+
+}
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java
index 8171d63ae37..0dc6cc004be 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java
@@ -25,6 +25,8 @@ import java.util.function.Supplier;
import java.util.logging.Level;
import java.util.logging.Logger;
+import static java.util.Comparator.naturalOrder;
+
/**
* Watches pending reindexing, and sets these to ready when config convergence is observed.
* Also removes data for clusters or document types which no longer exist.
@@ -84,7 +86,7 @@ public class ReindexingMaintainer extends ConfigServerMaintainer {
return () -> {
if (oldest.get() == 0)
oldest.set(convergence.getServiceConfigGenerations(application, timeout).values().stream()
- .min(Comparator.naturalOrder())
+ .min(naturalOrder())
.orElse(-1L));
return oldest.get();
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/provision/HostProvisionerProvider.java b/configserver/src/main/java/com/yahoo/vespa/config/server/provision/HostProvisionerProvider.java
index 8f733c1dd08..7c93004b557 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/provision/HostProvisionerProvider.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/provision/HostProvisionerProvider.java
@@ -10,7 +10,7 @@ import java.util.Optional;
/**
* This class is necessary to support both having and not having a host provisioner. We inject
- * a component registry here, which then enables us to check whether or not we have a provisioner available.
+ * a component registry here, which then enables us to check whether we have a provisioner available.
* We only have a provisioner if we are running in hosted mode.
*
* @author Ulf Lilleengen
diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java
index d1877e1e057..801f599c2d8 100644
--- a/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java
+++ b/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java
@@ -84,7 +84,7 @@ public class ConfigServerBootstrapTest {
assertTrue(rpcServer.isServingConfigRequests());
waitUntil(() -> bootstrap.status() == StateMonitor.Status.up, "failed waiting for status 'up'");
waitUntil(() -> bootstrap.vipStatus().isInRotation(), "failed waiting for server to be in rotation");
- assertEquals(List.of("ApplicationPackageMaintainer", "FileDistributionMaintainer", "ReindexingMaintainer", "SessionsMaintainer", "TenantsMaintainer"),
+ assertEquals(List.of("ApplicationPackageMaintainer", "FileDistributionMaintainer", "PendingRestartsMaintainer", "ReindexingMaintainer", "SessionsMaintainer", "TenantsMaintainer"),
bootstrap.configServerMaintenance().maintainers().stream()
.map(Maintainer::name)
.sorted().toList());
diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabaseTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabaseTest.java
index 41da444c1ec..c38f988bd82 100644
--- a/configserver/src/test/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabaseTest.java
+++ b/configserver/src/test/java/com/yahoo/vespa/config/server/application/ApplicationCuratorDatabaseTest.java
@@ -7,10 +7,14 @@ import com.yahoo.vespa.curator.transaction.CuratorTransaction;
import org.junit.Test;
import java.time.Instant;
+import java.util.List;
+import java.util.Map;
import java.util.Optional;
import java.util.OptionalLong;
+import java.util.Set;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertSame;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -41,6 +45,32 @@ public class ApplicationCuratorDatabaseTest {
}
@Test
+ public void testPendingRestartsSerialization() {
+ ApplicationId id = ApplicationId.defaultId();
+ ApplicationCuratorDatabase db = new ApplicationCuratorDatabase(id.tenant(), curator);
+
+ assertEquals(Map.of(), db.readPendingRestarts(id).generationsForRestarts());
+
+ db.modifyPendingRestarts(id, restarts -> {
+ assertSame(PendingRestarts.empty(), restarts);
+ return restarts.withRestarts(2, List.of("a", "b"))
+ .withRestarts(2, List.of("c", "d"))
+ .withRestarts(3, List.of("a, b"))
+ .withRestarts(2, List.of("a", "b"));
+ });
+ assertEquals(Map.of(2L, Set.of("a", "b", "c", "d"), 3L, Set.of("a, b")), db.readPendingRestarts(id).generationsForRestarts());
+
+ db.modifyPendingRestarts(id, restarts -> restarts.withoutPreviousGenerations(1));
+ assertEquals(Map.of(2L, Set.of("a", "b", "c", "d"), 3L, Set.of("a, b")), db.readPendingRestarts(id).generationsForRestarts());
+
+ db.modifyPendingRestarts(id, restarts -> restarts.withoutPreviousGenerations(2));
+ assertEquals(Map.of(3L, Set.of("a, b")), db.readPendingRestarts(id).generationsForRestarts());
+
+ db.modifyPendingRestarts(id, restarts -> restarts.withoutPreviousGenerations(3));
+ assertSame(PendingRestarts.empty(), db.readPendingRestarts(id));
+ }
+
+ @Test
public void testReadingAndWritingApplicationData() {
ApplicationId id = ApplicationId.defaultId();
ApplicationCuratorDatabase db = new ApplicationCuratorDatabase(id.tenant(), curator);
diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java
index 24c98edd9aa..a6f2eb38cc3 100644
--- a/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java
+++ b/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java
@@ -25,8 +25,6 @@ import com.yahoo.config.provision.Zone;
import com.yahoo.container.ComponentsConfig;
import com.yahoo.slime.SlimeUtils;
import com.yahoo.test.ManualClock;
-import com.yahoo.vespa.config.GetConfigRequest;
-import com.yahoo.vespa.config.search.core.ProtonConfig;
import com.yahoo.vespa.config.server.MockConfigConvergenceChecker;
import com.yahoo.vespa.config.server.application.ApplicationReindexing;
import com.yahoo.vespa.config.server.application.ConfigConvergenceChecker;
@@ -34,6 +32,7 @@ import com.yahoo.vespa.config.server.http.InternalServerException;
import com.yahoo.vespa.config.server.http.InvalidApplicationException;
import com.yahoo.vespa.config.server.http.UnknownVespaVersionException;
import com.yahoo.vespa.config.server.http.v2.PrepareResult;
+import com.yahoo.vespa.config.server.maintenance.PendingRestartsMaintainer;
import com.yahoo.vespa.config.server.model.TestModelFactory;
import com.yahoo.vespa.config.server.session.PrepareParams;
import com.yahoo.vespa.model.VespaModel;
@@ -58,7 +57,6 @@ import java.util.stream.IntStream;
import static com.yahoo.vespa.config.server.deploy.DeployTester.CountingModelFactory;
import static com.yahoo.vespa.config.server.deploy.DeployTester.createFailingModelFactory;
import static com.yahoo.vespa.config.server.deploy.DeployTester.createHostedModelFactory;
-import static com.yahoo.yolean.Exceptions.findCause;
import static com.yahoo.yolean.Exceptions.uncheck;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@@ -468,28 +466,40 @@ public class HostedDeployTest {
}
@Test
- public void testConfigConvergenceBeforeRestart() {
+ public void testPendingRestartsAreTriggered() {
List<Host> hosts = createHosts(9, "6.1.0", "6.2.0");
List<ServiceInfo> services = createServices(1);
- List<ServiceInfo> twoServices = createServices(2);
List<ModelFactory> modelFactories = List.of(
new ConfigChangeActionsModelFactory(Version.fromString("6.2.0"),
new VespaRestartAction(ClusterSpec.Id.from("test"), "change", services)));
+ List<ServiceInfo> mutableServices = new ArrayList<>(services);
DeployTester tester = createTester(hosts,
modelFactories,
prodZone,
Clock.systemUTC(),
- new MockConfigConvergenceChecker(2L, services));
+ new MockConfigConvergenceChecker(2L, mutableServices));
var result = tester.deployApp("src/test/apps/hosted/", "6.2.0");
DeployHandlerLogger deployLogger = result.deployLogger();
assertLogContainsMessage(deployLogger, "Scheduled service restart of 1 nodes: hostName0");
- assertLogContainsMessage(deployLogger, "Wait for all services to use new config generation before restarting");
- // Should only check convergence on 1 of the nodes
- assertLogContainsMessage(deployLogger, "Services that did not converge on new config generation 2: hostName0:serviceName0 on generation 1. Will retry");
- assertLogContainsMessage(deployLogger, "Services converged on new config generation 2");
+ assertEquals(Set.of(), tester.applicationRepository().getPendingRestarts(tester.applicationId()).restartsReadyAt(1));
+ assertEquals(Set.of("hostName0"), tester.applicationRepository().getPendingRestarts(tester.applicationId()).hostnames());
+
+ PendingRestartsMaintainer maintainer = new PendingRestartsMaintainer(tester.applicationRepository(),
+ tester.curator(),
+ tester.applicationRepository().clock(),
+ Duration.ofDays(1));
+ // Maintainer does not trigger services before convergence has been reached
+ maintainer.run();
+ assertEquals(Set.of(), tester.applicationRepository().getPendingRestarts(tester.applicationId()).restartsReadyAt(1));
+ assertEquals(Set.of("hostName0"), tester.applicationRepository().getPendingRestarts(tester.applicationId()).hostnames());
+
+ // All services converge, and maintainer triggers restarts
+ mutableServices.clear();
+ maintainer.run();
+ assertEquals(Set.of(), tester.applicationRepository().getPendingRestarts(tester.applicationId()).hostnames());
}
private void assertLogContainsMessage(DeployHandlerLogger log, String message) {
diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/maintenance/PendingRestartsMaintainerTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/maintenance/PendingRestartsMaintainerTest.java
new file mode 100644
index 00000000000..119753ab3b4
--- /dev/null
+++ b/configserver/src/test/java/com/yahoo/vespa/config/server/maintenance/PendingRestartsMaintainerTest.java
@@ -0,0 +1,73 @@
+package com.yahoo.vespa.config.server.maintenance;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.vespa.config.server.application.ConfigConvergenceChecker.ServiceListResponse;
+import com.yahoo.vespa.config.server.application.PendingRestarts;
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.logging.Logger;
+
+import static com.yahoo.vespa.config.server.maintenance.PendingRestartsMaintainer.triggerPendingRestarts;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.fail;
+
+class PendingRestartsMaintainerTest {
+
+ private static final Logger log = Logger.getLogger(PendingRestartsMaintainerTest.class.getName());
+
+ @Test
+ void testMaintenance() {
+ // Nothing happens with no pending restarts.
+ assertSame(PendingRestarts.empty(),
+ triggerPendingRestarts(hosts -> { fail("Should not be called"); return null; },
+ (id, hosts) -> { fail("Should not be called"); },
+ ApplicationId.defaultId(),
+ PendingRestarts.empty(),
+ log));
+
+ // Nothing happens when services are on a too low generation.
+ assertEquals(Map.of(1L, Set.of("a", "b"), 2L, Set.of("c")),
+ triggerPendingRestarts(hosts -> new ServiceListResponse(Map.of(), 3, 0),
+ (id, hosts) -> { fail("Should not be called"); },
+ ApplicationId.defaultId(),
+ PendingRestarts.empty()
+ .withRestarts(1, List.of("a", "b"))
+ .withRestarts(2, List.of("c")),
+ log)
+ .generationsForRestarts());
+
+ // Only the first hosts are restarted before the second generation is reached.
+ assertEquals(Map.of(2L, Set.of("c")),
+ triggerPendingRestarts(hosts -> new ServiceListResponse(Map.of(), 3, 1),
+ (id, hosts) -> {
+ assertEquals(ApplicationId.defaultId(), id);
+ assertEquals(Set.of("a", "b"), hosts);
+ },
+ ApplicationId.defaultId(),
+ PendingRestarts.empty()
+ .withRestarts(1, List.of("a", "b"))
+ .withRestarts(2, List.of("c")),
+ log)
+ .generationsForRestarts());
+
+ // All hosts are restarted when the second generation is reached.
+ assertEquals(Map.of(),
+ triggerPendingRestarts(hosts -> new ServiceListResponse(Map.of(), 3, 2),
+ (id, hosts) -> {
+ assertEquals(ApplicationId.defaultId(), id);
+ assertEquals(Set.of("a", "b", "c"), hosts);
+ },
+ ApplicationId.defaultId(),
+ PendingRestarts.empty()
+ .withRestarts(1, List.of("a", "b"))
+ .withRestarts(2, List.of("c")),
+ log)
+ .generationsForRestarts());
+
+ }
+
+}