aboutsummaryrefslogtreecommitdiffstats
path: root/configserver
diff options
context:
space:
mode:
authorHarald Musum <musum@yahooinc.com>2023-05-02 12:57:30 +0200
committerHarald Musum <musum@yahooinc.com>2023-05-02 12:57:30 +0200
commitea7eb3243474738236bbdcadfc4ecccfa2b6dcea (patch)
tree6cb104cbe3a4660a4757b7e2bf53ecc4910145ed /configserver
parentc6968d5c84407bc81b91ace17240668c4a222e2c (diff)
Add some more details when config is not converging
Diffstat (limited to 'configserver')
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java42
-rw-r--r--configserver/src/test/java/com/yahoo/vespa/config/server/MockConfigConvergenceChecker.java22
-rw-r--r--configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java73
3 files changed, 112 insertions, 25 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java
index 66a5bc5a023..eee9483cd69 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/Deployment.java
@@ -18,8 +18,6 @@ import com.yahoo.vespa.config.server.ApplicationRepository;
import com.yahoo.vespa.config.server.ApplicationRepository.ActionTimer;
import com.yahoo.vespa.config.server.ApplicationRepository.Activation;
import com.yahoo.vespa.config.server.TimeoutBudget;
-import com.yahoo.vespa.config.server.application.Application;
-import com.yahoo.vespa.config.server.application.ConfigConvergenceChecker;
import com.yahoo.vespa.config.server.application.ConfigNotConvergedException;
import com.yahoo.vespa.config.server.configchange.ConfigChangeActions;
import com.yahoo.vespa.config.server.configchange.ReindexActions;
@@ -188,28 +186,44 @@ public class Deployment implements com.yahoo.config.provision.Deployment {
private void waitForConfigToConverge(ApplicationId applicationId) {
deployLogger.log(Level.INFO, "Wait for all services to use new config generation before restarting");
- while (true) {
- try {
- params.get().getTimeoutBudget().assertNotTimedOut(
- () -> "Timeout exceeded while waiting for config convergence for " + applicationId);
- } catch (UncheckedTimeoutException e) {
- throw new ConfigNotConvergedException(e);
- }
+ var convergenceChecker = applicationRepository.configConvergenceChecker();
+ var app = applicationRepository.getActiveApplication(applicationId);
- ConfigConvergenceChecker convergenceChecker = applicationRepository.configConvergenceChecker();
- Application app = applicationRepository.getActiveApplication(applicationId);
- ServiceListResponse response = convergenceChecker.checkConvergenceUnlessDeferringChangesUntilRestart(app);
+ ServiceListResponse response = null;
+ while (timeLeft(applicationId, response)) {
+ response = convergenceChecker.checkConvergenceUnlessDeferringChangesUntilRestart(app);
if (response.converged) {
deployLogger.log(Level.INFO, "Services converged on new config generation " + response.currentGeneration);
return;
} else {
- deployLogger.log(Level.INFO, "Services did not converge on new config generation " +
- response.wantedGeneration + ", current generation: " + response.currentGeneration + ", will retry");
+ deployLogger.log(Level.INFO, "Services that did not converge on new config generation " +
+ response.wantedGeneration + ": " +
+ servicesNotConvergedFormatted(response) + ". Will retry");
try { Thread.sleep(5_000); } catch (InterruptedException e) { /* ignore */ }
}
}
}
+ private boolean timeLeft(ApplicationId applicationId, ServiceListResponse response) {
+ try {
+ params.get().getTimeoutBudget().assertNotTimedOut(
+ () -> "Timeout exceeded while waiting for config convergence for " + applicationId +
+ ", wanted generation " + response.wantedGeneration + ", these services had another generation: " +
+ servicesNotConvergedFormatted(response));
+ } catch (UncheckedTimeoutException e) {
+ throw new ConfigNotConvergedException(e);
+ }
+ return true;
+ }
+
+ private String servicesNotConvergedFormatted(ServiceListResponse response) {
+ return response.services().stream()
+ .filter(service -> service.currentGeneration != response.wantedGeneration)
+ .map(service -> service.serviceInfo.getHostName() + ":" + service.serviceInfo.getServiceName() +
+ " on generation " + service.currentGeneration)
+ .collect(Collectors.joining(","));
+ }
+
private void storeReindexing(ApplicationId applicationId, long requiredSession) {
applicationRepository.modifyReindexing(applicationId, reindexing -> {
if (configChangeActions != null)
diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/MockConfigConvergenceChecker.java b/configserver/src/test/java/com/yahoo/vespa/config/server/MockConfigConvergenceChecker.java
index b4892caa05f..f335e978235 100644
--- a/configserver/src/test/java/com/yahoo/vespa/config/server/MockConfigConvergenceChecker.java
+++ b/configserver/src/test/java/com/yahoo/vespa/config/server/MockConfigConvergenceChecker.java
@@ -5,14 +5,26 @@ import com.yahoo.vespa.config.server.application.Application;
import com.yahoo.vespa.config.server.application.ConfigConvergenceChecker;
import java.time.Duration;
+import java.util.HashMap;
+import java.util.List;
import java.util.Map;
+import java.util.function.Function;
+import java.util.stream.Collectors;
public class MockConfigConvergenceChecker extends ConfigConvergenceChecker {
private final long wantedGeneration;
+ private final List<ServiceInfo> servicesThatFailFirstIteration;
+
+ private int iteration = 0;
public MockConfigConvergenceChecker(long wantedGeneration) {
+ this(wantedGeneration, List.of());
+ }
+
+ public MockConfigConvergenceChecker(long wantedGeneration, List<ServiceInfo> servicesThatFailFirstIteration) {
this.wantedGeneration = wantedGeneration;
+ this.servicesThatFailFirstIteration = servicesThatFailFirstIteration;
}
@Override
@@ -32,7 +44,15 @@ public class MockConfigConvergenceChecker extends ConfigConvergenceChecker {
@Override
public ServiceListResponse checkConvergenceUnlessDeferringChangesUntilRestart(Application application) {
- return new ServiceListResponse(Map.of(), wantedGeneration, wantedGeneration);
+ iteration++;
+ if (servicesThatFailFirstIteration.isEmpty() || iteration > 1)
+ return new ServiceListResponse(Map.of(), wantedGeneration, wantedGeneration);
+
+ Map<ServiceInfo, Long> services = new HashMap<>();
+ for (var service : servicesThatFailFirstIteration) {
+ services.put(service, wantedGeneration - 1);
+ }
+ return new ServiceListResponse(services, wantedGeneration, wantedGeneration - 1);
}
}
diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java
index 842416615e2..f170e232619 100644
--- a/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java
+++ b/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/HostedDeployTest.java
@@ -22,9 +22,11 @@ import com.yahoo.config.provision.DockerImage;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.Zone;
+import com.yahoo.slime.SlimeUtils;
import com.yahoo.test.ManualClock;
import com.yahoo.vespa.config.server.MockConfigConvergenceChecker;
import com.yahoo.vespa.config.server.application.ApplicationReindexing;
+import com.yahoo.vespa.config.server.application.ConfigConvergenceChecker;
import com.yahoo.vespa.config.server.http.InternalServerException;
import com.yahoo.vespa.config.server.http.InvalidApplicationException;
import com.yahoo.vespa.config.server.http.UnknownVespaVersionException;
@@ -42,7 +44,6 @@ import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.util.Collections;
-import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
@@ -445,8 +446,7 @@ public class HostedDeployTest {
@Test
public void testThatConfigChangeActionsAreCollectedFromAllModels() {
List<Host> hosts = createHosts(9, "6.1.0", "6.2.0");
- List<ServiceInfo> services = List.of(
- new ServiceInfo("serviceName", "serviceType", null, new HashMap<>(), "configId", "hostName"));
+ List<ServiceInfo> services = createServices();
List<ModelFactory> modelFactories = List.of(
new ConfigChangeActionsModelFactory(Version.fromString("6.1.0"),
@@ -461,10 +461,41 @@ public class HostedDeployTest {
}
@Test
+ public void testConfigConvergenceBeforeRestart() {
+ List<Host> hosts = createHosts(9, "6.1.0", "6.2.0");
+ List<ServiceInfo> services = createServices();
+
+ List<ModelFactory> modelFactories = List.of(
+ new ConfigChangeActionsModelFactory(Version.fromString("6.1.0"),
+ new VespaRestartAction(ClusterSpec.Id.from("test"), "change", services)),
+ new ConfigChangeActionsModelFactory(Version.fromString("6.2.0"),
+ new VespaRestartAction(ClusterSpec.Id.from("test"), "other change", services)));
+
+ DeployTester tester = createTester(hosts,
+ modelFactories,
+ prodZone,
+ Clock.systemUTC(),
+ new MockConfigConvergenceChecker(2L, services));
+ var result = tester.deployApp("src/test/apps/hosted/", "6.2.0");
+ DeployHandlerLogger deployLogger = result.deployLogger();
+
+ System.out.println(SlimeUtils.toJson(deployLogger.slime().get()));
+ assertLogContainsMessage(deployLogger, "Wait for all services to use new config generation before restarting");
+ assertLogContainsMessage(deployLogger, "Services that did not converge on new config generation 2: hostName:serviceName on generation 1,hostName2:serviceName2 on generation 1. Will retry");
+ assertLogContainsMessage(deployLogger, "Services converged on new config generation 2");
+ }
+
+ private void assertLogContainsMessage(DeployHandlerLogger log, String message) {
+ assertEquals(1, SlimeUtils.entriesStream(log.slime().get().field("log"))
+ .map(entry -> entry.field("message").asString())
+ .filter(m -> m.contains(message))
+ .count());
+ }
+
+ @Test
public void testThatAllowedConfigChangeActionsAreActedUpon() {
List<Host> hosts = createHosts(9, "6.1.0");
- List<ServiceInfo> services = List.of(
- new ServiceInfo("serviceName", "serviceType", null, Map.of("clustername", "cluster"), "configId", "hostName"));
+ List<ServiceInfo> services = createServices();
ManualClock clock = new ManualClock(Instant.EPOCH);
List<ModelFactory> modelFactories = List.of(
@@ -539,18 +570,40 @@ public class HostedDeployTest {
return createTester(hosts, modelFactories, zone, Clock.systemUTC());
}
- private DeployTester createTester(List<Host> hosts, List<ModelFactory> modelFactories,
- Zone prodZone, Clock clock) {
+ private DeployTester createTester(List<Host> hosts, List<ModelFactory> modelFactories, Zone zone, Clock clock) {
+ return createTester(hosts, modelFactories, zone, clock, new MockConfigConvergenceChecker(2));
+ }
+
+ private DeployTester createTester(List<Host> hosts,
+ List<ModelFactory> modelFactories,
+ Zone zone,
+ Clock clock,
+ ConfigConvergenceChecker configConvergenceChecker) {
return new DeployTester.Builder(temporaryFolder)
.modelFactories(modelFactories)
.clock(clock)
- .zone(prodZone)
+ .zone(zone)
.hostProvisioner(new InMemoryProvisioner(new Hosts(hosts), true, false))
- .configConvergenceChecker(new MockConfigConvergenceChecker(2))
- .hostedConfigserverConfig(prodZone)
+ .configConvergenceChecker(configConvergenceChecker)
+ .hostedConfigserverConfig(zone)
.build();
}
+ private static List<ServiceInfo> createServices() {
+ return List.of(new ServiceInfo("serviceName",
+ "serviceType",
+ null,
+ Map.of("clustername", "cluster"),
+ "configId",
+ "hostName"),
+ new ServiceInfo("serviceName2",
+ "serviceType2",
+ null,
+ Map.of("clustername", "cluster"),
+ "configId",
+ "hostName2"));
+ }
+
private static class ConfigChangeActionsModelFactory extends TestModelFactory {
private final List<ConfigChangeAction> actions;