From 3c92c3ca3e8b272fcdc531d6dfa99925b98e0a2c Mon Sep 17 00:00:00 2001 From: Arnstein Ressem Date: Thu, 23 Mar 2023 15:27:30 +0100 Subject: Revert "Cleanup: Remove RemoveApplicationWaiter, use CuratorCompletionWaiter …" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../vespa/config/server/ApplicationRepository.java | 6 +- .../server/application/TenantApplications.java | 162 ++++++++++++++++++--- .../server/application/TenantApplicationsTest.java | 39 +++-- 3 files changed, 175 insertions(+), 32 deletions(-) diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java index 2cf3860c401..f6988a6b566 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java @@ -534,7 +534,7 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye NestedTransaction transaction = new NestedTransaction(); Optional applicationTransaction = hostProvisioner.map(provisioner -> provisioner.lock(applicationId)) .map(lock -> new ApplicationTransaction(lock, transaction)); - try (@SuppressWarnings("unused") var applicationLock = tenantApplications.lock(applicationId)) { + try (var applicationLock = tenantApplications.lock(applicationId)) { Optional activeSession = tenantApplications.activeSessionOf(applicationId); CompletionWaiter waiter; if (activeSession.isPresent()) { @@ -544,7 +544,7 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye } catch (NotFoundException e) { log.log(Level.INFO, TenantRepository.logPre(applicationId) + "Active session exists, but has not been deleted properly. Trying to cleanup"); } - waiter = tenantApplications.createDeleteApplicationWaiter(applicationId); + waiter = tenantApplications.createRemoveApplicationWaiter(applicationId); } else { // If there's no active session, we still want to clean up any resources created in a failing prepare waiter = new NoopCompletionWaiter(); @@ -796,7 +796,7 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye NestedTransaction transaction = new NestedTransaction(); Optional applicationTransaction = hostProvisioner.map(provisioner -> provisioner.lock(applicationId)) .map(lock -> new ApplicationTransaction(lock, transaction)); - try (@SuppressWarnings("unused") var sessionLock = tenant.getApplicationRepo().lock(applicationId)) { + try (var sessionLock = tenant.getApplicationRepo().lock(applicationId)) { Optional activeSession = getActiveSession(applicationId); var sessionZooKeeperClient = tenant.getSessionRepository().createSessionZooKeeperClient(session.getSessionId()); CompletionWaiter waiter = sessionZooKeeperClient.createActiveWaiter(); diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/application/TenantApplications.java b/configserver/src/main/java/com/yahoo/vespa/config/server/application/TenantApplications.java index 2e31e6cf9fd..88e3134ccad 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/application/TenantApplications.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/application/TenantApplications.java @@ -13,8 +13,8 @@ import com.yahoo.transaction.Transaction; import com.yahoo.vespa.config.ConfigKey; import com.yahoo.vespa.config.GetConfigRequest; import com.yahoo.vespa.config.protocol.ConfigResponse; -import com.yahoo.vespa.config.server.ConfigActivationListener; import com.yahoo.vespa.config.server.NotFoundException; +import com.yahoo.vespa.config.server.ConfigActivationListener; import com.yahoo.vespa.config.server.RequestHandler; import com.yahoo.vespa.config.server.deploy.TenantFileSystemDirs; import com.yahoo.vespa.config.server.host.HostRegistry; @@ -23,6 +23,7 @@ import com.yahoo.vespa.config.server.monitoring.MetricUpdater; import com.yahoo.vespa.config.server.monitoring.Metrics; import com.yahoo.vespa.config.server.rpc.ConfigResponseFactory; import com.yahoo.vespa.config.server.tenant.TenantRepository; +import com.yahoo.vespa.curator.CompletionTimeoutException; import com.yahoo.vespa.curator.Curator; import com.yahoo.vespa.curator.Lock; import com.yahoo.vespa.curator.transaction.CuratorTransaction; @@ -35,6 +36,7 @@ import java.nio.file.Files; import java.nio.file.Paths; import java.time.Clock; import java.time.Duration; +import java.time.Instant; import java.util.Collection; import java.util.LinkedHashSet; import java.util.List; @@ -58,7 +60,6 @@ import static java.util.stream.Collectors.toSet; public class TenantApplications implements RequestHandler, HostValidator { private static final Logger log = Logger.getLogger(TenantApplications.class.getName()); - private static final Duration deleteBarrierWaitForAll = Duration.ofSeconds(5); private final Curator curator; private final ApplicationCuratorDatabase database; @@ -73,7 +74,7 @@ public class TenantApplications implements RequestHandler, HostValidator { private final MetricUpdater tenantMetricUpdater; private final Clock clock; private final TenantFileSystemDirs tenantFileSystemDirs; - private final String serverId; + private final ConfigserverConfig configserverConfig; private final ListFlag incompatibleVersions; public TenantApplications(TenantName tenant, Curator curator, StripedExecutor zkWatcherExecutor, @@ -94,7 +95,7 @@ public class TenantApplications implements RequestHandler, HostValidator { this.hostRegistry = hostRegistry; this.tenantFileSystemDirs = tenantFileSystemDirs; this.clock = clock; - this.serverId = configserverConfig.serverId(); + this.configserverConfig = configserverConfig; this.incompatibleVersions = PermanentFlags.INCOMPATIBLE_VERSIONS.bindTo(flagSource); } @@ -229,7 +230,7 @@ public class TenantApplications implements RequestHandler, HostValidator { */ public void activateApplication(ApplicationSet applicationSet, long activeSessionId) { ApplicationId id = applicationSet.getId(); - try (@SuppressWarnings("unused") Lock lock = lock(id)) { + try (Lock lock = lock(id)) { if ( ! exists(id)) return; // Application was deleted before activation. if (applicationSet.getApplicationGeneration() != activeSessionId) @@ -256,7 +257,7 @@ public class TenantApplications implements RequestHandler, HostValidator { configActivationListenersOnRemove(applicationId); tenantMetricUpdater.setApplications(applicationMapper.numApplications()); metrics.removeMetricUpdater(Metrics.createDimensions(applicationId)); - getDeleteApplicationWaiter(applicationId).notifyCompletion(); + getRemoveApplicationWaiter(applicationId).notifyCompletion(); log.log(Level.INFO, "Application removed: " + applicationId); } } @@ -268,7 +269,7 @@ public class TenantApplications implements RequestHandler, HostValidator { public void removeApplicationsExcept(Set applications) { for (ApplicationId activeApplication : applicationMapper.listApplicationIds()) { if ( ! applications.contains(activeApplication)) { - try (@SuppressWarnings("unused") var applicationLock = lock(activeApplication)){ + try (var applicationLock = lock(activeApplication)){ removeApplication(activeApplication); } } @@ -402,20 +403,147 @@ public class TenantApplications implements RequestHandler, HostValidator { public TenantFileSystemDirs getTenantFileSystemDirs() { return tenantFileSystemDirs; } - public CompletionWaiter createDeleteApplicationWaiter(ApplicationId applicationId) { - var barrierPath = barrierPathForDelete(applicationId); - return curator.createCompletionWaiter(barrierPath, applicationId.serializedForm(), serverId, deleteBarrierWaitForAll); + public CompletionWaiter createRemoveApplicationWaiter(ApplicationId applicationId) { + return RemoveApplicationWaiter.createAndInitialize(curator, applicationId, configserverConfig.serverId()); } - public CompletionWaiter getDeleteApplicationWaiter(ApplicationId applicationId) { - var barrierPath = barrierPathForDelete(applicationId).append(applicationId.serializedForm()); - return curator.getCompletionWaiter(barrierPath, serverId, deleteBarrierWaitForAll); + public CompletionWaiter getRemoveApplicationWaiter(ApplicationId applicationId) { + return RemoveApplicationWaiter.create(curator, applicationId, configserverConfig.serverId()); } - private Path barrierPathForDelete(ApplicationId applicationId) { - return TenantRepository.getBarriersPath() - .append(applicationId.tenant().value()) - .append("delete-application"); + /** + * Waiter for removing application. Will wait for some time for all servers to remove application, + * but will accept the majority of servers to have removed app if it takes a long time. + */ + // TODO: Merge with CuratorCompletionWaiter + static class RemoveApplicationWaiter implements CompletionWaiter { + + private static final java.util.logging.Logger log = Logger.getLogger(RemoveApplicationWaiter.class.getName()); + private static final Duration waitForAllDefault = Duration.ofSeconds(5); + + private final Curator curator; + private final Path barrierPath; + private final Path waiterNode; + private final Duration waitForAll; + private final Clock clock = Clock.systemUTC(); + + RemoveApplicationWaiter(Curator curator, ApplicationId applicationId, String serverId) { + this(curator, applicationId, serverId, waitForAllDefault); + } + + RemoveApplicationWaiter(Curator curator, ApplicationId applicationId, String serverId, Duration waitForAll) { + this.barrierPath = TenantRepository.getBarriersPath().append(applicationId.tenant().value()) + .append("delete-application") + .append(applicationId.serializedForm()); + this.waiterNode = barrierPath.append(serverId); + this.curator = curator; + this.waitForAll = waitForAll; + } + + @Override + public void awaitCompletion(Duration timeout) { + List respondents; + try { + respondents = awaitInternal(timeout); + } catch (Exception e) { + throw new RuntimeException(e); + } + if (respondents.size() < barrierMemberCount()) { + throw new CompletionTimeoutException("Timed out waiting for peer config servers to remove application " + + "(waited for barrier " + barrierPath + ")." + + "Got response from " + respondents + ", but need response from " + + "at least " + barrierMemberCount() + " server(s). " + + "Timeout passed as argument was " + timeout.toMillis() + " ms"); + } + } + + private List awaitInternal(Duration timeout) throws Exception { + Instant startTime = clock.instant(); + Instant endTime = startTime.plus(timeout); + Instant gotQuorumTime = Instant.EPOCH; + List respondents; + do { + respondents = curator.framework().getChildren().forPath(barrierPath.getAbsolute()); + if (log.isLoggable(Level.FINE)) { + log.log(Level.FINE, respondents.size() + "/" + curator.zooKeeperEnsembleCount() + " responded: " + + respondents + ", all participants: " + curator.zooKeeperEnsembleConnectionSpec()); + } + + // If all config servers responded, return + if (respondents.size() == curator.zooKeeperEnsembleCount()) { + logBarrierCompleted(respondents, startTime); + break; + } + + // If some are missing, quorum is enough, but wait for all up to 5 seconds before returning + if (respondents.size() >= barrierMemberCount()) { + if (gotQuorumTime.isBefore(startTime)) + gotQuorumTime = clock.instant(); + + // Give up if more than some time has passed since we got quorum, otherwise continue + if (Duration.between(clock.instant(), gotQuorumTime.plus(waitForAll)).isNegative()) { + logBarrierCompleted(respondents, startTime); + break; + } + } + + Thread.sleep(100); + } while (clock.instant().isBefore(endTime)); + + return respondents; + } + + private void logBarrierCompleted(List respondents, Instant startTime) { + Duration duration = Duration.between(startTime, Instant.now()); + Level level = (duration.minus(Duration.ofSeconds(5))).isNegative() ? Level.FINE : Level.INFO; + log.log(level, () -> barrierCompletedMessage(respondents, duration)); + } + + private String barrierCompletedMessage(List respondents, Duration duration) { + return barrierPath + " completed in " + duration.toString() + + ", " + respondents.size() + "/" + curator.zooKeeperEnsembleCount() + " responded: " + respondents; + } + + @Override + public void notifyCompletion() { + try { + curator.framework().create().forPath(waiterNode.getAbsolute()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public String toString() { return "'" + barrierPath + "', " + barrierMemberCount() + " members"; } + + public static CompletionWaiter create(Curator curator, ApplicationId applicationId, String serverId) { + return new RemoveApplicationWaiter(curator, applicationId, serverId); + } + + public static CompletionWaiter create(Curator curator, ApplicationId applicationId, String serverId, Duration waitForAll) { + return new RemoveApplicationWaiter(curator, applicationId, serverId, waitForAll); + } + + public static CompletionWaiter createAndInitialize(Curator curator, ApplicationId applicationId, String serverId) { + return createAndInitialize(curator, applicationId, serverId, waitForAllDefault); + } + + public static CompletionWaiter createAndInitialize(Curator curator, ApplicationId applicationId, String serverId, Duration waitForAll) { + RemoveApplicationWaiter waiter = new RemoveApplicationWaiter(curator, applicationId, serverId, waitForAll); + + // Cleanup and create a new barrier path + Path barrierPath = waiter.barrierPath(); + curator.delete(barrierPath); + curator.create(barrierPath.getParentPath()); + curator.createAtomically(barrierPath); + + return waiter; + } + + private int barrierMemberCount() { return (curator.zooKeeperEnsembleCount() / 2) + 1; /* majority */ } + + private Path barrierPath() { return barrierPath; } + } } diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/application/TenantApplicationsTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/application/TenantApplicationsTest.java index 1edc2d9bebc..728f3e8510f 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/application/TenantApplicationsTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/application/TenantApplicationsTest.java @@ -12,8 +12,8 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.TenantName; import com.yahoo.text.Utf8; import com.yahoo.vespa.config.ConfigKey; -import com.yahoo.vespa.config.server.ConfigActivationListener; import com.yahoo.vespa.config.server.ConfigServerDB; +import com.yahoo.vespa.config.server.ConfigActivationListener; import com.yahoo.vespa.config.server.ServerCache; import com.yahoo.vespa.config.server.deploy.TenantFileSystemDirs; import com.yahoo.vespa.config.server.host.HostRegistry; @@ -26,6 +26,7 @@ import com.yahoo.vespa.config.server.tenant.TestTenantRepository; import com.yahoo.vespa.curator.CompletionTimeoutException; import com.yahoo.vespa.curator.Curator; import com.yahoo.vespa.curator.mock.MockCurator; +import com.yahoo.vespa.curator.mock.MockCuratorFramework; import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.model.VespaModel; @@ -36,17 +37,22 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.xml.sax.SAXException; + import java.io.File; import java.io.IOException; import java.time.Clock; import java.time.Duration; import java.util.Arrays; +import java.util.Collection; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.IntStream; +import static com.yahoo.vespa.config.server.application.TenantApplications.RemoveApplicationWaiter; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -189,6 +195,7 @@ public class TenantApplicationsTest { public static class MockConfigActivationListener implements ConfigActivationListener { public final AtomicInteger activated = new AtomicInteger(0); final AtomicInteger removed = new AtomicInteger(0); + final Map> tenantHosts = new LinkedHashMap<>(); @Override public void configActivated(ApplicationSet application) { @@ -240,22 +247,22 @@ public class TenantApplicationsTest { @Test public void testRemoveApplication2of3Respond() throws InterruptedException { - TenantApplications applications = createZKAppRepo(new InMemoryFlagSource()); - Thread t1 = setupWaiter(applications); - notifyCompletion(applications, 2); + Curator curator = new MockCurator3ConfigServers(); + Thread t1 = setupWaiter(curator); + notifyCompletion(curator, 2); t1.join(); } @Test public void testRemoveApplicationAllRespond() throws InterruptedException { - TenantApplications applications = createZKAppRepo(new InMemoryFlagSource()); - Thread t1 = setupWaiter(applications); - notifyCompletion(applications, 3); + Curator curator = new MockCurator3ConfigServers(); + Thread t1 = setupWaiter(curator); + notifyCompletion(curator, 3); t1.join(); } - private Thread setupWaiter(TenantApplications applications) { - Curator.CompletionWaiter waiter = applications.createDeleteApplicationWaiter(createApplicationId()); + private Thread setupWaiter(Curator curator) { + Curator.CompletionWaiter waiter = RemoveApplicationWaiter.createAndInitialize(curator, createApplicationId(), "cfg1", Duration.ofSeconds(1)); Thread t1 = new Thread(() -> { try { waiter.awaitCompletion(Duration.ofSeconds(120)); @@ -267,10 +274,10 @@ public class TenantApplicationsTest { return t1; } - private void notifyCompletion(TenantApplications applications, int respondentCount) { + private void notifyCompletion(Curator curator, int respondentCount) { IntStream.range(0, respondentCount) - .forEach(i -> applications.getDeleteApplicationWaiter(createApplicationId()) - .notifyCompletion()); + .forEach(i -> RemoveApplicationWaiter.create(curator, createApplicationId(), "cfg" + i, Duration.ofSeconds(1)) + .notifyCompletion()); } private TenantApplications createZKAppRepo() { @@ -325,4 +332,12 @@ public class TenantApplicationsTest { flagSource); } + private static class MockCurator3ConfigServers extends Curator { + + public MockCurator3ConfigServers() { + super("host1:2181,host2:2181,host3:2181", "host1:2181,host2:2181,host3:2181", (retryPolicy) -> new MockCuratorFramework(true, false)); + } + + } + } -- cgit v1.2.3