From 195f2648952861812e6aadc8835e5f0cdc8d7a79 Mon Sep 17 00:00:00 2001 From: HÃ¥kon Hallingstad Date: Thu, 5 Mar 2020 00:29:50 +0100 Subject: Support cleanup of status service --- .../vespa/orchestrator/OrchestratorContext.java | 6 + .../yahoo/vespa/orchestrator/OrchestratorImpl.java | 13 +- .../yahoo/vespa/orchestrator/status/HostInfos.java | 6 +- .../vespa/orchestrator/status/HostInfosCache.java | 27 +++- .../orchestrator/status/HostInfosService.java | 12 +- .../orchestrator/status/HostInfosServiceImpl.java | 17 ++- .../vespa/orchestrator/status/StatusService.java | 3 +- .../vespa/orchestrator/status/ZkStatusService.java | 95 +++++++++++- .../vespa/orchestrator/DummyServiceMonitor.java | 6 +- .../vespa/orchestrator/OrchestratorImplTest.java | 9 +- .../vespa/orchestrator/model/ModelTestUtils.java | 2 +- .../orchestrator/resources/HostResourceTest.java | 11 +- .../orchestrator/status/ZkStatusService2Test.java | 2 +- .../orchestrator/status/ZkStatusServiceTest.java | 164 ++++++++++++++++++--- 14 files changed, 313 insertions(+), 60 deletions(-) (limited to 'orchestrator/src') diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java index af59eb11e9b..b1f205f69d8 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java @@ -23,6 +23,7 @@ public class OrchestratorContext implements AutoCloseable { private static final Logger logger = Logger.getLogger(OrchestratorContext.class.getName()); private static final Duration DEFAULT_TIMEOUT_FOR_SINGLE_OP = Duration.ofSeconds(10); private static final Duration DEFAULT_TIMEOUT_FOR_BATCH_OP = Duration.ofSeconds(60); + private static final Duration DEFAULT_TIMEOUT_FOR_ADMIN_OP = Duration.ofMinutes(5); private static final Duration TIMEOUT_OVERHEAD = Duration.ofMillis(500); private final Optional parent; @@ -55,6 +56,11 @@ public class OrchestratorContext implements AutoCloseable { false, false, usePermanentlyDownStatus); } + public static OrchestratorContext createContextForAdminOp(Clock clock) { + return new OrchestratorContext(null, clock, TimeBudget.fromNow(clock, DEFAULT_TIMEOUT_FOR_ADMIN_OP), + false, false, false); + } + private OrchestratorContext(OrchestratorContext parentOrNull, Clock clock, TimeBudget timeBudget, diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java index d4b55e92271..5ac5c66cc7c 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java @@ -104,6 +104,8 @@ public class OrchestratorImpl implements Orchestrator { this.clock = clock; this.applicationApiFactory = applicationApiFactory; this.retireWithPermanentlyDownFlag = Flags.RETIRE_WITH_PERMANENTLY_DOWN.bindTo(flagSource); + + serviceMonitor.registerListener(statusService); } @Override @@ -353,6 +355,10 @@ public class OrchestratorImpl implements Orchestrator { throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException{ OrchestratorContext context = OrchestratorContext.createContextForSingleAppOp(clock); ApplicationInstanceReference reference = OrchestratorUtil.toApplicationInstanceReference(appId, serviceMonitor); + + ApplicationInstance application = serviceMonitor.getApplication(reference) + .orElseThrow(ApplicationIdNotFoundException::new); + try (ApplicationLock lock = statusService.lockApplication(context, reference)) { // Short-circuit if already in wanted state @@ -360,8 +366,6 @@ public class OrchestratorImpl implements Orchestrator { // Set content clusters for this application in maintenance on suspend if (status == ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN) { - ApplicationInstance application = getApplicationInstance(reference); - HostInfos hostInfosSnapshot = lock.getHostInfos(); // Mark it allowed to be down before we manipulate the clustercontroller @@ -421,11 +425,6 @@ public class OrchestratorImpl implements Orchestrator { () -> new HostNameNotFoundException(hostName)); } - private ApplicationInstance getApplicationInstance(ApplicationInstanceReference reference) - throws ApplicationIdNotFoundException { - return serviceMonitor.getApplication(reference).orElseThrow(ApplicationIdNotFoundException::new); - } - private static void sleep(long time, TimeUnit timeUnit) { try { Thread.sleep(timeUnit.toMillis(time)); diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfos.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfos.java index 1acd82662a4..22c2bcf1a79 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfos.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfos.java @@ -5,7 +5,6 @@ import com.yahoo.vespa.applicationmodel.HostName; import java.util.Map; import java.util.Set; -import java.util.stream.Collectors; /** * Collection of the suspended hosts of an application. @@ -28,4 +27,9 @@ public class HostInfos { public HostInfo getOrNoRemarks(HostName hostname) { return hostInfos.getOrDefault(hostname, HostInfo.createNoRemarks()); } + + /** The set of hostnames that were set in ZooKeeper - used for removing orphaned hostnames. */ + public Set getZkHostnames() { + return Set.copyOf(hostInfos.keySet()); + } } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosCache.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosCache.java index 680f3cbcc6d..7ee65ebcd0b 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosCache.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosCache.java @@ -7,6 +7,7 @@ import com.yahoo.vespa.curator.Curator; import com.yahoo.vespa.curator.recipes.CuratorCounter; import java.util.Map; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicLong; @@ -35,22 +36,22 @@ public class HostInfosCache implements HostInfosService { } } - public HostInfos getCachedHostInfos(ApplicationInstanceReference application) { - return suspendedHosts.computeIfAbsent(application, wrappedService::getHostInfos); + public HostInfos getCachedHostInfos(ApplicationInstanceReference reference) { + return suspendedHosts.computeIfAbsent(reference, wrappedService::getHostInfos); } @Override - public HostInfos getHostInfos(ApplicationInstanceReference application) { + public HostInfos getHostInfos(ApplicationInstanceReference reference) { refreshCache(); - return getCachedHostInfos(application); + return getCachedHostInfos(reference); } @Override - public boolean setHostStatus(ApplicationInstanceReference application, HostName hostName, HostStatus hostStatus) { + public boolean setHostStatus(ApplicationInstanceReference reference, HostName hostName, HostStatus hostStatus) { boolean isException = true; boolean modified = false; try { - modified = wrappedService.setHostStatus(application, hostName, hostStatus); + modified = wrappedService.setHostStatus(reference, hostName, hostStatus); isException = false; } finally { if (modified || isException) { @@ -61,4 +62,18 @@ public class HostInfosCache implements HostInfosService { return modified; } + + @Override + public void removeApplication(ApplicationInstanceReference reference) { + wrappedService.removeApplication(reference); + suspendedHosts.remove(reference); + } + + @Override + public void removeHosts(ApplicationInstanceReference reference, Set hostnames) { + if (hostnames.size() > 0) { + wrappedService.removeHosts(reference, hostnames); + counter.next(); + } + } } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosService.java index f5c079f9ba3..a796a55236c 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosService.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosService.java @@ -4,12 +4,20 @@ package com.yahoo.vespa.orchestrator.status; import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; import com.yahoo.vespa.applicationmodel.HostName; +import java.util.Set; + /** * @author hakonhall */ interface HostInfosService { - HostInfos getHostInfos(ApplicationInstanceReference application); + HostInfos getHostInfos(ApplicationInstanceReference reference); /** Returns false if it is known that the operation was a no-op. */ - boolean setHostStatus(ApplicationInstanceReference application, HostName hostName, HostStatus hostStatus); + boolean setHostStatus(ApplicationInstanceReference reference, HostName hostName, HostStatus hostStatus); + + /** Remove application. */ + void removeApplication(ApplicationInstanceReference reference); + + /** Remove hosts for application. */ + void removeHosts(ApplicationInstanceReference reference, Set hostnames); } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosServiceImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosServiceImpl.java index 2d6c3eb82a1..99f1f00e7dc 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosServiceImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosServiceImpl.java @@ -5,6 +5,7 @@ package com.yahoo.vespa.orchestrator.status; import com.yahoo.config.provision.ApplicationId; import com.yahoo.jdisc.Timer; import com.yahoo.log.LogLevel; +import com.yahoo.path.Path; import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.curator.Curator; @@ -16,6 +17,7 @@ import java.time.Instant; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import java.util.logging.Logger; import java.util.stream.Collectors; @@ -81,6 +83,19 @@ public class HostInfosServiceImpl implements HostInfosService { return true; } + @Override + public void removeApplication(ApplicationInstanceReference reference) { + ApplicationId application = OrchestratorUtil.toApplicationId(reference); + curator.delete(Path.fromString(applicationPath(application))); + } + + @Override + public void removeHosts(ApplicationInstanceReference reference, Set hostnames) { + ApplicationId application = OrchestratorUtil.toApplicationId(reference); + // Remove /vespa/host-status/APPLICATION_ID/hosts/HOSTNAME + hostnames.forEach(hostname -> curator.delete(Path.fromString(hostPath(application, hostname)))); + } + private Optional readBytesFromZk(String path) throws Exception { try { return Optional.of(curator.framework().getData().forPath(path)); @@ -101,7 +116,7 @@ public class HostInfosServiceImpl implements HostInfosService { } } - private static String applicationPath(ApplicationId application) { + static String applicationPath(ApplicationId application) { return "/vespa/host-status/" + application.serializedForm(); } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java index 0d4076f3d59..f6d14f609ee 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java @@ -5,6 +5,7 @@ import com.google.common.util.concurrent.UncheckedTimeoutException; import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.orchestrator.OrchestratorContext; +import com.yahoo.vespa.service.monitor.ServiceHostListener; import java.util.Set; import java.util.function.Function; @@ -18,7 +19,7 @@ import java.util.function.Function; * @author Tony Vaagenes * @author smorgrav */ -public interface StatusService { +public interface StatusService extends ServiceHostListener { /** * Returns a mutable host status registry for a locked application instance. All operations performed on diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZkStatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZkStatusService.java index c1ca04aea75..9874903e976 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZkStatusService.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZkStatusService.java @@ -7,10 +7,13 @@ import com.yahoo.container.jaxrs.annotation.Component; import com.yahoo.jdisc.Metric; import com.yahoo.jdisc.Timer; import com.yahoo.log.LogLevel; +import com.yahoo.path.Path; import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.curator.Curator; import com.yahoo.vespa.curator.Lock; +import com.yahoo.vespa.flags.FlagSource; +import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.orchestrator.OrchestratorContext; import com.yahoo.vespa.orchestrator.OrchestratorUtil; import org.apache.zookeeper.data.Stat; @@ -42,6 +45,7 @@ public class ZkStatusService implements StatusService { private final HostInfosCache hostInfosCache; private final Metric metric; private final Timer timer; + private final boolean doCleanup; /** * A cache of metric contexts for each possible dimension map. In practice, there is one dimension map @@ -50,19 +54,25 @@ public class ZkStatusService implements StatusService { private final ConcurrentHashMap, Metric.Context> cachedContexts = new ConcurrentHashMap<>(); @Inject - public ZkStatusService(@Component Curator curator, @Component Metric metric, @Component Timer timer) { - this.curator = curator; - this.metric = metric; - this.timer = timer; - this.hostInfosCache = new HostInfosCache(curator, new HostInfosServiceImpl(curator, timer)); + public ZkStatusService( + @Component Curator curator, + @Component Metric metric, + @Component Timer timer, + @Component FlagSource flagSource) { + this(curator, + metric, + timer, + new HostInfosCache(curator, new HostInfosServiceImpl(curator, timer)), + Flags.CLEANUP_STATUS_SERVICE.bindTo(flagSource).value()); } /** Non-private for testing only. */ - ZkStatusService(Curator curator, Metric metric, Timer timer, HostInfosCache hostInfosCache) { + ZkStatusService(Curator curator, Metric metric, Timer timer, HostInfosCache hostInfosCache, boolean doCleanup) { this.curator = curator; this.metric = metric; this.timer = timer; this.hostInfosCache = hostInfosCache; + this.doCleanup = doCleanup; } @Override @@ -214,8 +224,79 @@ public class ZkStatusService implements StatusService { } } + /** + * Remove all host-related data in ZooKeeper for all hostnames outside the given set. + */ + @Override + public void onApplicationActivate(ApplicationInstanceReference reference, Set hostnames) { + if (doCleanup) { + withLockForAdminOp(reference, " was activated", () -> { + HostInfos hostInfos = hostInfosCache.getCachedHostInfos(reference); + Set toRemove = new HashSet<>(hostInfos.getZkHostnames()); + toRemove.removeAll(hostnames); + if (toRemove.size() > 0) { + log.log(LogLevel.INFO, "Removing " + toRemove + " of " + reference + " from status service"); + hostInfosCache.removeHosts(reference, toRemove); + } + }); + } else { + log.log(LogLevel.INFO, "Would have removed orphaned hosts of " + reference + " from status service"); + } + } + + /** + * Remove the application from ZooKeeper. + * + *
    + *
  1. /vespa/host-status/APPLICATION_ID (should just be ./hosts/*)
  2. + *
  3. /vespa/host-status-service/REFERENCE/hosts-allowed-down (should just be ./*)
  4. + *
  5. /vespa/application-status-service/REFERENCE (should just be .)
  6. + *
+ */ + @Override + public void onApplicationRemove(ApplicationInstanceReference reference) { + if (doCleanup) { + withLockForAdminOp(reference, " was removed", () -> { + log.log(LogLevel.INFO, "Removing application " + reference + " from status service"); + + // /vespa/application-status-service/REFERENCE + curator.delete(Path.fromString(applicationInstanceSuspendedPath(reference))); + + // /vespa/host-status-service/REFERENCE/hosts-allowed-down + curator.delete(Path.fromString(hostsAllowedDownPath(reference))); + + // /vespa/host-status/APPLICATION_ID + hostInfosCache.removeApplication(reference); + }); + } else { + log.log(LogLevel.INFO, "Would have removed application " + reference + " from status service"); + } + } + + private void withLockForAdminOp(ApplicationInstanceReference reference, + String eventDescription, + Runnable runnable) { + OrchestratorContext context = OrchestratorContext.createContextForAdminOp(timer.toUtcClock()); + + final ApplicationLock lock; + try { + lock = lockApplication(context, reference); + } catch (RuntimeException e) { + log.log(LogLevel.ERROR, "Failed to get Orchestrator lock on when " + reference + + eventDescription + ": " + e.getMessage()); + return; + } + + try (lock) { + runnable.run(); + } catch (RuntimeException e) { + log.log(LogLevel.ERROR, "Failed to clean up after " + reference + eventDescription + + ": " + e.getMessage()); + } + } + static String applicationInstanceReferencePath(ApplicationInstanceReference reference) { - return HOST_STATUS_BASE_PATH + '/' + reference.tenantId() + ":" + reference.applicationInstanceId(); + return HOST_STATUS_BASE_PATH + '/' + reference.asString(); } private static String hostsAllowedDownPath(ApplicationInstanceReference reference) { diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyServiceMonitor.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyServiceMonitor.java index 09fb6296866..0da4c48d408 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyServiceMonitor.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyServiceMonitor.java @@ -18,7 +18,7 @@ import com.yahoo.vespa.orchestrator.model.VespaModelUtil; import com.yahoo.vespa.service.monitor.ServiceModel; import com.yahoo.vespa.service.monitor.ServiceMonitor; -import java.util.HashSet; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Optional; @@ -37,7 +37,7 @@ public class DummyServiceMonitor implements ServiceMonitor { public static final HostName TEST3_HOST_NAME = new HostName("test3.hostname.tld"); public static final HostName TEST6_HOST_NAME = new HostName("test6.hostname.tld"); - private static final Set apps = new HashSet<>(); + private static final List apps = new ArrayList<>(); static { apps.add(new ApplicationInstance( @@ -177,7 +177,7 @@ public class DummyServiceMonitor implements ServiceMonitor { return hosts; } - public static Set getApplications() { + public static List getApplications() { return apps; } } diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java index 9bcbdba074e..c5c5bd1e625 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java @@ -76,6 +76,8 @@ public class OrchestratorImplTest { private final ApplicationApiFactory applicationApiFactory = new ApplicationApiFactory(3); private final InMemoryFlagSource flagSource = new InMemoryFlagSource(); + private final MockCurator curator = new MockCurator(); + private ZkStatusService statusService = new ZkStatusService(curator, mock(Metric.class), new TestTimer(), flagSource); private ApplicationId app1; private ApplicationId app2; @@ -96,7 +98,7 @@ public class OrchestratorImplTest { clustercontroller = new ClusterControllerClientFactoryMock(); orchestrator = new OrchestratorImpl(new HostedVespaPolicy(new HostedVespaClusterPolicy(), clustercontroller, applicationApiFactory), clustercontroller, - new ZkStatusService(new MockCurator(), mock(Metric.class), new TestTimer()), + statusService, new DummyServiceMonitor(), 0, new ManualClock(), @@ -356,6 +358,9 @@ public class OrchestratorImplTest { HostName parentHostname = new HostName("parent.vespa.ai"); + verify(serviceMonitor, atLeastOnce()).registerListener(zookeeperStatusService); + verifyNoMoreInteractions(serviceMonitor); + orchestrator.suspendAll(parentHostname, List.of(parentHostname)); ArgumentCaptor contextCaptor = ArgumentCaptor.forClass(OrchestratorContext.class); @@ -390,7 +395,7 @@ public class OrchestratorImplTest { @Test public void testGetHost() throws Exception { ClusterControllerClientFactory clusterControllerClientFactory = new ClusterControllerClientFactoryMock(); - StatusService statusService = new ZkStatusService(new MockCurator(), mock(Metric.class), new TestTimer()); + StatusService statusService = new ZkStatusService(new MockCurator(), mock(Metric.class), new TestTimer(), flagSource); HostName hostName = new HostName("host.yahoo.com"); TenantId tenantId = new TenantId("tenant"); diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java index 3d0746e5a36..197a2a03984 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java @@ -58,7 +58,7 @@ class ModelTestUtils { private final Map applications = new HashMap<>(); private final ClusterControllerClientFactory clusterControllerClientFactory = new ClusterControllerClientFactoryMock(); private final Map hostStatusMap = new HashMap<>(); - private final StatusService statusService = new ZkStatusService(new MockCurator(), mock(Metric.class), new TestTimer()); + private final StatusService statusService = new ZkStatusService(new MockCurator(), mock(Metric.class), new TestTimer(), flagSource); private final TestTimer timer = new TestTimer(); private final ServiceMonitor serviceMonitor = new ServiceModelCache(() -> new ServiceModel(applications), timer); private final Orchestrator orchestrator = new OrchestratorImpl(new HostedVespaPolicy(new HostedVespaClusterPolicy(), clusterControllerClientFactory, applicationApiFactory()), diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java index b620b0798be..1bf3b267a05 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java @@ -80,7 +80,7 @@ public class HostResourceTest { private static final TenantId TENANT_ID = new TenantId("tenantId"); private static final ApplicationInstanceId APPLICATION_INSTANCE_ID = new ApplicationInstanceId("applicationId"); private static final StatusService EVERY_HOST_IS_UP_HOST_STATUS_SERVICE = new ZkStatusService( - new MockCurator(), mock(Metric.class), new TestTimer()); + new MockCurator(), mock(Metric.class), new TestTimer(), new InMemoryFlagSource()); private static final ApplicationApiFactory applicationApiFactory = new ApplicationApiFactory(3); private static final ServiceMonitor serviceMonitor = mock(ServiceMonitor.class); @@ -128,7 +128,8 @@ public class HostResourceTest { private final OrchestratorImpl alwaysAllowOrchestrator = new OrchestratorImpl( new AlwaysAllowPolicy(), new ClusterControllerClientFactoryMock(), - EVERY_HOST_IS_UP_HOST_STATUS_SERVICE, serviceMonitor, + EVERY_HOST_IS_UP_HOST_STATUS_SERVICE, + serviceMonitor, SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS, clock, applicationApiFactory, @@ -137,7 +138,8 @@ public class HostResourceTest { private final OrchestratorImpl hostNotFoundOrchestrator = new OrchestratorImpl( new AlwaysAllowPolicy(), new ClusterControllerClientFactoryMock(), - EVERY_HOST_IS_UP_HOST_STATUS_SERVICE, alwaysEmptyServiceMonitor, + EVERY_HOST_IS_UP_HOST_STATUS_SERVICE, + alwaysEmptyServiceMonitor, SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS, clock, applicationApiFactory, @@ -240,7 +242,8 @@ public class HostResourceTest { final OrchestratorImpl alwaysRejectResolver = new OrchestratorImpl( new AlwaysFailPolicy(), new ClusterControllerClientFactoryMock(), - EVERY_HOST_IS_UP_HOST_STATUS_SERVICE, serviceMonitor, + EVERY_HOST_IS_UP_HOST_STATUS_SERVICE, + serviceMonitor, SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS, clock, applicationApiFactory, diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZkStatusService2Test.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZkStatusService2Test.java index c5d390050ee..4c01572f787 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZkStatusService2Test.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZkStatusService2Test.java @@ -32,7 +32,7 @@ public class ZkStatusService2Test { private final Timer timer = new TestTimer(); private final Metric metric = mock(Metric.class); private final HostInfosCache cache = mock(HostInfosCache.class); - private final ZkStatusService zkStatusService = new ZkStatusService(curator, metric, timer, cache); + private final ZkStatusService zkStatusService = new ZkStatusService(curator, metric, timer, cache, false); private final OrchestratorContext context = mock(OrchestratorContext.class); private final InterProcessMutex mutex = mock(InterProcessMutex.class); diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZkStatusServiceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZkStatusServiceTest.java index a6d7f09d69b..3c3c337f4e1 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZkStatusServiceTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZkStatusServiceTest.java @@ -1,24 +1,30 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.orchestrator.status; +import com.yahoo.config.provision.ApplicationId; import com.yahoo.exception.ExceptionUtils; import com.yahoo.jdisc.Metric; import com.yahoo.jdisc.Timer; import com.yahoo.jdisc.test.TestTimer; import com.yahoo.log.LogLevel; +import com.yahoo.test.ManualClock; import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; +import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.curator.Curator; +import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.orchestrator.OrchestratorContext; +import com.yahoo.vespa.orchestrator.OrchestratorUtil; import com.yahoo.vespa.orchestrator.TestIds; import com.yahoo.yolean.Exceptions; import org.apache.curator.framework.CuratorFramework; import org.apache.curator.test.KillSession; import org.apache.curator.test.TestingServer; +import org.apache.zookeeper.data.Stat; import org.hamcrest.Description; import org.hamcrest.Matcher; import org.hamcrest.TypeSafeMatcher; import org.junit.After; -import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.ArgumentCaptor; @@ -40,6 +46,8 @@ import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.logging.Logger; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.hamcrest.core.Is.is; import static org.hamcrest.core.IsCollectionContaining.hasItem; @@ -55,25 +63,26 @@ import static org.mockito.Mockito.when; @RunWith(MockitoJUnitRunner.class) public class ZkStatusServiceTest { private TestingServer testingServer; - private ZkStatusService zkStatusService; + private ZkStatusService statusService; private Curator curator; private final Timer timer = mock(Timer.class); private final Metric metric = mock(Metric.class); private final OrchestratorContext context = mock(OrchestratorContext.class); + private InMemoryFlagSource flagSource = new InMemoryFlagSource(); @Captor private ArgumentCaptor> captor; - @Before public void setUp() throws Exception { Logger.getLogger("").setLevel(LogLevel.WARNING); testingServer = new TestingServer(); curator = createConnectedCurator(testingServer); - zkStatusService = new ZkStatusService(curator, metric, timer); + statusService = new ZkStatusService(curator, metric, timer, flagSource); when(context.getTimeLeft()).thenReturn(Duration.ofSeconds(10)); when(context.isProbe()).thenReturn(false); when(timer.currentTime()).thenReturn(Instant.ofEpochMilli(1)); + when(timer.toUtcClock()).thenReturn(new ManualClock(Instant.ofEpochMilli(1))); } private static Curator createConnectedCurator(TestingServer server) throws InterruptedException { @@ -94,20 +103,22 @@ public class ZkStatusServiceTest { } @Test - public void host_state_for_unknown_hosts_is_no_remarks() { + public void host_state_for_unknown_hosts_is_no_remarks() throws Exception { + setUp(); assertThat( - zkStatusService.getHostInfo(TestIds.APPLICATION_INSTANCE_REFERENCE, TestIds.HOST_NAME1).status(), + statusService.getHostInfo(TestIds.APPLICATION_INSTANCE_REFERENCE, TestIds.HOST_NAME1).status(), is(HostStatus.NO_REMARKS)); } @Test - public void setting_host_state_is_idempotent() { + public void setting_host_state_is_idempotent() throws Exception { + setUp(); when(timer.currentTime()).thenReturn( Instant.ofEpochMilli((1)), Instant.ofEpochMilli((3)), Instant.ofEpochMilli(6)); - try (ApplicationLock lock = zkStatusService.lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE)) { + try (ApplicationLock lock = statusService.lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE)) { //shuffling to catch "clean database" failures for all cases. for (HostStatus hostStatus: shuffledList(HostStatus.NO_REMARKS, HostStatus.ALLOWED_TO_BE_DOWN)) { @@ -138,10 +149,12 @@ public class ZkStatusServiceTest { @Test public void locks_are_exclusive() throws Exception { - ZkStatusService zkStatusService2 = new ZkStatusService(curator, mock(Metric.class), new TestTimer()); + setUp(); + ZkStatusService zkStatusService2 = + new ZkStatusService(curator, mock(Metric.class), new TestTimer(), flagSource); final CompletableFuture lockedSuccessfullyFuture; - try (ApplicationLock lock = zkStatusService + try (ApplicationLock lock = statusService .lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE)) { lockedSuccessfullyFuture = CompletableFuture.runAsync(() -> { @@ -163,9 +176,11 @@ public class ZkStatusServiceTest { @Test public void failing_to_get_lock_closes_SessionFailRetryLoop() throws Exception { - ZkStatusService zkStatusService2 = new ZkStatusService(curator, mock(Metric.class), new TestTimer()); + setUp(); + ZkStatusService zkStatusService2 = + new ZkStatusService(curator, mock(Metric.class), new TestTimer(), flagSource); - try (ApplicationLock lock = zkStatusService + try (ApplicationLock lock = statusService .lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE)) { //must run in separate thread, since having 2 locks in the same thread fails @@ -232,59 +247,160 @@ public class ZkStatusServiceTest { } @Test - public void suspend_and_resume_application_works_and_is_symmetric() { + public void suspend_and_resume_application_works_and_is_symmetric() throws Exception { + + setUp(); // Initial state is NO_REMARK assertThat( - zkStatusService + statusService .getApplicationInstanceStatus(TestIds.APPLICATION_INSTANCE_REFERENCE), is(ApplicationInstanceStatus.NO_REMARKS)); // Suspend - try (ApplicationLock lock = zkStatusService + try (ApplicationLock lock = statusService .lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE)) { lock.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN); } assertThat( - zkStatusService + statusService .getApplicationInstanceStatus(TestIds.APPLICATION_INSTANCE_REFERENCE), is(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN)); // Resume - try (ApplicationLock lock = zkStatusService + try (ApplicationLock lock = statusService .lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE)) { lock.setApplicationInstanceStatus(ApplicationInstanceStatus.NO_REMARKS); } assertThat( - zkStatusService + statusService .getApplicationInstanceStatus(TestIds.APPLICATION_INSTANCE_REFERENCE), is(ApplicationInstanceStatus.NO_REMARKS)); } @Test - public void suspending_two_applications_returns_two_applications() { - Set suspendedApps - = zkStatusService.getAllSuspendedApplications(); + public void suspending_two_applications_returns_two_applications() throws Exception { + setUp(); + Set suspendedApps = statusService.getAllSuspendedApplications(); assertThat(suspendedApps.size(), is(0)); - try (ApplicationLock statusRegistry = zkStatusService + try (ApplicationLock statusRegistry = statusService .lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE)) { statusRegistry.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN); } - try (ApplicationLock lock = zkStatusService + try (ApplicationLock lock = statusService .lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE2)) { lock.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN); } - suspendedApps = zkStatusService.getAllSuspendedApplications(); + suspendedApps = statusService.getAllSuspendedApplications(); assertThat(suspendedApps.size(), is(2)); assertThat(suspendedApps, hasItem(TestIds.APPLICATION_INSTANCE_REFERENCE)); assertThat(suspendedApps, hasItem(TestIds.APPLICATION_INSTANCE_REFERENCE2)); } + @Test + public void zookeeper_cleanup() throws Exception { + flagSource.withBooleanFlag(Flags.CLEANUP_STATUS_SERVICE.id(), true); + setUp(); + + HostName strayHostname = new HostName("stray1.com"); + + try (ApplicationLock lock = statusService.lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE)) { + lock.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN); + lock.setHostState(TestIds.HOST_NAME1, HostStatus.ALLOWED_TO_BE_DOWN); + + lock.setHostState(strayHostname, HostStatus.PERMANENTLY_DOWN); + } + + try (ApplicationLock lock = statusService.lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE2)) { + lock.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN); + } + + ApplicationId applicationId = OrchestratorUtil.toApplicationId(TestIds.APPLICATION_INSTANCE_REFERENCE); + assertEquals("test-tenant:test-application:test-environment:test-region:test-instance-key", + TestIds.APPLICATION_INSTANCE_REFERENCE.asString()); + assertEquals("test-tenant:test-application:test-instance-key", applicationId.serializedForm()); + assertEquals("host1", TestIds.HOST_NAME1.s()); + + String hostStatusPath = "/vespa/host-status/" + applicationId.serializedForm() + "/hosts/" + TestIds.HOST_NAME1.s(); + String lock2Path = "/vespa/host-status-service/" + TestIds.APPLICATION_INSTANCE_REFERENCE.asString() + "/lock2"; + String applicationStatusPath = "/vespa/application-status-service/" + TestIds.APPLICATION_INSTANCE_REFERENCE.asString(); + assertZkPathExists(true, hostStatusPath); + assertZkPathExists(true, lock2Path); + assertZkPathExists(true, applicationStatusPath); + + String strayHostStatusPath = "/vespa/host-status/" + applicationId.serializedForm() + "/hosts/" + strayHostname.s(); + String strayHostStatusServicePath = "/vespa/host-status-service/" + TestIds.APPLICATION_INSTANCE_REFERENCE.asString() + + "/hosts-allowed-down/stray2.com"; + String strayApplicationStatusPath = "/vespa/application-status-service/" + TestIds.APPLICATION_INSTANCE_REFERENCE2.asString(); + + createZkNodes(strayHostStatusServicePath); + assertZkPathExists(true, strayHostStatusPath); + assertZkPathExists(true, strayHostStatusServicePath); + assertZkPathExists(true, strayApplicationStatusPath); + + statusService.onApplicationActivate(TestIds.APPLICATION_INSTANCE_REFERENCE2, makeHostnameSet("host1", "host2")); + + // Nothing has been deleted + assertZkPathExists(true, hostStatusPath); + assertZkPathExists(true, lock2Path); + assertZkPathExists(true, applicationStatusPath); + assertZkPathExists(true, strayHostStatusPath); + assertZkPathExists(true, strayHostStatusServicePath); + assertZkPathExists(true, strayApplicationStatusPath); + + statusService.onApplicationActivate(TestIds.APPLICATION_INSTANCE_REFERENCE, + makeHostnameSet(TestIds.HOST_NAME1.s(), "host3")); + + // Stray hosts for app1 has been deleted + assertZkPathExists(true, hostStatusPath); + assertZkPathExists(true, lock2Path); + assertZkPathExists(true, applicationStatusPath); + assertZkPathExists(false, strayHostStatusPath); + assertZkPathExists(true, strayHostStatusServicePath); + assertZkPathExists(true, strayApplicationStatusPath); + + statusService.onApplicationRemove(TestIds.APPLICATION_INSTANCE_REFERENCE); + + // Application removed => only lock2 path and other apps are left + assertZkPathExists(false, hostStatusPath); + assertZkPathExists(true, lock2Path); + assertZkPathExists(false, applicationStatusPath); + assertZkPathExists(false, strayHostStatusPath); + assertZkPathExists(false, strayHostStatusServicePath); + assertZkPathExists(true, strayApplicationStatusPath); + + } + + private Set makeHostnameSet(String... hostnames) { + return Stream.of(hostnames).map(HostName::new).collect(Collectors.toSet()); + } + + private void assertZkPathExists(boolean exists, String path) { + final Stat stat; + try { + stat = curator.framework().checkExists().forPath(path); + } catch (Exception e) { + throw new RuntimeException(e); + } + + assertEquals(exists, stat != null); + } + + private void createZkNodes(String... paths) { + Stream.of(paths).forEach(path -> { + try { + curator.framework().create().creatingParentsIfNeeded().forPath(path); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + } + //TODO: move to vespajlib @SafeVarargs private static List shuffledList(T... values) { -- cgit v1.2.3