summaryrefslogtreecommitdiffstats
path: root/orchestrator
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@verizonmedia.com>2020-03-05 00:29:50 +0100
committerHåkon Hallingstad <hakon@verizonmedia.com>2020-03-05 00:29:50 +0100
commit195f2648952861812e6aadc8835e5f0cdc8d7a79 (patch)
treecddcee8f08db264d602dadb5819354dd39df8884 /orchestrator
parent31ea7d17f98e744497f9ff834e50d3612ca2678b (diff)
Support cleanup of status service
Diffstat (limited to 'orchestrator')
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java6
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java13
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfos.java6
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosCache.java27
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosService.java12
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosServiceImpl.java17
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java3
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZkStatusService.java95
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyServiceMonitor.java6
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java9
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java2
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java11
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZkStatusService2Test.java2
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZkStatusServiceTest.java164
14 files changed, 313 insertions, 60 deletions
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java
index af59eb11e9b..b1f205f69d8 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java
@@ -23,6 +23,7 @@ public class OrchestratorContext implements AutoCloseable {
private static final Logger logger = Logger.getLogger(OrchestratorContext.class.getName());
private static final Duration DEFAULT_TIMEOUT_FOR_SINGLE_OP = Duration.ofSeconds(10);
private static final Duration DEFAULT_TIMEOUT_FOR_BATCH_OP = Duration.ofSeconds(60);
+ private static final Duration DEFAULT_TIMEOUT_FOR_ADMIN_OP = Duration.ofMinutes(5);
private static final Duration TIMEOUT_OVERHEAD = Duration.ofMillis(500);
private final Optional<OrchestratorContext> parent;
@@ -55,6 +56,11 @@ public class OrchestratorContext implements AutoCloseable {
false, false, usePermanentlyDownStatus);
}
+ public static OrchestratorContext createContextForAdminOp(Clock clock) {
+ return new OrchestratorContext(null, clock, TimeBudget.fromNow(clock, DEFAULT_TIMEOUT_FOR_ADMIN_OP),
+ false, false, false);
+ }
+
private OrchestratorContext(OrchestratorContext parentOrNull,
Clock clock,
TimeBudget timeBudget,
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
index d4b55e92271..5ac5c66cc7c 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
@@ -104,6 +104,8 @@ public class OrchestratorImpl implements Orchestrator {
this.clock = clock;
this.applicationApiFactory = applicationApiFactory;
this.retireWithPermanentlyDownFlag = Flags.RETIRE_WITH_PERMANENTLY_DOWN.bindTo(flagSource);
+
+ serviceMonitor.registerListener(statusService);
}
@Override
@@ -353,6 +355,10 @@ public class OrchestratorImpl implements Orchestrator {
throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException{
OrchestratorContext context = OrchestratorContext.createContextForSingleAppOp(clock);
ApplicationInstanceReference reference = OrchestratorUtil.toApplicationInstanceReference(appId, serviceMonitor);
+
+ ApplicationInstance application = serviceMonitor.getApplication(reference)
+ .orElseThrow(ApplicationIdNotFoundException::new);
+
try (ApplicationLock lock = statusService.lockApplication(context, reference)) {
// Short-circuit if already in wanted state
@@ -360,8 +366,6 @@ public class OrchestratorImpl implements Orchestrator {
// Set content clusters for this application in maintenance on suspend
if (status == ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN) {
- ApplicationInstance application = getApplicationInstance(reference);
-
HostInfos hostInfosSnapshot = lock.getHostInfos();
// Mark it allowed to be down before we manipulate the clustercontroller
@@ -421,11 +425,6 @@ public class OrchestratorImpl implements Orchestrator {
() -> new HostNameNotFoundException(hostName));
}
- private ApplicationInstance getApplicationInstance(ApplicationInstanceReference reference)
- throws ApplicationIdNotFoundException {
- return serviceMonitor.getApplication(reference).orElseThrow(ApplicationIdNotFoundException::new);
- }
-
private static void sleep(long time, TimeUnit timeUnit) {
try {
Thread.sleep(timeUnit.toMillis(time));
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfos.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfos.java
index 1acd82662a4..22c2bcf1a79 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfos.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfos.java
@@ -5,7 +5,6 @@ import com.yahoo.vespa.applicationmodel.HostName;
import java.util.Map;
import java.util.Set;
-import java.util.stream.Collectors;
/**
* Collection of the suspended hosts of an application.
@@ -28,4 +27,9 @@ public class HostInfos {
public HostInfo getOrNoRemarks(HostName hostname) {
return hostInfos.getOrDefault(hostname, HostInfo.createNoRemarks());
}
+
+ /** The set of hostnames that were set in ZooKeeper - used for removing orphaned hostnames. */
+ public Set<HostName> getZkHostnames() {
+ return Set.copyOf(hostInfos.keySet());
+ }
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosCache.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosCache.java
index 680f3cbcc6d..7ee65ebcd0b 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosCache.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosCache.java
@@ -7,6 +7,7 @@ import com.yahoo.vespa.curator.Curator;
import com.yahoo.vespa.curator.recipes.CuratorCounter;
import java.util.Map;
+import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
@@ -35,22 +36,22 @@ public class HostInfosCache implements HostInfosService {
}
}
- public HostInfos getCachedHostInfos(ApplicationInstanceReference application) {
- return suspendedHosts.computeIfAbsent(application, wrappedService::getHostInfos);
+ public HostInfos getCachedHostInfos(ApplicationInstanceReference reference) {
+ return suspendedHosts.computeIfAbsent(reference, wrappedService::getHostInfos);
}
@Override
- public HostInfos getHostInfos(ApplicationInstanceReference application) {
+ public HostInfos getHostInfos(ApplicationInstanceReference reference) {
refreshCache();
- return getCachedHostInfos(application);
+ return getCachedHostInfos(reference);
}
@Override
- public boolean setHostStatus(ApplicationInstanceReference application, HostName hostName, HostStatus hostStatus) {
+ public boolean setHostStatus(ApplicationInstanceReference reference, HostName hostName, HostStatus hostStatus) {
boolean isException = true;
boolean modified = false;
try {
- modified = wrappedService.setHostStatus(application, hostName, hostStatus);
+ modified = wrappedService.setHostStatus(reference, hostName, hostStatus);
isException = false;
} finally {
if (modified || isException) {
@@ -61,4 +62,18 @@ public class HostInfosCache implements HostInfosService {
return modified;
}
+
+ @Override
+ public void removeApplication(ApplicationInstanceReference reference) {
+ wrappedService.removeApplication(reference);
+ suspendedHosts.remove(reference);
+ }
+
+ @Override
+ public void removeHosts(ApplicationInstanceReference reference, Set<HostName> hostnames) {
+ if (hostnames.size() > 0) {
+ wrappedService.removeHosts(reference, hostnames);
+ counter.next();
+ }
+ }
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosService.java
index f5c079f9ba3..a796a55236c 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosService.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosService.java
@@ -4,12 +4,20 @@ package com.yahoo.vespa.orchestrator.status;
import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
import com.yahoo.vespa.applicationmodel.HostName;
+import java.util.Set;
+
/**
* @author hakonhall
*/
interface HostInfosService {
- HostInfos getHostInfos(ApplicationInstanceReference application);
+ HostInfos getHostInfos(ApplicationInstanceReference reference);
/** Returns false if it is known that the operation was a no-op. */
- boolean setHostStatus(ApplicationInstanceReference application, HostName hostName, HostStatus hostStatus);
+ boolean setHostStatus(ApplicationInstanceReference reference, HostName hostName, HostStatus hostStatus);
+
+ /** Remove application. */
+ void removeApplication(ApplicationInstanceReference reference);
+
+ /** Remove hosts for application. */
+ void removeHosts(ApplicationInstanceReference reference, Set<HostName> hostnames);
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosServiceImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosServiceImpl.java
index 2d6c3eb82a1..99f1f00e7dc 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosServiceImpl.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosServiceImpl.java
@@ -5,6 +5,7 @@ package com.yahoo.vespa.orchestrator.status;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.jdisc.Timer;
import com.yahoo.log.LogLevel;
+import com.yahoo.path.Path;
import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.curator.Curator;
@@ -16,6 +17,7 @@ import java.time.Instant;
import java.util.List;
import java.util.Map;
import java.util.Optional;
+import java.util.Set;
import java.util.logging.Logger;
import java.util.stream.Collectors;
@@ -81,6 +83,19 @@ public class HostInfosServiceImpl implements HostInfosService {
return true;
}
+ @Override
+ public void removeApplication(ApplicationInstanceReference reference) {
+ ApplicationId application = OrchestratorUtil.toApplicationId(reference);
+ curator.delete(Path.fromString(applicationPath(application)));
+ }
+
+ @Override
+ public void removeHosts(ApplicationInstanceReference reference, Set<HostName> hostnames) {
+ ApplicationId application = OrchestratorUtil.toApplicationId(reference);
+ // Remove /vespa/host-status/APPLICATION_ID/hosts/HOSTNAME
+ hostnames.forEach(hostname -> curator.delete(Path.fromString(hostPath(application, hostname))));
+ }
+
private Optional<byte[]> readBytesFromZk(String path) throws Exception {
try {
return Optional.of(curator.framework().getData().forPath(path));
@@ -101,7 +116,7 @@ public class HostInfosServiceImpl implements HostInfosService {
}
}
- private static String applicationPath(ApplicationId application) {
+ static String applicationPath(ApplicationId application) {
return "/vespa/host-status/" + application.serializedForm();
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java
index 0d4076f3d59..f6d14f609ee 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java
@@ -5,6 +5,7 @@ import com.google.common.util.concurrent.UncheckedTimeoutException;
import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.orchestrator.OrchestratorContext;
+import com.yahoo.vespa.service.monitor.ServiceHostListener;
import java.util.Set;
import java.util.function.Function;
@@ -18,7 +19,7 @@ import java.util.function.Function;
* @author Tony Vaagenes
* @author smorgrav
*/
-public interface StatusService {
+public interface StatusService extends ServiceHostListener {
/**
* Returns a mutable host status registry for a locked application instance. All operations performed on
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZkStatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZkStatusService.java
index c1ca04aea75..9874903e976 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZkStatusService.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZkStatusService.java
@@ -7,10 +7,13 @@ import com.yahoo.container.jaxrs.annotation.Component;
import com.yahoo.jdisc.Metric;
import com.yahoo.jdisc.Timer;
import com.yahoo.log.LogLevel;
+import com.yahoo.path.Path;
import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.curator.Curator;
import com.yahoo.vespa.curator.Lock;
+import com.yahoo.vespa.flags.FlagSource;
+import com.yahoo.vespa.flags.Flags;
import com.yahoo.vespa.orchestrator.OrchestratorContext;
import com.yahoo.vespa.orchestrator.OrchestratorUtil;
import org.apache.zookeeper.data.Stat;
@@ -42,6 +45,7 @@ public class ZkStatusService implements StatusService {
private final HostInfosCache hostInfosCache;
private final Metric metric;
private final Timer timer;
+ private final boolean doCleanup;
/**
* A cache of metric contexts for each possible dimension map. In practice, there is one dimension map
@@ -50,19 +54,25 @@ public class ZkStatusService implements StatusService {
private final ConcurrentHashMap<Map<String, String>, Metric.Context> cachedContexts = new ConcurrentHashMap<>();
@Inject
- public ZkStatusService(@Component Curator curator, @Component Metric metric, @Component Timer timer) {
- this.curator = curator;
- this.metric = metric;
- this.timer = timer;
- this.hostInfosCache = new HostInfosCache(curator, new HostInfosServiceImpl(curator, timer));
+ public ZkStatusService(
+ @Component Curator curator,
+ @Component Metric metric,
+ @Component Timer timer,
+ @Component FlagSource flagSource) {
+ this(curator,
+ metric,
+ timer,
+ new HostInfosCache(curator, new HostInfosServiceImpl(curator, timer)),
+ Flags.CLEANUP_STATUS_SERVICE.bindTo(flagSource).value());
}
/** Non-private for testing only. */
- ZkStatusService(Curator curator, Metric metric, Timer timer, HostInfosCache hostInfosCache) {
+ ZkStatusService(Curator curator, Metric metric, Timer timer, HostInfosCache hostInfosCache, boolean doCleanup) {
this.curator = curator;
this.metric = metric;
this.timer = timer;
this.hostInfosCache = hostInfosCache;
+ this.doCleanup = doCleanup;
}
@Override
@@ -214,8 +224,79 @@ public class ZkStatusService implements StatusService {
}
}
+ /**
+ * Remove all host-related data in ZooKeeper for all hostnames outside the given set.
+ */
+ @Override
+ public void onApplicationActivate(ApplicationInstanceReference reference, Set<HostName> hostnames) {
+ if (doCleanup) {
+ withLockForAdminOp(reference, " was activated", () -> {
+ HostInfos hostInfos = hostInfosCache.getCachedHostInfos(reference);
+ Set<HostName> toRemove = new HashSet<>(hostInfos.getZkHostnames());
+ toRemove.removeAll(hostnames);
+ if (toRemove.size() > 0) {
+ log.log(LogLevel.INFO, "Removing " + toRemove + " of " + reference + " from status service");
+ hostInfosCache.removeHosts(reference, toRemove);
+ }
+ });
+ } else {
+ log.log(LogLevel.INFO, "Would have removed orphaned hosts of " + reference + " from status service");
+ }
+ }
+
+ /**
+ * Remove the application from ZooKeeper.
+ *
+ * <ol>
+ * <li>/vespa/host-status/APPLICATION_ID (should just be ./hosts/*)</li>
+ * <li>/vespa/host-status-service/REFERENCE/hosts-allowed-down (should just be ./*)</li>
+ * <li>/vespa/application-status-service/REFERENCE (should just be .)</li>
+ * </ol>
+ */
+ @Override
+ public void onApplicationRemove(ApplicationInstanceReference reference) {
+ if (doCleanup) {
+ withLockForAdminOp(reference, " was removed", () -> {
+ log.log(LogLevel.INFO, "Removing application " + reference + " from status service");
+
+ // /vespa/application-status-service/REFERENCE
+ curator.delete(Path.fromString(applicationInstanceSuspendedPath(reference)));
+
+ // /vespa/host-status-service/REFERENCE/hosts-allowed-down
+ curator.delete(Path.fromString(hostsAllowedDownPath(reference)));
+
+ // /vespa/host-status/APPLICATION_ID
+ hostInfosCache.removeApplication(reference);
+ });
+ } else {
+ log.log(LogLevel.INFO, "Would have removed application " + reference + " from status service");
+ }
+ }
+
+ private void withLockForAdminOp(ApplicationInstanceReference reference,
+ String eventDescription,
+ Runnable runnable) {
+ OrchestratorContext context = OrchestratorContext.createContextForAdminOp(timer.toUtcClock());
+
+ final ApplicationLock lock;
+ try {
+ lock = lockApplication(context, reference);
+ } catch (RuntimeException e) {
+ log.log(LogLevel.ERROR, "Failed to get Orchestrator lock on when " + reference +
+ eventDescription + ": " + e.getMessage());
+ return;
+ }
+
+ try (lock) {
+ runnable.run();
+ } catch (RuntimeException e) {
+ log.log(LogLevel.ERROR, "Failed to clean up after " + reference + eventDescription +
+ ": " + e.getMessage());
+ }
+ }
+
static String applicationInstanceReferencePath(ApplicationInstanceReference reference) {
- return HOST_STATUS_BASE_PATH + '/' + reference.tenantId() + ":" + reference.applicationInstanceId();
+ return HOST_STATUS_BASE_PATH + '/' + reference.asString();
}
private static String hostsAllowedDownPath(ApplicationInstanceReference reference) {
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyServiceMonitor.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyServiceMonitor.java
index 09fb6296866..0da4c48d408 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyServiceMonitor.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyServiceMonitor.java
@@ -18,7 +18,7 @@ import com.yahoo.vespa.orchestrator.model.VespaModelUtil;
import com.yahoo.vespa.service.monitor.ServiceModel;
import com.yahoo.vespa.service.monitor.ServiceMonitor;
-import java.util.HashSet;
+import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
@@ -37,7 +37,7 @@ public class DummyServiceMonitor implements ServiceMonitor {
public static final HostName TEST3_HOST_NAME = new HostName("test3.hostname.tld");
public static final HostName TEST6_HOST_NAME = new HostName("test6.hostname.tld");
- private static final Set<ApplicationInstance> apps = new HashSet<>();
+ private static final List<ApplicationInstance> apps = new ArrayList<>();
static {
apps.add(new ApplicationInstance(
@@ -177,7 +177,7 @@ public class DummyServiceMonitor implements ServiceMonitor {
return hosts;
}
- public static Set<ApplicationInstance> getApplications() {
+ public static List<ApplicationInstance> getApplications() {
return apps;
}
}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
index 9bcbdba074e..c5c5bd1e625 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
@@ -76,6 +76,8 @@ public class OrchestratorImplTest {
private final ApplicationApiFactory applicationApiFactory = new ApplicationApiFactory(3);
private final InMemoryFlagSource flagSource = new InMemoryFlagSource();
+ private final MockCurator curator = new MockCurator();
+ private ZkStatusService statusService = new ZkStatusService(curator, mock(Metric.class), new TestTimer(), flagSource);
private ApplicationId app1;
private ApplicationId app2;
@@ -96,7 +98,7 @@ public class OrchestratorImplTest {
clustercontroller = new ClusterControllerClientFactoryMock();
orchestrator = new OrchestratorImpl(new HostedVespaPolicy(new HostedVespaClusterPolicy(), clustercontroller, applicationApiFactory),
clustercontroller,
- new ZkStatusService(new MockCurator(), mock(Metric.class), new TestTimer()),
+ statusService,
new DummyServiceMonitor(),
0,
new ManualClock(),
@@ -356,6 +358,9 @@ public class OrchestratorImplTest {
HostName parentHostname = new HostName("parent.vespa.ai");
+ verify(serviceMonitor, atLeastOnce()).registerListener(zookeeperStatusService);
+ verifyNoMoreInteractions(serviceMonitor);
+
orchestrator.suspendAll(parentHostname, List.of(parentHostname));
ArgumentCaptor<OrchestratorContext> contextCaptor = ArgumentCaptor.forClass(OrchestratorContext.class);
@@ -390,7 +395,7 @@ public class OrchestratorImplTest {
@Test
public void testGetHost() throws Exception {
ClusterControllerClientFactory clusterControllerClientFactory = new ClusterControllerClientFactoryMock();
- StatusService statusService = new ZkStatusService(new MockCurator(), mock(Metric.class), new TestTimer());
+ StatusService statusService = new ZkStatusService(new MockCurator(), mock(Metric.class), new TestTimer(), flagSource);
HostName hostName = new HostName("host.yahoo.com");
TenantId tenantId = new TenantId("tenant");
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java
index 3d0746e5a36..197a2a03984 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java
@@ -58,7 +58,7 @@ class ModelTestUtils {
private final Map<ApplicationInstanceReference, ApplicationInstance> applications = new HashMap<>();
private final ClusterControllerClientFactory clusterControllerClientFactory = new ClusterControllerClientFactoryMock();
private final Map<HostName, HostStatus> hostStatusMap = new HashMap<>();
- private final StatusService statusService = new ZkStatusService(new MockCurator(), mock(Metric.class), new TestTimer());
+ private final StatusService statusService = new ZkStatusService(new MockCurator(), mock(Metric.class), new TestTimer(), flagSource);
private final TestTimer timer = new TestTimer();
private final ServiceMonitor serviceMonitor = new ServiceModelCache(() -> new ServiceModel(applications), timer);
private final Orchestrator orchestrator = new OrchestratorImpl(new HostedVespaPolicy(new HostedVespaClusterPolicy(), clusterControllerClientFactory, applicationApiFactory()),
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java
index b620b0798be..1bf3b267a05 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java
@@ -80,7 +80,7 @@ public class HostResourceTest {
private static final TenantId TENANT_ID = new TenantId("tenantId");
private static final ApplicationInstanceId APPLICATION_INSTANCE_ID = new ApplicationInstanceId("applicationId");
private static final StatusService EVERY_HOST_IS_UP_HOST_STATUS_SERVICE = new ZkStatusService(
- new MockCurator(), mock(Metric.class), new TestTimer());
+ new MockCurator(), mock(Metric.class), new TestTimer(), new InMemoryFlagSource());
private static final ApplicationApiFactory applicationApiFactory = new ApplicationApiFactory(3);
private static final ServiceMonitor serviceMonitor = mock(ServiceMonitor.class);
@@ -128,7 +128,8 @@ public class HostResourceTest {
private final OrchestratorImpl alwaysAllowOrchestrator = new OrchestratorImpl(
new AlwaysAllowPolicy(),
new ClusterControllerClientFactoryMock(),
- EVERY_HOST_IS_UP_HOST_STATUS_SERVICE, serviceMonitor,
+ EVERY_HOST_IS_UP_HOST_STATUS_SERVICE,
+ serviceMonitor,
SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS,
clock,
applicationApiFactory,
@@ -137,7 +138,8 @@ public class HostResourceTest {
private final OrchestratorImpl hostNotFoundOrchestrator = new OrchestratorImpl(
new AlwaysAllowPolicy(),
new ClusterControllerClientFactoryMock(),
- EVERY_HOST_IS_UP_HOST_STATUS_SERVICE, alwaysEmptyServiceMonitor,
+ EVERY_HOST_IS_UP_HOST_STATUS_SERVICE,
+ alwaysEmptyServiceMonitor,
SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS,
clock,
applicationApiFactory,
@@ -240,7 +242,8 @@ public class HostResourceTest {
final OrchestratorImpl alwaysRejectResolver = new OrchestratorImpl(
new AlwaysFailPolicy(),
new ClusterControllerClientFactoryMock(),
- EVERY_HOST_IS_UP_HOST_STATUS_SERVICE, serviceMonitor,
+ EVERY_HOST_IS_UP_HOST_STATUS_SERVICE,
+ serviceMonitor,
SERVICE_MONITOR_CONVERGENCE_LATENCY_SECONDS,
clock,
applicationApiFactory,
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZkStatusService2Test.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZkStatusService2Test.java
index c5d390050ee..4c01572f787 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZkStatusService2Test.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZkStatusService2Test.java
@@ -32,7 +32,7 @@ public class ZkStatusService2Test {
private final Timer timer = new TestTimer();
private final Metric metric = mock(Metric.class);
private final HostInfosCache cache = mock(HostInfosCache.class);
- private final ZkStatusService zkStatusService = new ZkStatusService(curator, metric, timer, cache);
+ private final ZkStatusService zkStatusService = new ZkStatusService(curator, metric, timer, cache, false);
private final OrchestratorContext context = mock(OrchestratorContext.class);
private final InterProcessMutex mutex = mock(InterProcessMutex.class);
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZkStatusServiceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZkStatusServiceTest.java
index a6d7f09d69b..3c3c337f4e1 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZkStatusServiceTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZkStatusServiceTest.java
@@ -1,24 +1,30 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.orchestrator.status;
+import com.yahoo.config.provision.ApplicationId;
import com.yahoo.exception.ExceptionUtils;
import com.yahoo.jdisc.Metric;
import com.yahoo.jdisc.Timer;
import com.yahoo.jdisc.test.TestTimer;
import com.yahoo.log.LogLevel;
+import com.yahoo.test.ManualClock;
import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.curator.Curator;
+import com.yahoo.vespa.flags.Flags;
+import com.yahoo.vespa.flags.InMemoryFlagSource;
import com.yahoo.vespa.orchestrator.OrchestratorContext;
+import com.yahoo.vespa.orchestrator.OrchestratorUtil;
import com.yahoo.vespa.orchestrator.TestIds;
import com.yahoo.yolean.Exceptions;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.test.KillSession;
import org.apache.curator.test.TestingServer;
+import org.apache.zookeeper.data.Stat;
import org.hamcrest.Description;
import org.hamcrest.Matcher;
import org.hamcrest.TypeSafeMatcher;
import org.junit.After;
-import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.ArgumentCaptor;
@@ -40,6 +46,8 @@ import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.logging.Logger;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
import static org.hamcrest.core.Is.is;
import static org.hamcrest.core.IsCollectionContaining.hasItem;
@@ -55,25 +63,26 @@ import static org.mockito.Mockito.when;
@RunWith(MockitoJUnitRunner.class)
public class ZkStatusServiceTest {
private TestingServer testingServer;
- private ZkStatusService zkStatusService;
+ private ZkStatusService statusService;
private Curator curator;
private final Timer timer = mock(Timer.class);
private final Metric metric = mock(Metric.class);
private final OrchestratorContext context = mock(OrchestratorContext.class);
+ private InMemoryFlagSource flagSource = new InMemoryFlagSource();
@Captor
private ArgumentCaptor<Map<String, String>> captor;
- @Before
public void setUp() throws Exception {
Logger.getLogger("").setLevel(LogLevel.WARNING);
testingServer = new TestingServer();
curator = createConnectedCurator(testingServer);
- zkStatusService = new ZkStatusService(curator, metric, timer);
+ statusService = new ZkStatusService(curator, metric, timer, flagSource);
when(context.getTimeLeft()).thenReturn(Duration.ofSeconds(10));
when(context.isProbe()).thenReturn(false);
when(timer.currentTime()).thenReturn(Instant.ofEpochMilli(1));
+ when(timer.toUtcClock()).thenReturn(new ManualClock(Instant.ofEpochMilli(1)));
}
private static Curator createConnectedCurator(TestingServer server) throws InterruptedException {
@@ -94,20 +103,22 @@ public class ZkStatusServiceTest {
}
@Test
- public void host_state_for_unknown_hosts_is_no_remarks() {
+ public void host_state_for_unknown_hosts_is_no_remarks() throws Exception {
+ setUp();
assertThat(
- zkStatusService.getHostInfo(TestIds.APPLICATION_INSTANCE_REFERENCE, TestIds.HOST_NAME1).status(),
+ statusService.getHostInfo(TestIds.APPLICATION_INSTANCE_REFERENCE, TestIds.HOST_NAME1).status(),
is(HostStatus.NO_REMARKS));
}
@Test
- public void setting_host_state_is_idempotent() {
+ public void setting_host_state_is_idempotent() throws Exception {
+ setUp();
when(timer.currentTime()).thenReturn(
Instant.ofEpochMilli((1)),
Instant.ofEpochMilli((3)),
Instant.ofEpochMilli(6));
- try (ApplicationLock lock = zkStatusService.lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE)) {
+ try (ApplicationLock lock = statusService.lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE)) {
//shuffling to catch "clean database" failures for all cases.
for (HostStatus hostStatus: shuffledList(HostStatus.NO_REMARKS, HostStatus.ALLOWED_TO_BE_DOWN)) {
@@ -138,10 +149,12 @@ public class ZkStatusServiceTest {
@Test
public void locks_are_exclusive() throws Exception {
- ZkStatusService zkStatusService2 = new ZkStatusService(curator, mock(Metric.class), new TestTimer());
+ setUp();
+ ZkStatusService zkStatusService2 =
+ new ZkStatusService(curator, mock(Metric.class), new TestTimer(), flagSource);
final CompletableFuture<Void> lockedSuccessfullyFuture;
- try (ApplicationLock lock = zkStatusService
+ try (ApplicationLock lock = statusService
.lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE)) {
lockedSuccessfullyFuture = CompletableFuture.runAsync(() -> {
@@ -163,9 +176,11 @@ public class ZkStatusServiceTest {
@Test
public void failing_to_get_lock_closes_SessionFailRetryLoop() throws Exception {
- ZkStatusService zkStatusService2 = new ZkStatusService(curator, mock(Metric.class), new TestTimer());
+ setUp();
+ ZkStatusService zkStatusService2 =
+ new ZkStatusService(curator, mock(Metric.class), new TestTimer(), flagSource);
- try (ApplicationLock lock = zkStatusService
+ try (ApplicationLock lock = statusService
.lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE)) {
//must run in separate thread, since having 2 locks in the same thread fails
@@ -232,59 +247,160 @@ public class ZkStatusServiceTest {
}
@Test
- public void suspend_and_resume_application_works_and_is_symmetric() {
+ public void suspend_and_resume_application_works_and_is_symmetric() throws Exception {
+
+ setUp();
// Initial state is NO_REMARK
assertThat(
- zkStatusService
+ statusService
.getApplicationInstanceStatus(TestIds.APPLICATION_INSTANCE_REFERENCE),
is(ApplicationInstanceStatus.NO_REMARKS));
// Suspend
- try (ApplicationLock lock = zkStatusService
+ try (ApplicationLock lock = statusService
.lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE)) {
lock.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN);
}
assertThat(
- zkStatusService
+ statusService
.getApplicationInstanceStatus(TestIds.APPLICATION_INSTANCE_REFERENCE),
is(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN));
// Resume
- try (ApplicationLock lock = zkStatusService
+ try (ApplicationLock lock = statusService
.lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE)) {
lock.setApplicationInstanceStatus(ApplicationInstanceStatus.NO_REMARKS);
}
assertThat(
- zkStatusService
+ statusService
.getApplicationInstanceStatus(TestIds.APPLICATION_INSTANCE_REFERENCE),
is(ApplicationInstanceStatus.NO_REMARKS));
}
@Test
- public void suspending_two_applications_returns_two_applications() {
- Set<ApplicationInstanceReference> suspendedApps
- = zkStatusService.getAllSuspendedApplications();
+ public void suspending_two_applications_returns_two_applications() throws Exception {
+ setUp();
+ Set<ApplicationInstanceReference> suspendedApps = statusService.getAllSuspendedApplications();
assertThat(suspendedApps.size(), is(0));
- try (ApplicationLock statusRegistry = zkStatusService
+ try (ApplicationLock statusRegistry = statusService
.lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE)) {
statusRegistry.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN);
}
- try (ApplicationLock lock = zkStatusService
+ try (ApplicationLock lock = statusService
.lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE2)) {
lock.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN);
}
- suspendedApps = zkStatusService.getAllSuspendedApplications();
+ suspendedApps = statusService.getAllSuspendedApplications();
assertThat(suspendedApps.size(), is(2));
assertThat(suspendedApps, hasItem(TestIds.APPLICATION_INSTANCE_REFERENCE));
assertThat(suspendedApps, hasItem(TestIds.APPLICATION_INSTANCE_REFERENCE2));
}
+ @Test
+ public void zookeeper_cleanup() throws Exception {
+ flagSource.withBooleanFlag(Flags.CLEANUP_STATUS_SERVICE.id(), true);
+ setUp();
+
+ HostName strayHostname = new HostName("stray1.com");
+
+ try (ApplicationLock lock = statusService.lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE)) {
+ lock.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN);
+ lock.setHostState(TestIds.HOST_NAME1, HostStatus.ALLOWED_TO_BE_DOWN);
+
+ lock.setHostState(strayHostname, HostStatus.PERMANENTLY_DOWN);
+ }
+
+ try (ApplicationLock lock = statusService.lockApplication(context, TestIds.APPLICATION_INSTANCE_REFERENCE2)) {
+ lock.setApplicationInstanceStatus(ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN);
+ }
+
+ ApplicationId applicationId = OrchestratorUtil.toApplicationId(TestIds.APPLICATION_INSTANCE_REFERENCE);
+ assertEquals("test-tenant:test-application:test-environment:test-region:test-instance-key",
+ TestIds.APPLICATION_INSTANCE_REFERENCE.asString());
+ assertEquals("test-tenant:test-application:test-instance-key", applicationId.serializedForm());
+ assertEquals("host1", TestIds.HOST_NAME1.s());
+
+ String hostStatusPath = "/vespa/host-status/" + applicationId.serializedForm() + "/hosts/" + TestIds.HOST_NAME1.s();
+ String lock2Path = "/vespa/host-status-service/" + TestIds.APPLICATION_INSTANCE_REFERENCE.asString() + "/lock2";
+ String applicationStatusPath = "/vespa/application-status-service/" + TestIds.APPLICATION_INSTANCE_REFERENCE.asString();
+ assertZkPathExists(true, hostStatusPath);
+ assertZkPathExists(true, lock2Path);
+ assertZkPathExists(true, applicationStatusPath);
+
+ String strayHostStatusPath = "/vespa/host-status/" + applicationId.serializedForm() + "/hosts/" + strayHostname.s();
+ String strayHostStatusServicePath = "/vespa/host-status-service/" + TestIds.APPLICATION_INSTANCE_REFERENCE.asString() +
+ "/hosts-allowed-down/stray2.com";
+ String strayApplicationStatusPath = "/vespa/application-status-service/" + TestIds.APPLICATION_INSTANCE_REFERENCE2.asString();
+
+ createZkNodes(strayHostStatusServicePath);
+ assertZkPathExists(true, strayHostStatusPath);
+ assertZkPathExists(true, strayHostStatusServicePath);
+ assertZkPathExists(true, strayApplicationStatusPath);
+
+ statusService.onApplicationActivate(TestIds.APPLICATION_INSTANCE_REFERENCE2, makeHostnameSet("host1", "host2"));
+
+ // Nothing has been deleted
+ assertZkPathExists(true, hostStatusPath);
+ assertZkPathExists(true, lock2Path);
+ assertZkPathExists(true, applicationStatusPath);
+ assertZkPathExists(true, strayHostStatusPath);
+ assertZkPathExists(true, strayHostStatusServicePath);
+ assertZkPathExists(true, strayApplicationStatusPath);
+
+ statusService.onApplicationActivate(TestIds.APPLICATION_INSTANCE_REFERENCE,
+ makeHostnameSet(TestIds.HOST_NAME1.s(), "host3"));
+
+ // Stray hosts for app1 has been deleted
+ assertZkPathExists(true, hostStatusPath);
+ assertZkPathExists(true, lock2Path);
+ assertZkPathExists(true, applicationStatusPath);
+ assertZkPathExists(false, strayHostStatusPath);
+ assertZkPathExists(true, strayHostStatusServicePath);
+ assertZkPathExists(true, strayApplicationStatusPath);
+
+ statusService.onApplicationRemove(TestIds.APPLICATION_INSTANCE_REFERENCE);
+
+ // Application removed => only lock2 path and other apps are left
+ assertZkPathExists(false, hostStatusPath);
+ assertZkPathExists(true, lock2Path);
+ assertZkPathExists(false, applicationStatusPath);
+ assertZkPathExists(false, strayHostStatusPath);
+ assertZkPathExists(false, strayHostStatusServicePath);
+ assertZkPathExists(true, strayApplicationStatusPath);
+
+ }
+
+ private Set<HostName> makeHostnameSet(String... hostnames) {
+ return Stream.of(hostnames).map(HostName::new).collect(Collectors.toSet());
+ }
+
+ private void assertZkPathExists(boolean exists, String path) {
+ final Stat stat;
+ try {
+ stat = curator.framework().checkExists().forPath(path);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+
+ assertEquals(exists, stat != null);
+ }
+
+ private void createZkNodes(String... paths) {
+ Stream.of(paths).forEach(path -> {
+ try {
+ curator.framework().create().creatingParentsIfNeeded().forPath(path);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
//TODO: move to vespajlib
@SafeVarargs
private static <T> List<T> shuffledList(T... values) {