summaryrefslogtreecommitdiffstats
path: root/orchestrator
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@verizonmedia.com>2020-01-23 16:58:38 +0100
committerHåkon Hallingstad <hakon@verizonmedia.com>2020-01-23 16:58:38 +0100
commit967fe4dd3f5a3e418b0dd6b2bcd06fe5fd460290 (patch)
tree39b89b81a04c2aab84bef99923abaf8c46a7bd3a /orchestrator
parentca80f4634d471c078047337e1d10a7f73c163900 (diff)
Since-timestamp on Orc suspended status
- Introduce a new type of suspension status: PERMANENTLY_DOWN, which is set when a node is scheduled to be removed from the application. A normal resume call will NOT clear PERMANENTLY_DOWN. - Store a JSON for each suspended host: Contains status, and a since timestamp for the time when the node was suspended. The suspension timestamp is preserved when switching statuses between suspension statuses. - The JSON is stored in a new path, void of any zone. This means that we eventually can get rid of the zone-part of the application instance reference.
Diffstat (limited to 'orchestrator')
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java4
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfo.java64
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfos.java34
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosCache.java57
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosService.java15
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostStatus.java13
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/MutableStatusRegistry.java6
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java12
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java186
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/json/WireHostInfo.java48
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/HostInfoTest.java33
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/HostInfosCacheTest.java56
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java11
13 files changed, 474 insertions, 65 deletions
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
index 0cf654b23b5..c9d234893cb 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
@@ -149,7 +149,7 @@ public class OrchestratorImpl implements Orchestrator {
OrchestratorContext context = OrchestratorContext.createContextForSingleAppOp(clock);
try (MutableStatusRegistry statusRegistry = statusService
.lockApplicationInstance_forCurrentThreadOnly(context, appInstance.reference())) {
- HostStatus currentHostState = statusRegistry.getHostStatus(hostName);
+ HostStatus currentHostState = statusRegistry.getHostInfo(hostName).status();
if (HostStatus.NO_REMARKS == currentHostState) {
return;
@@ -318,7 +318,7 @@ public class OrchestratorImpl implements Orchestrator {
}
private HostStatus getNodeStatus(ApplicationInstanceReference applicationRef, HostName hostName) {
- return statusService.getHostStatus(applicationRef, hostName);
+ return statusService.getHostInfo(applicationRef, hostName).status();
}
private void setApplicationStatus(ApplicationId appId, ApplicationInstanceStatus status)
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfo.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfo.java
new file mode 100644
index 00000000000..16db5685ae3
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfo.java
@@ -0,0 +1,64 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.status;
+
+import java.time.Instant;
+import java.util.Objects;
+import java.util.Optional;
+
+/**
+ * ZooKeeper-backed information Host status information about a host.
+ *
+ * @author hakonhall
+ */
+// @Immutable
+public class HostInfo {
+ private final HostStatus status;
+ private final Optional<Instant> suspendedSince;
+
+ public static HostInfo createSuspended(HostStatus status, Instant suspendedSince) {
+ if (status == HostStatus.NO_REMARKS) {
+ throw new IllegalArgumentException("NO_REMARKS is not a suspended-status");
+ }
+
+ return new HostInfo(status, Optional.of(suspendedSince));
+ }
+
+ public static HostInfo createNoRemarks() {
+ return new HostInfo(HostStatus.NO_REMARKS, Optional.empty());
+ }
+
+ private HostInfo(HostStatus status, Optional<Instant> suspendedSince) {
+ this.status = Objects.requireNonNull(status);
+ this.suspendedSince = Objects.requireNonNull(suspendedSince);
+ }
+
+ public HostStatus status() { return status; }
+
+ /**
+ * The instant the host status was set to != NO_REMARKS. Is preserved when transitioning
+ * between non-NO_REMARKS states. Returns empty if and only if NO_REMARKS.
+ */
+ public Optional<Instant> suspendedSince() { return suspendedSince; }
+
+ @Override
+ public String toString() {
+ return "HostInfo{" +
+ ", status=" + status +
+ ", suspendedSince=" + suspendedSince +
+ '}';
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ HostInfo hostInfo = (HostInfo) o;
+ return status == hostInfo.status &&
+ suspendedSince.equals(hostInfo.suspendedSince);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(status, suspendedSince);
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfos.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfos.java
new file mode 100644
index 00000000000..42773e22a74
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfos.java
@@ -0,0 +1,34 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.status;
+
+import com.yahoo.vespa.applicationmodel.HostName;
+
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Collection of suspended hosts.
+ *
+ * @author hakonhall
+ */
+public class HostInfos {
+ private final Map<HostName, HostInfo> hostInfos;
+
+ public HostInfos(Map<HostName, HostInfo> hostInfos) {
+ this.hostInfos = Map.copyOf(hostInfos);
+ }
+
+ /** Get all suspended hostnames. */
+ public Set<HostName> suspendedHostsnames() {
+ return hostInfos.entrySet().stream()
+ .filter(entry -> entry.getValue().status() != HostStatus.NO_REMARKS)
+ .map(entry -> entry.getKey())
+ .collect(Collectors.toSet());
+ }
+
+ /** Get host info for hostname, returning a NO_REMARKS HostInfo if unknown. */
+ public HostInfo get(HostName hostname) {
+ return hostInfos.getOrDefault(hostname, HostInfo.createNoRemarks());
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosCache.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosCache.java
new file mode 100644
index 00000000000..9998e7de918
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosCache.java
@@ -0,0 +1,57 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.status;
+
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.curator.Curator;
+import com.yahoo.vespa.curator.recipes.CuratorCounter;
+
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * @author hakonhall
+ */
+public class HostInfosCache implements HostInfosService {
+ final static String HOST_STATUS_CACHE_COUNTER_PATH = "/vespa/host-status-service-cache-counter";
+
+ private final CuratorCounter counter;
+ private final HostInfosService wrappedService;
+
+ private final AtomicLong cacheGeneration;
+ private final Map<ApplicationInstanceReference, HostInfos> suspendedHosts = new ConcurrentHashMap<>();
+
+ HostInfosCache(Curator curator, HostInfosService wrappedService) {
+ this.counter = new CuratorCounter(curator, HOST_STATUS_CACHE_COUNTER_PATH);
+ this.wrappedService = wrappedService;
+ this.cacheGeneration = new AtomicLong(counter.get());
+ }
+
+ @Override
+ public HostInfos getHostInfos(ApplicationInstanceReference application) {
+ long newCacheGeneration = counter.get();
+ if (cacheGeneration.getAndSet(newCacheGeneration) != newCacheGeneration) {
+ suspendedHosts.clear();
+ }
+
+ return suspendedHosts.computeIfAbsent(application, wrappedService::getHostInfos);
+ }
+
+ @Override
+ public boolean setHostStatus(ApplicationInstanceReference application, HostName hostName, HostStatus hostStatus) {
+ boolean isException = true;
+ boolean modified = false;
+ try {
+ modified = wrappedService.setHostStatus(application, hostName, hostStatus);
+ isException = false;
+ } finally {
+ if (modified || isException) {
+ // ensure the next get, on any config server, will repopulate the cache from zk.
+ counter.next();
+ }
+ }
+
+ return modified;
+ }
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosService.java
new file mode 100644
index 00000000000..f5c079f9ba3
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostInfosService.java
@@ -0,0 +1,15 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.status;
+
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.applicationmodel.HostName;
+
+/**
+ * @author hakonhall
+ */
+interface HostInfosService {
+ HostInfos getHostInfos(ApplicationInstanceReference application);
+
+ /** Returns false if it is known that the operation was a no-op. */
+ boolean setHostStatus(ApplicationInstanceReference application, HostName hostName, HostStatus hostStatus);
+}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostStatus.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostStatus.java
index d00de04bf63..f23917042e9 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostStatus.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/HostStatus.java
@@ -2,11 +2,20 @@
package com.yahoo.vespa.orchestrator.status;
/**
- * Enumeration of the different status' a host can have.
+ * Enumeration of the different statuses a host can have.
*
* @author oyving
*/
public enum HostStatus {
+ /** The services on the host is supposed to be up. */
NO_REMARKS,
- ALLOWED_TO_BE_DOWN;
+
+ /** The services on the host is allowed to be down. */
+ ALLOWED_TO_BE_DOWN,
+
+ /**
+ * Same as ALLOWED_TO_BE_DOWN, but in addition, it is expected
+ * the host may be removed from its application at any moment.
+ */
+ PERMANENTLY_DOWN;
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/MutableStatusRegistry.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/MutableStatusRegistry.java
index e36f0f70bbd..92b0ec50011 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/MutableStatusRegistry.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/MutableStatusRegistry.java
@@ -19,10 +19,8 @@ public interface MutableStatusRegistry extends AutoCloseable {
*/
ApplicationInstanceStatus getStatus();
- /**
- * Returns the status of the given host.
- */
- HostStatus getHostStatus(HostName hostName);
+ /** Returns the host info of the given host. */
+ HostInfo getHostInfo(HostName hostName);
/**
* Returns the set of all suspended hosts for this application.
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java
index 993cddae2b3..9e3ee84e1d9 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/StatusService.java
@@ -64,15 +64,9 @@ public interface StatusService {
*/
Function<ApplicationInstanceReference, Set<HostName>> getSuspendedHostsByApplication();
- /**
- * Returns the status of the given application. This is consistent if its lock is held.
- */
+ /** Returns the status of the given application. This is consistent if its lock is held.*/
ApplicationInstanceStatus getApplicationInstanceStatus(ApplicationInstanceReference application);
-
- /**
- * Returns the status of the given host, for the given application. This is consistent if the application's lock is held.
- */
- HostStatus getHostStatus(ApplicationInstanceReference application, HostName host);
-
+ /** Get host info for hostname in application. This is consistent if its lock is held. */
+ HostInfo getHostInfo(ApplicationInstanceReference applicationInstanceReference, HostName hostName);
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java
index 2d6a9299fcc..ca6f3c524a2 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java
@@ -11,9 +11,9 @@ import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.curator.Curator;
import com.yahoo.vespa.curator.Lock;
-import com.yahoo.vespa.curator.recipes.CuratorCounter;
import com.yahoo.vespa.orchestrator.OrchestratorContext;
import com.yahoo.vespa.orchestrator.OrchestratorUtil;
+import com.yahoo.vespa.orchestrator.status.json.WireHostInfo;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.KeeperException.NodeExistsException;
import org.apache.zookeeper.data.Stat;
@@ -22,8 +22,11 @@ import javax.inject.Inject;
import java.time.Duration;
import java.time.Instant;
import java.util.Collections;
+import java.util.HashMap;
import java.util.HashSet;
+import java.util.List;
import java.util.Map;
+import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
@@ -45,7 +48,7 @@ public class ZookeeperStatusService implements StatusService {
final static String HOST_STATUS_CACHE_COUNTER_PATH = "/vespa/host-status-service-cache-counter";
private final Curator curator;
- private final CuratorCounter counter;
+ private final HostInfosCache hostInfosCache;
private final Metric metric;
private final Timer timer;
@@ -55,18 +58,24 @@ public class ZookeeperStatusService implements StatusService {
*/
private final ConcurrentHashMap<Map<String, String>, Metric.Context> cachedContexts = new ConcurrentHashMap<>();
- /** A cache of hosts allowed to be down. Access only through {@link #getValidCache()}! */
- private final Map<ApplicationInstanceReference, Set<HostName>> hostsDown = new ConcurrentHashMap<>();
-
- private volatile long cacheRefreshedAt;
-
@Inject
public ZookeeperStatusService(@Component Curator curator, @Component Metric metric, @Component Timer timer) {
this.curator = curator;
- this.counter = new CuratorCounter(curator, HOST_STATUS_CACHE_COUNTER_PATH);
- this.cacheRefreshedAt = counter.get();
this.metric = metric;
this.timer = timer;
+
+ // Insert a cache above some ZooKeeper accesses
+ this.hostInfosCache = new HostInfosCache(curator, new HostInfosService() {
+ @Override
+ public HostInfos getHostInfos(ApplicationInstanceReference application) {
+ return ZookeeperStatusService.this.getHostInfosFromZk(application);
+ }
+
+ @Override
+ public boolean setHostStatus(ApplicationInstanceReference application, HostName hostName, HostStatus hostStatus) {
+ return ZookeeperStatusService.this.setHostStatusInZk(application, hostName, hostStatus);
+ }
+ });
}
@Override
@@ -98,8 +107,7 @@ public class ZookeeperStatusService implements StatusService {
*/
@Override
public Function<ApplicationInstanceReference, Set<HostName>> getSuspendedHostsByApplication() {
- Map<ApplicationInstanceReference, Set<HostName>> suspendedHostsByApplication = getValidCache();
- return application -> suspendedHostsByApplication.computeIfAbsent(application, this::hostsDownFor);
+ return application -> hostInfosCache.getHostInfos(application).suspendedHostsnames();
}
@@ -166,33 +174,58 @@ public class ZookeeperStatusService implements StatusService {
return Duration.between(startInstant, endInstant).toMillis() / 1000.0;
}
- private void setHostStatus(ApplicationInstanceReference applicationInstanceReference,
- HostName hostName,
- HostStatus status) {
- String path = hostAllowedDownPath(applicationInstanceReference, hostName);
+ /** Do not call this directly: should be called behind a cache. */
+ private boolean setHostStatusInZk(ApplicationInstanceReference applicationInstanceReference,
+ HostName hostName,
+ HostStatus status) {
+ String hostAllowedDownPath = hostAllowedDownPath(applicationInstanceReference, hostName);
- boolean invalidate = false;
+ boolean modified;
try {
switch (status) {
case NO_REMARKS:
- invalidate = deleteNode_ignoreNoNodeException(path, "Host already has state NO_REMARKS, path = " + path);
+ modified = deleteNode_ignoreNoNodeException(hostAllowedDownPath, "Host already has state NO_REMARKS, path = " + hostAllowedDownPath);
break;
case ALLOWED_TO_BE_DOWN:
- invalidate = createNode_ignoreNodeExistsException(path, "Host already has state ALLOWED_TO_BE_DOWN, path = " + path);
+ modified = createNode_ignoreNodeExistsException(hostAllowedDownPath, "Host already has state ALLOWED_TO_BE_DOWN, path = " + hostAllowedDownPath);
break;
default:
throw new IllegalArgumentException("Unexpected status '" + status + "'.");
}
+
+ modified |= setHostInfoInZk(applicationInstanceReference, hostName, status);
} catch (Exception e) {
- invalidate = true;
throw new RuntimeException(e);
}
- finally {
- if (invalidate) {
- counter.next();
- hostsDown.remove(applicationInstanceReference);
- }
+
+ return modified;
+ }
+
+ /** Returns false if no changes were made. */
+ private boolean setHostInfoInZk(ApplicationInstanceReference application, HostName hostname, HostStatus status)
+ throws Exception {
+ String path = hostPath(application, hostname);
+
+ if (status == HostStatus.NO_REMARKS) {
+ return deleteNode_ignoreNoNodeException(path, "Host already has state NO_REMARKS, path = " + path);
}
+
+ Optional<HostInfo> currentHostInfo = readBytesFromZk(path).map(WireHostInfo::deserialize);
+ if (currentHostInfo.isEmpty()) {
+ Instant suspendedSince = timer.currentTime();
+ HostInfo hostInfo = HostInfo.createSuspended(status, suspendedSince);
+ byte[] hostInfoBytes = WireHostInfo.serialize(hostInfo);
+ curator.framework().create().creatingParentsIfNeeded().forPath(path, hostInfoBytes);
+ } else if (currentHostInfo.get().status() == status) {
+ return false;
+ } else {
+ Instant suspendedSince = currentHostInfo.get().suspendedSince().orElseGet(timer::currentTime);
+ HostInfo hostInfo = HostInfo.createSuspended(status, suspendedSince);
+ byte[] hostInfoBytes = WireHostInfo.serialize(hostInfo);
+ curator.framework().setData().forPath(path, hostInfoBytes);
+ }
+
+ return true;
}
private boolean deleteNode_ignoreNoNodeException(String path, String debugLogMessageIfNotExists) throws Exception {
@@ -217,20 +250,25 @@ public class ZookeeperStatusService implements StatusService {
}
}
- @Override
- public HostStatus getHostStatus(ApplicationInstanceReference applicationInstanceReference, HostName hostName) {
- return getValidCache().computeIfAbsent(applicationInstanceReference, this::hostsDownFor)
- .contains(hostName) ? HostStatus.ALLOWED_TO_BE_DOWN : HostStatus.NO_REMARKS;
+ private Optional<byte[]> readBytesFromZk(String path) throws Exception {
+ try {
+ return Optional.of(curator.framework().getData().forPath(path));
+ } catch (NoNodeException e) {
+ return Optional.empty();
+ }
}
- /** Holding an application's lock ensures the cache is up to date for that application. */
- private Map<ApplicationInstanceReference, Set<HostName>> getValidCache() {
- long cacheGeneration = counter.get();
- if (counter.get() != cacheRefreshedAt) {
- cacheRefreshedAt = cacheGeneration;
- hostsDown.clear();
- }
- return hostsDown;
+ private void updateNodeInZk(String path, byte[] bytes) throws Exception {
+ curator.framework().setData().forPath(path, bytes);
+ }
+
+ private void createNodeInZk(String path, byte[] bytes) throws Exception {
+ curator.framework().create().creatingParentsIfNeeded().forPath(path, bytes);
+ }
+
+ @Override
+ public HostInfo getHostInfo(ApplicationInstanceReference applicationInstanceReference, HostName hostName) {
+ return hostInfosCache.getHostInfos(applicationInstanceReference).get(hostName);
}
private Set<HostName> hostsDownFor(ApplicationInstanceReference application) {
@@ -247,6 +285,55 @@ public class ZookeeperStatusService implements StatusService {
}
}
+ /** Do not call this directly: should be called behind a cache. */
+ private HostInfos getHostInfosFromZk(ApplicationInstanceReference application) {
+ Map<HostName, HostInfo> hostInfos;
+ String hostsRootPath = hostsPath(application);
+ if (uncheck(() -> curator.framework().checkExists().forPath(hostsRootPath)) == null) {
+ hostInfos = new HashMap<>();
+ } else {
+ List<String> hostnames = uncheck(() -> curator.framework().getChildren().forPath(hostsRootPath));
+ hostInfos = new HashMap<>(hostnames.stream().collect(Collectors.toMap(
+ hostname -> new HostName(hostname),
+ hostname -> {
+ byte[] bytes = uncheck(() -> curator.framework().getData().forPath(hostsRootPath + "/" + hostname));
+ return WireHostInfo.deserialize(bytes);
+ })));
+ }
+
+ // For backwards compatibility we'll add HostInfos from the old hosts-allowed-down ZK path,
+ // using the creation time as the since path. The new hosts ZK path should contain a subset of
+ // the hostnames under hosts-allowed-down ZK path. Eventually these sets should be identical.
+ // Once that's true we can stop writing to hosts-allowed-down, remove this code, and all
+ // data in hosts-allowed-down can be removed.
+ Set<HostName> legacyHostsDown = hostsDownFor(application);
+ Map<HostName, HostInfo> legacyHostInfos = legacyHostsDown.stream().collect(Collectors.toMap(
+ hostname -> hostname,
+ hostname -> {
+ Stat stat = uncheck(() -> curator.framework()
+ .checkExists()
+ .forPath(hostsAllowedDownPath(application) + "/" + hostname.s()));
+ return HostInfo.createSuspended(HostStatus.ALLOWED_TO_BE_DOWN, Instant.ofEpochMilli(stat.getCtime()));
+ }
+ ));
+
+ hostInfos.putAll(legacyHostInfos);
+ return new HostInfos(hostInfos);
+ }
+
+ private <T> T uncheck(SupplierThrowingException<T> supplier) {
+ try {
+ return supplier.get();
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @FunctionalInterface
+ interface SupplierThrowingException<T> {
+ T get() throws Exception;
+ }
+
@Override
public ApplicationInstanceStatus getApplicationInstanceStatus(ApplicationInstanceReference applicationInstanceReference) {
try {
@@ -259,17 +346,26 @@ public class ZookeeperStatusService implements StatusService {
}
}
- private static String applicationInstancePath(ApplicationInstanceReference applicationInstanceReference) {
+ static String applicationInstanceReferencePath(ApplicationInstanceReference applicationInstanceReference) {
return HOST_STATUS_BASE_PATH + '/' +
applicationInstanceReference.tenantId() + ":" + applicationInstanceReference.applicationInstanceId();
}
+ private static String applicationPath(ApplicationInstanceReference applicationInstanceReference) {
+ ApplicationId applicationId = OrchestratorUtil.toApplicationId(applicationInstanceReference);
+ return "/vespa/host-status/" + applicationId.serializedForm();
+ }
+
+ private static String hostsPath(ApplicationInstanceReference applicationInstanceReference) {
+ return applicationPath(applicationInstanceReference) + "/hosts";
+ }
+
private static String hostsAllowedDownPath(ApplicationInstanceReference applicationInstanceReference) {
- return applicationInstancePath(applicationInstanceReference) + "/hosts-allowed-down";
+ return applicationInstanceReferencePath(applicationInstanceReference) + "/hosts-allowed-down";
}
private static String applicationInstanceLock2Path(ApplicationInstanceReference applicationInstanceReference) {
- return applicationInstancePath(applicationInstanceReference) + "/lock2";
+ return applicationInstanceReferencePath(applicationInstanceReference) + "/lock2";
}
private String applicationInstanceSuspendedPath(ApplicationInstanceReference applicationInstanceReference) {
@@ -280,6 +376,10 @@ public class ZookeeperStatusService implements StatusService {
return hostsAllowedDownPath(applicationInstanceReference) + '/' + hostname.s();
}
+ private static String hostPath(ApplicationInstanceReference application, HostName hostname) {
+ return hostsPath(application) + "/" + hostname.s();
+ }
+
private class ZkMutableStatusRegistry implements MutableStatusRegistry {
private final Lock lock;
@@ -303,20 +403,20 @@ public class ZookeeperStatusService implements StatusService {
}
@Override
- public HostStatus getHostStatus(HostName hostName) {
- return ZookeeperStatusService.this.getHostStatus(applicationInstanceReference, hostName);
+ public HostInfo getHostInfo(HostName hostName) {
+ return ZookeeperStatusService.this.getHostInfo(applicationInstanceReference, hostName);
}
@Override
public Set<HostName> getSuspendedHosts() {
- return getValidCache().computeIfAbsent(applicationInstanceReference, ZookeeperStatusService.this::hostsDownFor);
+ return hostInfosCache.getHostInfos(applicationInstanceReference).suspendedHostsnames();
}
@Override
public void setHostState(final HostName hostName, final HostStatus status) {
if (probe) return;
log.log(LogLevel.INFO, "Setting host " + hostName + " to status " + status);
- setHostStatus(applicationInstanceReference, hostName, status);
+ hostInfosCache.setHostStatus(applicationInstanceReference, hostName, status);
}
@Override
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/json/WireHostInfo.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/json/WireHostInfo.java
new file mode 100644
index 00000000000..d0d2e523f50
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/json/WireHostInfo.java
@@ -0,0 +1,48 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.status.json;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.yahoo.vespa.orchestrator.status.HostInfo;
+import com.yahoo.vespa.orchestrator.status.HostStatus;
+
+import java.nio.charset.StandardCharsets;
+import java.time.Instant;
+import java.util.Objects;
+
+import static com.yahoo.yolean.Exceptions.uncheck;
+
+/**
+ * Handles serialization/deserialization of HostInfo to/from byte array.
+ *
+ * @author hakonhall
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+@JsonInclude(JsonInclude.Include.NON_NULL)
+public class WireHostInfo {
+ private static final ObjectMapper mapper = new ObjectMapper();
+
+ @JsonProperty("status") public String status;
+ @JsonProperty("suspendedSince") public Long suspendedSinceInMillis;
+
+ public static HostInfo deserialize(byte[] bytes) {
+ String serializedString = new String(bytes, StandardCharsets.UTF_8);
+ WireHostInfo wireHostInfo = uncheck(() -> mapper.readValue(serializedString, WireHostInfo.class));
+ return HostInfo.createSuspended(HostStatus.valueOf(Objects.requireNonNull(wireHostInfo.status)),
+ Instant.ofEpochMilli(Objects.requireNonNull(wireHostInfo.suspendedSinceInMillis)));
+ }
+
+ public static byte[] serialize(HostInfo hostInfo) {
+ if (hostInfo.status() == HostStatus.NO_REMARKS) {
+ throw new IllegalArgumentException("Serialization of NO_REMARKS is not supported");
+ }
+
+ WireHostInfo wireHostInfo = new WireHostInfo();
+ wireHostInfo.status = hostInfo.status().name();
+ wireHostInfo.suspendedSinceInMillis = hostInfo.suspendedSince().get().toEpochMilli();
+
+ return uncheck(() -> mapper.writeValueAsBytes(wireHostInfo));
+ }
+}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/HostInfoTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/HostInfoTest.java
new file mode 100644
index 00000000000..539a10a695d
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/HostInfoTest.java
@@ -0,0 +1,33 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.status;
+
+import com.yahoo.jdisc.test.TestTimer;
+import com.yahoo.vespa.orchestrator.status.json.WireHostInfo;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+
+/**
+ * @author hakonhall
+ */
+public class HostInfoTest {
+ private final TestTimer timer = new TestTimer();
+
+ @Before
+ public void setUp() {
+ timer.setMillis(3L);
+ }
+
+ @Test
+ public void equality() {
+ HostInfo info1 = HostInfo.createSuspended(HostStatus.ALLOWED_TO_BE_DOWN, timer.currentTime());
+ assertNotEquals(info1, HostInfo.createNoRemarks());
+ assertNotEquals(info1, HostInfo.createSuspended(HostStatus.PERMANENTLY_DOWN, timer.currentTime()));
+
+ byte[] serialized = WireHostInfo.serialize(info1);
+ HostInfo deserialized1 = WireHostInfo.deserialize(serialized);
+ assertEquals(info1, deserialized1);
+ }
+} \ No newline at end of file
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/HostInfosCacheTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/HostInfosCacheTest.java
new file mode 100644
index 00000000000..81a160b8636
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/HostInfosCacheTest.java
@@ -0,0 +1,56 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.status;
+
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.TenantId;
+import com.yahoo.vespa.curator.mock.MockCurator;
+import org.junit.Test;
+
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+/**
+ * @author hakonhall
+ */
+public class HostInfosCacheTest {
+ @Test
+ public void test() {
+ MockCurator curator = new MockCurator();
+ HostInfosService service = mock(HostInfosService.class);
+ HostInfosCache cache = new HostInfosCache(curator, service);
+
+ ApplicationInstanceReference application = new ApplicationInstanceReference(
+ new TenantId("tenantid"),
+ new ApplicationInstanceId("application:dev:region:default"));
+
+ HostInfos hostInfos = mock(HostInfos.class);
+ when(service.getHostInfos(application)).thenReturn(hostInfos);
+
+ // cache miss
+ HostInfos hostInfos1 = cache.getHostInfos(application);
+ verify(service, times(1)).getHostInfos(any());
+ assertSame(hostInfos1, hostInfos);
+
+ // cache hit
+ HostInfos hostInfos2 = cache.getHostInfos(application);
+ verify(service, times(1)).getHostInfos(any());
+ assertSame(hostInfos2, hostInfos);
+
+ when(service.setHostStatus(any(), any(), any())).thenReturn(true);
+ boolean modified = cache.setHostStatus(application, new HostName("hostname1"), HostStatus.ALLOWED_TO_BE_DOWN);
+ verify(service, times(1)).getHostInfos(any());
+ assertTrue(modified);
+
+ // cache miss
+ HostInfos hostInfos3 = cache.getHostInfos(application);
+ verify(service, times(2)).getHostInfos(any());
+ assertSame(hostInfos1, hostInfos);
+ }
+} \ No newline at end of file
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java
index f0f9ebfb13c..12622f22837 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusServiceTest.java
@@ -96,7 +96,7 @@ public class ZookeeperStatusServiceTest {
@Test
public void host_state_for_unknown_hosts_is_no_remarks() {
assertThat(
- zookeeperStatusService.getHostStatus(TestIds.APPLICATION_INSTANCE_REFERENCE, TestIds.HOST_NAME1),
+ zookeeperStatusService.getHostInfo(TestIds.APPLICATION_INSTANCE_REFERENCE, TestIds.HOST_NAME1).status(),
is(HostStatus.NO_REMARKS));
}
@@ -111,13 +111,13 @@ public class ZookeeperStatusServiceTest {
.lockApplicationInstance_forCurrentThreadOnly(context, TestIds.APPLICATION_INSTANCE_REFERENCE)) {
//shuffling to catch "clean database" failures for all cases.
- for (HostStatus hostStatus: shuffledList(HostStatus.values())) {
+ for (HostStatus hostStatus: shuffledList(HostStatus.NO_REMARKS, HostStatus.ALLOWED_TO_BE_DOWN)) {
for (int i = 0; i < 2; i++) {
statusRegistry.setHostState(
TestIds.HOST_NAME1,
hostStatus);
- assertThat(statusRegistry.getHostStatus(TestIds.HOST_NAME1),
+ assertThat(statusRegistry.getHostInfo(TestIds.HOST_NAME1).status(),
is(hostStatus));
}
}
@@ -182,7 +182,7 @@ public class ZookeeperStatusServiceTest {
killSession(curator.framework(), testingServer);
//Throws SessionFailedException if the SessionFailRetryLoop has not been closed.
- statusRegistry.getHostStatus(TestIds.HOST_NAME1);
+ statusRegistry.getHostInfo(TestIds.HOST_NAME1);
});
assertThat(resultOfZkOperationAfterLockFailure, notHoldsException());
@@ -289,7 +289,8 @@ public class ZookeeperStatusServiceTest {
}
//TODO: move to vespajlib
- private static <T> List<T> shuffledList(T[] values) {
+ @SafeVarargs
+ private static <T> List<T> shuffledList(T... values) {
//new ArrayList necessary to avoid "write through" behaviour
List<T> list = new ArrayList<>(Arrays.asList(values));
Collections.shuffle(list);