summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJon Marius Venstad <venstad@gmail.com>2019-02-07 16:34:16 +0100
committerJon Marius Venstad <venstad@gmail.com>2019-02-07 16:34:16 +0100
commit5eec3429dcad027cb87f875b6fb4db2f29904df9 (patch)
tree906aa17376972b48baed0be9a6c0bf3f99f2c8a4
parent7d6cc76f9493f8d799ce7e06a6c4d2504a52e7d9 (diff)
Read host statuses in bulk per app, and cache until any changes
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java46
1 files changed, 40 insertions, 6 deletions
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java
index 0d00ab7becd..a167e185012 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/status/ZookeeperStatusService.java
@@ -7,6 +7,7 @@ import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.curator.Curator;
import com.yahoo.vespa.curator.Lock;
+import com.yahoo.vespa.curator.recipes.CuratorCounter;
import com.yahoo.vespa.orchestrator.OrchestratorContext;
import com.yahoo.vespa.orchestrator.OrchestratorUtil;
import org.apache.zookeeper.KeeperException.NoNodeException;
@@ -15,9 +16,13 @@ import org.apache.zookeeper.data.Stat;
import javax.inject.Inject;
import java.time.Duration;
+import java.util.Collections;
import java.util.HashSet;
+import java.util.Map;
import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
import java.util.logging.Logger;
+import java.util.stream.Collectors;
/**
* Stores instance suspension status and which hosts are allowed to go down in zookeeper.
@@ -31,12 +36,22 @@ public class ZookeeperStatusService implements StatusService {
final static String HOST_STATUS_BASE_PATH = "/vespa/host-status-service";
final static String APPLICATION_STATUS_BASE_PATH = "/vespa/application-status-service";
+ final static String COUNTER_PATH = "/vespa/cache-counter";
private final Curator curator;
+ private final CuratorCounter counter;
+
+ /** A cache of hosts allowed to be down. Access only through {@link #getValidCache()}! */
+ private final Map<ApplicationInstanceReference, Set<HostName>> hostsDown;
+
+ private volatile long cacheRefreshedAt;
@Inject
public ZookeeperStatusService(@Component Curator curator) {
this.curator = curator;
+ this.counter = new CuratorCounter(curator, COUNTER_PATH);
+ this.cacheRefreshedAt = counter.get();
+ this.hostsDown = new ConcurrentHashMap<>();
}
@Override
@@ -122,6 +137,9 @@ public class ZookeeperStatusService implements StatusService {
//TODO: IOException with explanation
throw new RuntimeException(e);
}
+ finally {
+ counter.next();
+ }
}
private void deleteNode_ignoreNoNodeException(String path, String debugLogMessageIfNotExists) throws Exception {
@@ -142,15 +160,31 @@ public class ZookeeperStatusService implements StatusService {
}
}
- //TODO: Eliminate repeated calls to getHostStatus, replace with bulk operation.
private HostStatus getInternalHostStatus(ApplicationInstanceReference applicationInstanceReference, HostName hostName) {
+ return getValidCache().computeIfAbsent(applicationInstanceReference, this::hostsDownFor)
+ .contains(hostName) ? HostStatus.ALLOWED_TO_BE_DOWN : HostStatus.NO_REMARKS;
+ }
+
+ /** Holding an application's lock ensures the cache is up to date for that application. */
+ private Map<ApplicationInstanceReference, Set<HostName>> getValidCache() {
+ long cacheGeneration = counter.get();
+ if (counter.get() != cacheRefreshedAt) {
+ cacheRefreshedAt = cacheGeneration;
+ hostsDown.clear();
+ }
+ return hostsDown;
+ }
+
+ private Set<HostName> hostsDownFor(ApplicationInstanceReference application) {
try {
- Stat statOrNull = curator.framework().checkExists().forPath(
- hostAllowedDownPath(applicationInstanceReference, hostName));
+ if (curator.framework().checkExists().forPath(hostsAllowedDownPath(application)) == null)
+ return Collections.emptySet();
- return (statOrNull == null) ? HostStatus.NO_REMARKS : HostStatus.ALLOWED_TO_BE_DOWN;
- } catch (Exception e) {
- //TODO: IOException with explanation - Should we only catch IOExceptions or are they a special case?
+ return curator.framework().getChildren().forPath(hostsAllowedDownPath(application))
+ .stream().map(HostName::new)
+ .collect(Collectors.toUnmodifiableSet());
+ }
+ catch (Exception e) {
throw new RuntimeException(e);
}
}