diff options
147 files changed, 3590 insertions, 762 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 8456b79f124..eba69f6ed02 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -110,6 +110,7 @@ add_subdirectory(vdslib) add_subdirectory(vdstestlib) add_subdirectory(vespa-athenz) add_subdirectory(vespa-http-client) +add_subdirectory(vespa-testrunner-components) add_subdirectory(vespa_feed_perf) add_subdirectory(vespa_jersey2) add_subdirectory(vespabase) diff --git a/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java b/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java index b15cb2ad399..c95d814eb99 100644 --- a/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java +++ b/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java @@ -75,6 +75,7 @@ public class ClusterControllerClusterConfigurer { options.setMaxDeferredTaskVersionWaitTime(Duration.ofMillis((int)(config.max_deferred_task_version_wait_time_sec() * 1000))); options.clusterHasGlobalDocumentTypes = config.cluster_has_global_document_types(); options.minMergeCompletionRatio = config.min_merge_completion_ratio(); + options.enableTwoPhaseClusterStateActivation = config.enable_two_phase_cluster_state_transitions(); } private void configure(SlobroksConfig config) { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ActivateClusterStateVersionRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ActivateClusterStateVersionRequest.java new file mode 100644 index 00000000000..26d63f7ba60 --- /dev/null +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ActivateClusterStateVersionRequest.java @@ -0,0 +1,13 @@ +// Copyright 2019 Oath Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.clustercontroller.core; + +/** + * Wrapper for a cluster state activation request towards a single node. + */ +public class ActivateClusterStateVersionRequest extends ClusterStateVersionSpecificRequest { + + public ActivateClusterStateVersionRequest(NodeInfo nodeInfo, int systemStateVersion) { + super(nodeInfo, systemStateVersion); + } + +} diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundle.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundle.java index 76177e8f1c1..fc06fef5b30 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundle.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundle.java @@ -19,16 +19,23 @@ import java.util.stream.Collectors; * * The baseline state is identical to the legacy, global cluster state that the * cluster controller has historically produced as its only output. + * + * The bundle also contains an additional "deferred activation" flag which tells + * the recipient if the cluster state transition should complete immediately or + * await an explicit activation RPC from the cluster controller. 
*/ public class ClusterStateBundle { private final AnnotatedClusterState baselineState; private final Map<String, AnnotatedClusterState> derivedBucketSpaceStates; + private final boolean deferredActivation; public static class Builder { private final AnnotatedClusterState baselineState; + private Map<String, AnnotatedClusterState> explicitDerivedStates; private ClusterStateDeriver stateDeriver; private Set<String> bucketSpaces; + private boolean deferredActivation = false; public Builder(AnnotatedClusterState baselineState) { this.baselineState = baselineState; @@ -40,30 +47,59 @@ public class ClusterStateBundle { } public Builder bucketSpaces(Set<String> bucketSpaces) { + if (this.explicitDerivedStates != null) { + throw new IllegalStateException("Cannot set bucket spaces on Builder that already " + + "has explicit derived states set"); + } this.bucketSpaces = bucketSpaces; return this; } public Builder bucketSpaces(String... bucketSpaces) { - this.bucketSpaces = new TreeSet<>(Arrays.asList(bucketSpaces)); + return bucketSpaces(new TreeSet<>(Arrays.asList(bucketSpaces))); + } + + public Builder explicitDerivedStates(Map<String, AnnotatedClusterState> derivedStates) { + if (this.bucketSpaces != null || this.stateDeriver != null) { + throw new IllegalStateException("Cannot set explicitly derived states on Builder " + + "that already has bucket spaces or deriver set"); + } + this.explicitDerivedStates = derivedStates; + return this; + } + + public Builder deferredActivation(boolean deferred) { + this.deferredActivation = deferred; return this; } public ClusterStateBundle deriveAndBuild() { - if (stateDeriver == null || bucketSpaces == null || bucketSpaces.isEmpty()) { - return ClusterStateBundle.ofBaselineOnly(baselineState); + if ((stateDeriver == null || bucketSpaces == null || bucketSpaces.isEmpty()) && explicitDerivedStates == null) { + return ClusterStateBundle.ofBaselineOnly(baselineState, deferredActivation); + } + Map<String, AnnotatedClusterState> derived; + 
if (explicitDerivedStates != null) { + derived = explicitDerivedStates; + } else { + derived = bucketSpaces.stream() + .collect(Collectors.toMap( + Function.identity(), + s -> stateDeriver.derivedFrom(baselineState, s))); } - Map<String, AnnotatedClusterState> derived = bucketSpaces.stream() - .collect(Collectors.toMap( - Function.identity(), - s -> stateDeriver.derivedFrom(baselineState, s))); - return new ClusterStateBundle(baselineState, derived); + return new ClusterStateBundle(baselineState, derived, deferredActivation); } } private ClusterStateBundle(AnnotatedClusterState baselineState, Map<String, AnnotatedClusterState> derivedBucketSpaceStates) { + this(baselineState, derivedBucketSpaceStates, false); + } + + private ClusterStateBundle(AnnotatedClusterState baselineState, Map<String, + AnnotatedClusterState> derivedBucketSpaceStates, + boolean deferredActivation) { this.baselineState = baselineState; this.derivedBucketSpaceStates = Collections.unmodifiableMap(derivedBucketSpaceStates); + this.deferredActivation = deferredActivation; } public static Builder builder(AnnotatedClusterState baselineState) { @@ -74,6 +110,16 @@ public class ClusterStateBundle { return new ClusterStateBundle(baselineState, derivedBucketSpaceStates); } + public static ClusterStateBundle of(AnnotatedClusterState baselineState, + Map<String, AnnotatedClusterState> derivedBucketSpaceStates, + boolean deferredActivation) { + return new ClusterStateBundle(baselineState, derivedBucketSpaceStates, deferredActivation); + } + + public static ClusterStateBundle ofBaselineOnly(AnnotatedClusterState baselineState, boolean deferredActivation) { + return new ClusterStateBundle(baselineState, Collections.emptyMap(), deferredActivation); + } + public static ClusterStateBundle ofBaselineOnly(AnnotatedClusterState baselineState) { return new ClusterStateBundle(baselineState, Collections.emptyMap()); } @@ -94,13 +140,15 @@ public class ClusterStateBundle { return derivedBucketSpaceStates; } + public 
boolean deferredActivation() { return this.deferredActivation; } + public ClusterStateBundle cloneWithMapper(Function<ClusterState, ClusterState> mapper) { AnnotatedClusterState clonedBaseline = baselineState.cloneWithClusterState( mapper.apply(baselineState.getClusterState().clone())); Map<String, AnnotatedClusterState> clonedDerived = derivedBucketSpaceStates.entrySet().stream() .collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue().cloneWithClusterState( mapper.apply(e.getValue().getClusterState().clone())))); - return new ClusterStateBundle(clonedBaseline, clonedDerived); + return new ClusterStateBundle(clonedBaseline, clonedDerived, deferredActivation); } public ClusterStateBundle clonedWithVersionSet(int version) { @@ -127,12 +175,14 @@ public class ClusterStateBundle { @Override public String toString() { if (derivedBucketSpaceStates.isEmpty()) { - return String.format("ClusterStateBundle('%s')", baselineState); + return String.format("ClusterStateBundle('%s'%s)", baselineState, + deferredActivation ? " (deferred activation)" : ""); } Map<String, AnnotatedClusterState> orderedStates = new TreeMap<>(derivedBucketSpaceStates); - return String.format("ClusterStateBundle('%s', %s)", baselineState, orderedStates.entrySet().stream() + return String.format("ClusterStateBundle('%s', %s%s)", baselineState, orderedStates.entrySet().stream() .map(e -> String.format("%s '%s'", e.getKey(), e.getValue())) - .collect(Collectors.joining(", "))); + .collect(Collectors.joining(", ")), + deferredActivation ? 
" (deferred activation)" : ""); } @Override @@ -140,13 +190,13 @@ public class ClusterStateBundle { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; ClusterStateBundle that = (ClusterStateBundle) o; - return Objects.equals(baselineState, that.baselineState) && + return deferredActivation == that.deferredActivation && + Objects.equals(baselineState, that.baselineState) && Objects.equals(derivedBucketSpaceStates, that.derivedBucketSpaceStates); } @Override public int hashCode() { - return Objects.hash(baselineState, derivedBucketSpaceStates); + return Objects.hash(baselineState, derivedBucketSpaceStates, deferredActivation); } - } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateVersionSpecificRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateVersionSpecificRequest.java new file mode 100644 index 00000000000..9e8abc0608e --- /dev/null +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateVersionSpecificRequest.java @@ -0,0 +1,67 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.clustercontroller.core; + +/** + * Base class for distributor/content node node RPC requests that are bound + * to a particular cluster state version. 
+ */ +public abstract class ClusterStateVersionSpecificRequest { + + private final NodeInfo nodeInfo; + private final int clusterStateVersion; + private Reply reply; + + public ClusterStateVersionSpecificRequest(NodeInfo nodeInfo, int clusterStateVersion) { + this.nodeInfo = nodeInfo; + this.clusterStateVersion = clusterStateVersion; + } + + public NodeInfo getNodeInfo() { return nodeInfo; } + + public int getClusterStateVersion() { return clusterStateVersion; } + + public void setReply(Reply reply) { this.reply = reply; } + + public Reply getReply() { return reply; } + + public static class Reply { + + final int returnCode; + final String returnMessage; + final int actualVersion; + + public Reply() { + this(0, null); + } + + public Reply(int returnCode, String returnMessage) { + this.returnCode = returnCode; + this.returnMessage = returnMessage; + this.actualVersion = -1; + } + + private Reply(int actualVersion) { + this.returnCode = 0; + this.returnMessage = null; + this.actualVersion = actualVersion; + } + + public static Reply withActualVersion(int version) { + return new Reply(version); + } + + /** Returns whether this is an error response */ + public boolean isError() { return returnCode != 0; } + + /** Returns the return code, which is 0 if this request was successful */ + public int getReturnCode() { return returnCode; } + + /** Returns the message returned, or null if none */ + public String getReturnMessage() { return returnMessage; } + + /** Returns actual cluster state version active on node, or -1 if reply does not contain this information */ + public int getActualVersion() { return actualVersion; } + + } + +} diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/Communicator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/Communicator.java index 450513343b0..900eee54cd3 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/Communicator.java +++ 
b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/Communicator.java @@ -20,6 +20,8 @@ public interface Communicator { void setSystemState(ClusterStateBundle states, NodeInfo node, Waiter<SetClusterStateRequest> waiter); + void activateClusterStateVersion(int clusterStateVersion, NodeInfo node, Waiter<ActivateClusterStateVersionRequest> waiter); + void shutdown(); } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java index 6ff297b4a31..43412311436 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java @@ -45,7 +45,7 @@ public class ContentCluster { final VdsClusterHtmlRendrer vdsClusterHtmlRendrer, final StringBuilder sb, final Timer timer, - final ClusterState state, + final ClusterStateBundle state, final ClusterStatsAggregator statsAggregator, final Distribution distribution, final FleetControllerOptions options, diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index 005bf7971a5..ba35243c14d 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -76,6 +76,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd private boolean waitingForCycle = false; private StatusPageServer.PatternRequestRouter statusRequestRouter = new StatusPageServer.PatternRequestRouter(); private final List<ClusterStateBundle> newStates = new ArrayList<>(); + private final List<ClusterStateBundle> convergedStates 
= new ArrayList<>(); private long configGeneration = -1; private long nextConfigGeneration = -1; private Queue<RemoteClusterControllerTask> remoteTasks = new LinkedList<>(); @@ -253,6 +254,10 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd throw new NullPointerException("Cluster state should never be null at this point"); } listener.handleNewPublishedState(ClusterStateBundle.ofBaselineOnly(AnnotatedClusterState.withoutAnnotations(state))); + ClusterStateBundle convergedState = systemStateBroadcaster.getLastClusterStateBundleConverged(); + if (convergedState != null) { + listener.handleStateConvergedInCluster(convergedState); + } } public FleetControllerOptions getOptions() { @@ -435,9 +440,11 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd /** Called when all distributors have acked newest cluster state version. */ public void handleAllDistributorsInSync(DatabaseHandler database, DatabaseHandler.Context context) throws InterruptedException { Set<ConfiguredNode> nodes = new HashSet<>(cluster.clusterInfo().getConfiguredNodes().values()); - ClusterState currentState = stateVersionTracker.getVersionedClusterState(); - log.fine(() -> String.format("All distributors have ACKed cluster state version %d", currentState.getVersion())); - stateChangeHandler.handleAllDistributorsInSync(currentState, nodes, database, context); + // TODO wouldn't it be better to always get bundle information from the state broadcaster? 
+ var currentBundle = stateVersionTracker.getVersionedClusterStateBundle(); + log.fine(() -> String.format("All distributors have ACKed cluster state version %d", currentBundle.getVersion())); + stateChangeHandler.handleAllDistributorsInSync(currentBundle.getBaselineClusterState(), nodes, database, context); + convergedStates.add(currentBundle); } private boolean changesConfiguredNodeSet(Collection<ConfiguredNode> newNodes) { @@ -666,12 +673,14 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd // Reset timer to only see warning once. firstAllowedStateBroadcast = currentTime; } - sentAny = systemStateBroadcaster.broadcastNewState(databaseContext, communicator); + sentAny = systemStateBroadcaster.broadcastNewStateBundleIfRequired(databaseContext, communicator); if (sentAny) { // FIXME won't this inhibit resending to unresponsive nodes? nextStateSendTime = currentTime + options.minTimeBetweenNewSystemStates; } } + // Always allow activations if we've already broadcasted a state + sentAny |= systemStateBroadcaster.broadcastStateActivationsIfRequired(databaseContext, communicator); return sentAny; } @@ -679,13 +688,23 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd if ( ! newStates.isEmpty()) { synchronized (systemStateListeners) { for (ClusterStateBundle stateBundle : newStates) { - for(SystemStateListener listener : systemStateListeners) { + for (SystemStateListener listener : systemStateListeners) { listener.handleNewPublishedState(stateBundle); } } newStates.clear(); } } + if ( ! 
convergedStates.isEmpty()) { + synchronized (systemStateListeners) { + for (ClusterStateBundle stateBundle : convergedStates) { + for (SystemStateListener listener : systemStateListeners) { + listener.handleStateConvergedInCluster(stateBundle); + } + } + convergedStates.clear(); + } + } } private boolean processNextQueuedRemoteTask() { @@ -822,6 +841,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd final ClusterStateBundle candidateBundle = ClusterStateBundle.builder(candidate) .bucketSpaces(configuredBucketSpaces) .stateDeriver(createBucketSpaceStateDeriver()) + .deferredActivation(options.enableTwoPhaseClusterStateActivation) .deriveAndBuild(); stateVersionTracker.updateLatestCandidateStateBundle(candidateBundle); invokeCandidateStateListeners(candidateBundle); @@ -1046,7 +1066,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd while (true) { int ackedNodes = 0; for (NodeInfo node : cluster.getNodeInfo()) { - if (node.getSystemStateVersionAcknowledged() >= version) { + if (node.getClusterStateVersionBundleAcknowledged() >= version) { ++ackedNodes; } } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java index e069dde1901..f49b626d347 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java @@ -121,6 +121,8 @@ public class FleetControllerOptions implements Cloneable { public boolean clusterHasGlobalDocumentTypes = false; + public boolean enableTwoPhaseClusterStateActivation = false; + // TODO: Choose a default value public double minMergeCompletionRatio = 1.0; @@ -231,6 +233,7 @@ public class FleetControllerOptions implements Cloneable { 
sb.append("<tr><td><nobr>Wanted distribution bits</nobr></td><td align=\"right\">").append(distributionBits).append("</td></tr>"); sb.append("<tr><td><nobr>Max deferred task version wait time</nobr></td><td align=\"right\">").append(maxDeferredTaskVersionWaitTime.toMillis()).append("ms</td></tr>"); sb.append("<tr><td><nobr>Cluster has global document types configured</nobr></td><td align=\"right\">").append(clusterHasGlobalDocumentTypes).append("</td></tr>"); + sb.append("<tr><td><nobr>Enable 2-phase cluster state activation protocol</nobr></td><td align=\"right\">").append(enableTwoPhaseClusterStateActivation).append("</td></tr>"); sb.append("</table>"); } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java index 54cf2dad00a..82d13e2d9ef 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeInfo.java @@ -14,7 +14,6 @@ import java.io.PrintWriter; import java.io.StringWriter; import java.util.LinkedList; import java.util.List; -import java.util.Map; import java.util.TreeMap; import java.util.logging.Logger; @@ -74,11 +73,16 @@ abstract public class NodeInfo implements Comparable<NodeInfo> { * Version 1 is for the getnodestate2 command ((legacy, not supported). 
* Version 2 is for the getnodestate3 command * Version 3 adds support for setdistributionstates + * Version 4 adds support for explicit cluster state version bundle activation */ private int version; - private Map<Integer, ClusterState> systemStateVersionSent = new TreeMap<>(); - private ClusterState systemStateVersionAcknowledged; + // Mapping of cluster state version -> cluster state bundle instance + private TreeMap<Integer, ClusterStateBundle> clusterStateVersionBundleSent = new TreeMap<>(); + private ClusterStateBundle clusterStateVersionBundleAcknowledged; + + private int clusterStateVersionActivationSent = -1; + private int clusterStateVersionActivationAcked = -1; /** * When a node goes from an up state to a down state, update this flag with the start timestamp the node had before going down. * The cluster state broadcaster will use this to identify whether distributors have restarted. @@ -102,7 +106,9 @@ abstract public class NodeInfo implements Comparable<NodeInfo> { // NOTE: See update(node) below NodeInfo(ContentCluster cluster, Node n, boolean configuredRetired, String rpcAddress, Distribution distribution) { - if (cluster == null) throw new IllegalArgumentException("Cluster not set"); + if (cluster == null) { + throw new IllegalArgumentException("Cluster not set"); + } reportedState = new NodeState(n.getType(), State.DOWN); wantedState = new NodeState(n.getType(), State.UP); this.cluster = cluster; @@ -238,7 +244,7 @@ abstract public class NodeInfo implements Comparable<NodeInfo> { public ContentCluster getCluster() { return cluster; } - /** Returns true if the node is currentl registered in slobrok */ + /** Returns true if the node is currently registered in slobrok */ // FIXME why is this called "isRpcAddressOutdated" then??? public boolean isRpcAddressOutdated() { return lastSeenInSlobrok != null; } @@ -353,12 +359,13 @@ abstract public class NodeInfo implements Comparable<NodeInfo> { /** Sets the wanted state. 
The wanted state is taken as UP if a null argument is given */ public void setWantedState(NodeState state) { - if (state == null) + if (state == null) { state = new NodeState(node.getType(), State.UP); + } NodeState newWanted = new NodeState(node.getType(), state.getState()); newWanted.setDescription(state.getDescription()); if (!newWanted.equals(state)) { - try{ + try { throw new Exception(); } catch (Exception e) { StringWriter sw = new StringWriter(); @@ -408,50 +415,70 @@ abstract public class NodeInfo implements Comparable<NodeInfo> { } public int getVersion() { return version; } - public int getConnectionVersion() { return connectionVersion; } - public void setConnectionVersion(int version) { connectionVersion = version; } public ClusterState getNewestSystemStateSent() { - ClusterState last = null; - for (ClusterState s : systemStateVersionSent.values()) { - if (last == null || last.getVersion() < s.getVersion()) { - last = s; - } + if (clusterStateVersionBundleSent.isEmpty()) { + return null; } - return last; + return clusterStateVersionBundleSent.lastEntry().getValue().getBaselineClusterState(); } public int getNewestSystemStateVersionSent() { ClusterState last = getNewestSystemStateSent(); return last == null ? -1 : last.getVersion(); } - public int getSystemStateVersionAcknowledged() { - return (systemStateVersionAcknowledged == null ? -1 : systemStateVersionAcknowledged.getVersion()); + + public int getClusterStateVersionBundleAcknowledged() { + return (clusterStateVersionBundleAcknowledged == null ? 
-1 : clusterStateVersionBundleAcknowledged.getVersion()); } - public void setSystemStateVersionSent(ClusterState state) { - if (state == null) throw new Error("Should not clear info for last version sent"); - if (systemStateVersionSent.containsKey(state.getVersion())) { - throw new IllegalStateException("We have already sent cluster state version " + state.getVersion() + " to " + node); + public void setClusterStateVersionBundleSent(ClusterStateBundle stateBundle) { + if (stateBundle == null) { + throw new Error("Should not clear info for last version sent"); } - systemStateVersionSent.put(state.getVersion(), state); + if (clusterStateVersionBundleSent.containsKey(stateBundle.getVersion())) { + throw new IllegalStateException("We have already sent cluster state version " + stateBundle.getVersion() + " to " + node); + } + clusterStateVersionBundleSent.put(stateBundle.getVersion(), stateBundle); } - public void setSystemStateVersionAcknowledged(Integer version, boolean success) { - if (version == null) throw new Error("Should not clear info for last version acked"); - if (!systemStateVersionSent.containsKey(version)) { + public void setClusterStateBundleVersionAcknowledged(Integer version, boolean success) { + if (version == null) { + throw new Error("Should not clear info for last version acked"); + } + if (!clusterStateVersionBundleSent.containsKey(version)) { throw new IllegalStateException("Got response for cluster state " + version + " which is not tracked as pending for node " + node); } - ClusterState state = systemStateVersionSent.remove(version); - if (success && (systemStateVersionAcknowledged == null || systemStateVersionAcknowledged.getVersion() < state.getVersion())) { - systemStateVersionAcknowledged = state; + var stateBundle = clusterStateVersionBundleSent.remove(version); + if (success && (clusterStateVersionBundleAcknowledged == null || clusterStateVersionBundleAcknowledged.getVersion() < stateBundle.getVersion())) { + 
clusterStateVersionBundleAcknowledged = stateBundle; if (wentDownWithStartTime != 0 - && (wentDownAtClusterState == null || wentDownAtClusterState.getVersion() < state.getVersion()) - && !state.getNodeState(node).getState().oneOf("dsm")) + && (wentDownAtClusterState == null || wentDownAtClusterState.getVersion() < stateBundle.getVersion()) + && !stateBundle.getBaselineClusterState().getNodeState(node).getState().oneOf("dsm")) { - log.log(LogLevel.DEBUG, "Clearing going down timestamp of node " + node + " after receiving ack of cluster state " + state); + log.log(LogLevel.DEBUG, () -> String.format("Clearing going down timestamp of node %s after " + + "receiving ack of cluster state bundle %s", node, stateBundle)); wentDownWithStartTime = 0; } } } + public void setClusterStateVersionActivationSent(int version) { + clusterStateVersionActivationSent = version; + } + public int getClusterStateVersionActivationSent() { + return clusterStateVersionActivationSent; + } + + public int getClusterStateVersionActivationAcked() { + return clusterStateVersionActivationAcked; + } + public void setSystemStateVersionActivationAcked(Integer version, boolean success) { + if (success && (version > clusterStateVersionActivationAcked)) { + clusterStateVersionActivationAcked = version; + } else if (!success) { + clusterStateVersionActivationSent = -1; // Trigger resend + } + } + + public void setHostInfo(HostInfo hostInfo) { // Note: This will blank out any hostInfo we already had, if the parsing fails. // This is intentional, to make sure we're never left with stale data. 
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SetClusterStateRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SetClusterStateRequest.java index 836876b5642..d4e79a4f2b2 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SetClusterStateRequest.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SetClusterStateRequest.java @@ -1,48 +1,10 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.clustercontroller.core; -public abstract class SetClusterStateRequest { - - private final NodeInfo nodeInfo; - private final int systemStateVersion; - private Reply reply; - - public SetClusterStateRequest(NodeInfo nodeInfo, int systemStateVersion) { - this.nodeInfo = nodeInfo; - this.systemStateVersion = systemStateVersion; - } - - public NodeInfo getNodeInfo() { return nodeInfo; } - - public int getSystemStateVersion() { return systemStateVersion; } - - public void setReply(Reply reply) { this.reply = reply; } - - public Reply getReply() { return reply; } - - public static class Reply { - - final int returnCode; - final String returnMessage; - - public Reply() { - this(0, null); - } - - public Reply(int returnCode, String returnMessage) { - this.returnCode = returnCode; - this.returnMessage = returnMessage; - } - - /** Returns whether this is an error response */ - public boolean isError() { return returnCode != 0; } - - /** Returns the return code, which is 0 if this request was successful */ - public int getReturnCode() { return returnCode; } - - /** Returns the message returned, or null if none */ - public String getReturnMessage() { return returnMessage; } +public abstract class SetClusterStateRequest extends ClusterStateVersionSpecificRequest { + public SetClusterStateRequest(NodeInfo nodeInfo, int clusterStateVersion) { + 
super(nodeInfo, clusterStateVersion); } } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java index 629800fb13c..5ecb57a1c76 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.clustercontroller.core; +import com.yahoo.jrt.ErrorCode; import com.yahoo.log.LogLevel; import com.yahoo.vdslib.state.*; import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler; @@ -19,13 +20,18 @@ public class SystemStateBroadcaster { private final Timer timer; private final Object monitor; private ClusterStateBundle clusterStateBundle; - private final List<SetClusterStateRequest> replies = new LinkedList<>(); + private final List<SetClusterStateRequest> setClusterStateReplies = new LinkedList<>(); + private final List<ActivateClusterStateVersionRequest> activateClusterStateVersionReplies = new LinkedList<>(); private final static long minTimeBetweenNodeErrorLogging = 10 * 60 * 1000; private final Map<Node, Long> lastErrorReported = new TreeMap<>(); - private int lastClusterStateInSync = 0; - private final ClusterStateWaiter waiter = new ClusterStateWaiter(); + private int lastStateVersionBundleAcked = 0; + private int lastClusterStateVersionConverged = 0; + private ClusterStateBundle lastClusterStateBundleConverged; + + private final SetClusterStateWaiter setClusterStateWaiter = new SetClusterStateWaiter(); + private final ActivateClusterStateVersionWaiter activateClusterStateVersionWaiter = new ActivateClusterStateVersionWaiter(); public SystemStateBroadcaster(Timer timer, 
Object monitor) { this.timer = timer; @@ -52,95 +58,204 @@ public class SystemStateBroadcaster { return clusterStateBundle; } + public ClusterStateBundle getLastClusterStateBundleConverged() { + return lastClusterStateBundleConverged; + } + private void reportNodeError(boolean nodeOk, NodeInfo info, String message) { long time = timer.getCurrentTimeInMillis(); Long lastReported = lastErrorReported.get(info.getNode()); boolean alreadySeen = (lastReported != null && time - lastReported < minTimeBetweenNodeErrorLogging); - log.log(nodeOk && !alreadySeen ? LogLevel.WARNING : LogLevel.DEBUG, message); - if (!alreadySeen) lastErrorReported.put(info.getNode(), time); + log.log((nodeOk && !alreadySeen) ? LogLevel.WARNING : LogLevel.DEBUG, message); + if (!alreadySeen) { + lastErrorReported.put(info.getNode(), time); + } } public boolean processResponses() { boolean anyResponsesFound = false; synchronized(monitor) { - for(SetClusterStateRequest req : replies) { - anyResponsesFound = true; - - NodeInfo info = req.getNodeInfo(); - boolean nodeOk = info.getReportedState().getState().oneOf("uir"); - int version = req.getSystemStateVersion(); - - if (req.getReply().isError()) { - info.setSystemStateVersionAcknowledged(version, false); - if (req.getReply().getReturnCode() != Communicator.TRANSIENT_ERROR) { - if (info.getNewestSystemStateVersionSent() == version) { - reportNodeError(nodeOk, info, - "Got error response " + req.getReply().getReturnCode() + ": " + req.getReply().getReturnMessage() - + " from " + info + " setsystemstate request."); - } - } + anyResponsesFound = !setClusterStateReplies.isEmpty() || !activateClusterStateVersionReplies.isEmpty(); + processSetClusterStateResponses(); + processActivateClusterStateVersionResponses(); + } + return anyResponsesFound; + } + + private void processActivateClusterStateVersionResponses() { + for (var req : activateClusterStateVersionReplies) { + NodeInfo info = req.getNodeInfo(); + int version = req.getClusterStateVersion(); + 
boolean success = true; + var reply = req.getReply(); + if (reply.isError()) { + // NO_SUCH_METHOD implies node is on a version that does not understand explicit activations + // and it has already merrily started using the state version. Treat as if it had been ACKed. + if (reply.getReturnCode() != ErrorCode.NO_SUCH_METHOD) { + log.log(LogLevel.DEBUG, () -> String.format("Activation NACK for node %s with version %d, message %s", + info, version, reply.getReturnMessage())); + success = false; } else { - info.setSystemStateVersionAcknowledged(version, true); - log.log(LogLevel.DEBUG, "Node " + info + " acked system state version " + version + "."); - lastErrorReported.remove(info.getNode()); + log.log(LogLevel.DEBUG, () -> String.format("Node %s did not understand state activation RPC; " + + "implicitly treating state %d as activated on node", info, version)); } + } else if (reply.getActualVersion() != version) { + boolean nodeOk = nodeReportsSelfAsAvailable(info); + // Avoid spamming the logs since this will happen on all resends until (presumably) the controller + // loses election status. + // TODO this should trigger a loss of current controller's leadership! + reportNodeError(nodeOk, info, String.format("Activation of version %d did not take effect, node %s " + + "reports it has an actual pending version of %d. Racing with another controller?", + version, info, reply.getActualVersion())); + success = false; + } else { + log.log(LogLevel.DEBUG, () -> String.format("Node %s reports successful activation of state " + + "version %d", info, version)); } - replies.clear(); + info.setSystemStateVersionActivationAcked(version, success); + // TODO we currently don't invoke reportNodeError here.. We assume that node errors will be reported + // as part of processSetClusterStateResponses anyway, but can add it here as well if deemed necessary. 
} - return anyResponsesFound; + activateClusterStateVersionReplies.clear(); } - private boolean nodeNeedsClusterState(NodeInfo node) { - if (node.getSystemStateVersionAcknowledged() == clusterStateBundle.getVersion()) { - return false; // No point in sending if node already has updated system state + private static boolean nodeReportsSelfAsAvailable(NodeInfo info) { + return info.getReportedState().getState().oneOf("uir"); + } + + private void processSetClusterStateResponses() { + for (SetClusterStateRequest req : setClusterStateReplies) { + NodeInfo info = req.getNodeInfo(); + int version = req.getClusterStateVersion(); + + if (req.getReply().isError()) { + info.setClusterStateBundleVersionAcknowledged(version, false); + if (req.getReply().getReturnCode() != Communicator.TRANSIENT_ERROR) { + if (info.getNewestSystemStateVersionSent() == version) { + boolean nodeOk = nodeReportsSelfAsAvailable(info); + reportNodeError(nodeOk, info, + String.format("Got error response %d: %s from %s setdistributionstates request.", + req.getReply().getReturnCode(), req.getReply().getReturnMessage(), info)); + } + } + } else { + info.setClusterStateBundleVersionAcknowledged(version, true); + log.log(LogLevel.DEBUG, () -> String.format("Node %s ACKed system state version %d.", info, version)); + lastErrorReported.remove(info.getNode()); + } } + setClusterStateReplies.clear(); + } + + private static boolean nodeIsReachable(NodeInfo node) { if (node.getRpcAddress() == null || node.isRpcAddressOutdated()) { return false; // Can't set state on nodes we don't know where are } if (node.getReportedState().getState() == State.MAINTENANCE || - node.getReportedState().getState() == State.DOWN || - node.getReportedState().getState() == State.STOPPING) + node.getReportedState().getState() == State.DOWN || + node.getReportedState().getState() == State.STOPPING) { return false; // No point in sending system state to nodes that can't receive messages or don't want them } return true; } + private 
boolean nodeNeedsClusterStateBundle(NodeInfo node) { + if (node.getClusterStateVersionBundleAcknowledged() == clusterStateBundle.getVersion()) { + return false; // No point in sending if node already has updated system state + } + return nodeIsReachable(node); + } + + private boolean nodeNeedsClusterStateActivation(NodeInfo node) { + if (node.getClusterStateVersionActivationAcked() == clusterStateBundle.getVersion()) { + return false; // No point in sending if node already has activated cluster state version + } + return nodeIsReachable(node); + } + private List<NodeInfo> resolveStateVersionSendSet(DatabaseHandler.Context dbContext) { return dbContext.getCluster().getNodeInfo().stream() - .filter(this::nodeNeedsClusterState) - .filter(node -> !newestStateAlreadySentToNode(node)) + .filter(this::nodeNeedsClusterStateBundle) + .filter(node -> !newestStateBundleAlreadySentToNode(node)) + .collect(Collectors.toList()); + } + + // Precondition: no nodes in the cluster need to receive the current cluster state version bundle + private List<NodeInfo> resolveStateActivationSendSet(DatabaseHandler.Context dbContext) { + return dbContext.getCluster().getNodeInfo().stream() + .filter(this::nodeNeedsClusterStateActivation) + .filter(node -> !newestStateActivationAlreadySentToNode(node)) .collect(Collectors.toList()); } - private boolean newestStateAlreadySentToNode(NodeInfo node) { + private boolean newestStateBundleAlreadySentToNode(NodeInfo node) { return (node.getNewestSystemStateVersionSent() == clusterStateBundle.getVersion()); } + private boolean newestStateActivationAlreadySentToNode(NodeInfo node) { + return (node.getClusterStateVersionActivationSent() == clusterStateBundle.getVersion()); + } + /** - * Checks if all distributor nodes have ACKed the most recent cluster state. Iff this - * is the case, triggers handleAllDistributorsInSync() on the provided FleetController + * Checks if all distributor nodes have ACKed (and activated) the most recent cluster state. 
+ * Iff this is the case, triggers handleAllDistributorsInSync() on the provided FleetController * object and updates the broadcaster's last known in-sync cluster state version. */ void checkIfClusterStateIsAckedByAllDistributors(DatabaseHandler database, - DatabaseHandler.Context dbContext, - FleetController fleetController) throws InterruptedException { - if ((clusterStateBundle == null) || (lastClusterStateInSync == clusterStateBundle.getVersion())) { + DatabaseHandler.Context dbContext, + FleetController fleetController) throws InterruptedException { + if ((clusterStateBundle == null) || currentClusterStateIsConverged()) { return; // Nothing to do for the current state } final int currentStateVersion = clusterStateBundle.getVersion(); - boolean anyOutdatedDistributorNodes = dbContext.getCluster().getNodeInfo().stream() + boolean anyDistributorsNeedStateBundle = dbContext.getCluster().getNodeInfo().stream() .filter(NodeInfo::isDistributor) - .anyMatch(this::nodeNeedsClusterState); + .anyMatch(this::nodeNeedsClusterStateBundle); - if (!anyOutdatedDistributorNodes && (currentStateVersion > lastClusterStateInSync)) { - log.log(LogLevel.DEBUG, "All distributors have newest clusterstate, updating start timestamps in zookeeper and clearing them from cluster state"); - lastClusterStateInSync = currentStateVersion; - fleetController.handleAllDistributorsInSync(database, dbContext); + if (!anyDistributorsNeedStateBundle && (currentStateVersion > lastStateVersionBundleAcked)) { + markCurrentClusterStateBundleAsReceivedByAllDistributors(); + if (clusterStateBundle.deferredActivation()) { + log.log(LogLevel.DEBUG, () -> String.format("All distributors have ACKed cluster state " + + "version %d, sending activation", currentStateVersion)); + } else { + markCurrentClusterStateAsConverged(database, dbContext, fleetController); + } + return; // Either converged (no two-phase) or activations must be sent before we can continue. 
+ } + + if (anyDistributorsNeedStateBundle || !clusterStateBundle.deferredActivation()) { + return; } + + boolean anyDistributorsNeedActivation = dbContext.getCluster().getNodeInfo().stream() + .filter(NodeInfo::isDistributor) + .anyMatch(this::nodeNeedsClusterStateActivation); + + if (!anyDistributorsNeedActivation && (currentStateVersion > lastClusterStateVersionConverged)) { + markCurrentClusterStateAsConverged(database, dbContext, fleetController); + } else { + log.log(LogLevel.DEBUG, () -> String.format("distributors still need activation in state %d (last converged: %d)", + currentStateVersion, lastClusterStateVersionConverged)); + } + } + + private void markCurrentClusterStateBundleAsReceivedByAllDistributors() { + lastStateVersionBundleAcked = clusterStateBundle.getVersion(); } - public boolean broadcastNewState(DatabaseHandler.Context dbContext, Communicator communicator) { + private void markCurrentClusterStateAsConverged(DatabaseHandler database, DatabaseHandler.Context dbContext, FleetController fleetController) throws InterruptedException { + log.log(LogLevel.DEBUG, "All distributors have newest clusterstate, updating start timestamps in zookeeper and clearing them from cluster state"); + lastClusterStateVersionConverged = clusterStateBundle.getVersion(); + lastClusterStateBundleConverged = clusterStateBundle; + fleetController.handleAllDistributorsInSync(database, dbContext); + } + + private boolean currentClusterStateIsConverged() { + return lastClusterStateVersionConverged == clusterStateBundle.getVersion(); + } + + public boolean broadcastNewStateBundleIfRequired(DatabaseHandler.Context dbContext, Communicator communicator) { if (clusterStateBundle == null) { return false; } @@ -157,20 +272,44 @@ public class SystemStateBroadcaster { if (nodeNeedsToObserveStartupTimestamps(node)) { // TODO this is the same for all nodes, compute only once ClusterStateBundle modifiedBundle = clusterStateBundle.cloneWithMapper(state -> 
buildModifiedClusterState(state, dbContext)); - log.log(LogLevel.DEBUG, "Sending modified cluster state version " + baselineState.getVersion() - + " to node " + node + ": " + modifiedBundle); - communicator.setSystemState(modifiedBundle, node, waiter); + log.log(LogLevel.DEBUG, () -> String.format("Sending modified cluster state version %d" + + " to node %s: %s", baselineState.getVersion(), node, modifiedBundle)); + communicator.setSystemState(modifiedBundle, node, setClusterStateWaiter); } else { - log.log(LogLevel.DEBUG, "Sending system state version " + baselineState.getVersion() + " to node " + node - + ". (went down time " + node.getWentDownWithStartTime() + ", node start time " + node.getStartTimestamp() + ")"); - communicator.setSystemState(clusterStateBundle, node, waiter); + log.log(LogLevel.DEBUG, () -> String.format("Sending system state version %d to node %s. " + + "(went down time %d, node start time %d)", baselineState.getVersion(), node, + node.getWentDownWithStartTime(), node.getStartTimestamp())); + communicator.setSystemState(clusterStateBundle, node, setClusterStateWaiter); } } return !recipients.isEmpty(); } - public int lastClusterStateVersionInSync() { return lastClusterStateInSync; } + public boolean broadcastStateActivationsIfRequired(DatabaseHandler.Context dbContext, Communicator communicator) { + if (clusterStateBundle == null || !clusterStateBundle.getBaselineClusterState().isOfficial()) { + return false; + } + + if (!clusterStateBundle.deferredActivation() || !allDistributorsHaveAckedSentClusterStateBundle()) { + return false; + } + + var recipients = resolveStateActivationSendSet(dbContext); + for (NodeInfo node : recipients) { + log.log(LogLevel.DEBUG, () -> String.format("Sending cluster state activation to node %s for version %d", + node, clusterStateBundle.getVersion())); + communicator.activateClusterStateVersion(clusterStateBundle.getVersion(), node, activateClusterStateVersionWaiter); + } + + return !recipients.isEmpty(); + } + 
+ private boolean allDistributorsHaveAckedSentClusterStateBundle() { + return (lastStateVersionBundleAcked == clusterStateBundle.getVersion()); + } + + public int lastClusterStateVersionInSync() { return lastClusterStateVersionConverged; } private static boolean nodeNeedsToObserveStartupTimestamps(NodeInfo node) { return node.getStartTimestamp() != 0 && node.getWentDownWithStartTime() == node.getStartTimestamp(); @@ -188,11 +327,20 @@ public class SystemStateBroadcaster { return newState; } - private class ClusterStateWaiter implements Communicator.Waiter<SetClusterStateRequest> { + private class SetClusterStateWaiter implements Communicator.Waiter<SetClusterStateRequest> { @Override public void done(SetClusterStateRequest reply) { synchronized (monitor) { - replies.add(reply); + setClusterStateReplies.add(reply); + } + } + } + + private class ActivateClusterStateVersionWaiter implements Communicator.Waiter<ActivateClusterStateVersionRequest> { + @Override + public void done(ActivateClusterStateVersionRequest reply) { + synchronized (monitor) { + activateClusterStateVersionReplies.add(reply); } } } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/listeners/SystemStateListener.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/listeners/SystemStateListener.java index 764bb3a0d92..a0d53e8c93e 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/listeners/SystemStateListener.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/listeners/SystemStateListener.java @@ -8,6 +8,14 @@ public interface SystemStateListener { // TODO consider rename to bundle void handleNewPublishedState(ClusterStateBundle states); + /** + * Invoked at the edge when all pending cluster state bundles and version activations + * have been successfully ACKed by all distributors in the cluster. 
+ * + * @param states bundle that has converged across all distributors + */ + default void handleStateConvergedInCluster(ClusterStateBundle states) {} + default void handleNewCandidateState(ClusterStateBundle states) {} } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCActivateClusterStateVersionRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCActivateClusterStateVersionRequest.java new file mode 100644 index 00000000000..c2f48ccf589 --- /dev/null +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCActivateClusterStateVersionRequest.java @@ -0,0 +1,20 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.clustercontroller.core.rpc; + +import com.yahoo.jrt.Request; +import com.yahoo.vespa.clustercontroller.core.ActivateClusterStateVersionRequest; +import com.yahoo.vespa.clustercontroller.core.NodeInfo; + +/** + * FRT RPC state implementation of a single cluster state activation request. + */ +public class RPCActivateClusterStateVersionRequest extends ActivateClusterStateVersionRequest { + + Request request; + + public RPCActivateClusterStateVersionRequest(NodeInfo nodeInfo, Request request, int clusterStateVersion) { + super(nodeInfo, clusterStateVersion); + this.request = request; + } + +} diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCActivateClusterStateVersionWaiter.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCActivateClusterStateVersionWaiter.java new file mode 100644 index 00000000000..175a0b50cd6 --- /dev/null +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCActivateClusterStateVersionWaiter.java @@ -0,0 +1,47 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.vespa.clustercontroller.core.rpc; + +import com.yahoo.jrt.ErrorCode; +import com.yahoo.jrt.Request; +import com.yahoo.jrt.RequestWaiter; +import com.yahoo.vespa.clustercontroller.core.ActivateClusterStateVersionRequest; +import com.yahoo.vespa.clustercontroller.core.Communicator; +import com.yahoo.vespa.clustercontroller.core.NodeInfo; +import com.yahoo.vespa.clustercontroller.core.Timer; + +/** + * Binds together the reply received for a particular cluster state activation RPC and + * the cluster controller-internal callback handler which expects to receive it. + */ +public class RPCActivateClusterStateVersionWaiter implements RequestWaiter { + + private final Communicator.Waiter<ActivateClusterStateVersionRequest> waiter; + private ActivateClusterStateVersionRequest request; + + public RPCActivateClusterStateVersionWaiter(Communicator.Waiter<ActivateClusterStateVersionRequest> waiter) { + this.waiter = waiter; + } + + public void setRequest(RPCActivateClusterStateVersionRequest request) { + this.request = request; + } + + public ActivateClusterStateVersionRequest.Reply getReply(Request req) { + NodeInfo info = request.getNodeInfo(); + if (req.isError()) { + return new ActivateClusterStateVersionRequest.Reply(req.errorCode(), req.errorMessage()); + } else if (!req.checkReturnTypes("i")) { + return new ActivateClusterStateVersionRequest.Reply(ErrorCode.BAD_REPLY, "Got RPC response with invalid return types from " + info); + } + int actualVersion = req.returnValues().get(0).asInt32(); + return ActivateClusterStateVersionRequest.Reply.withActualVersion(actualVersion); + } + + @Override + public void handleRequestDone(Request request) { + ActivateClusterStateVersionRequest.Reply reply = getReply(request); + this.request.setReply(reply); + waiter.done(this.request); + } + +} diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicator.java 
b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicator.java index 9089da68e10..c3c5c9e3b98 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicator.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicator.java @@ -15,6 +15,7 @@ import com.yahoo.vdslib.state.NodeState; import com.yahoo.vdslib.state.ClusterState; import com.yahoo.vdslib.state.State; import com.yahoo.log.LogLevel; +import com.yahoo.vespa.clustercontroller.core.ActivateClusterStateVersionRequest; import com.yahoo.vespa.clustercontroller.core.ClusterStateBundle; import com.yahoo.vespa.clustercontroller.core.Communicator; import com.yahoo.vespa.clustercontroller.core.FleetControllerOptions; @@ -35,6 +36,9 @@ public class RPCCommunicator implements Communicator { public static final Logger log = Logger.getLogger(RPCCommunicator.class.getName()); + public static final int ACTIVATE_CLUSTER_STATE_VERSION_RPC_VERSION = 4; + public static final String ACTIVATE_CLUSTER_STATE_VERSION_RPC_METHOD_NAME = "activate_cluster_state_version"; + public static final int SET_DISTRIBUTION_STATES_RPC_VERSION = 3; public static final String SET_DISTRIBUTION_STATES_RPC_METHOD_NAME = "setdistributionstates"; @@ -106,7 +110,7 @@ public class RPCCommunicator implements Communicator { public void getNodeState(NodeInfo node, Waiter<GetNodeStateRequest> externalWaiter) { Target connection = getConnection(node); if ( ! connection.isValid()) { - log.log(LogLevel.DEBUG, "Connection to " + node.getRpcAddress() + " could not be created."); + log.log(LogLevel.DEBUG, () -> String.format("Connection to '%s' could not be created.", node.getRpcAddress())); } NodeState currentState = node.getReportedState(); Request req = new Request("getnodestate3"); @@ -134,7 +138,7 @@ public class RPCCommunicator implements Communicator { Target connection = getConnection(node); if ( ! 
connection.isValid()) { - log.log(LogLevel.DEBUG, "Connection to " + node.getRpcAddress() + " could not be created."); + log.log(LogLevel.DEBUG, () -> String.format("Connection to '%s' could not be created.", node.getRpcAddress())); return; } int nodeVersion = node.getVersion(); @@ -158,7 +162,29 @@ public class RPCCommunicator implements Communicator { waiter.setRequest(stateRequest); connection.invokeAsync(req, 60, waiter); - node.setSystemStateVersionSent(baselineState); + node.setClusterStateVersionBundleSent(stateBundle); + } + + @Override + public void activateClusterStateVersion(int clusterStateVersion, NodeInfo node, Waiter<ActivateClusterStateVersionRequest> externalWaiter) { + var waiter = new RPCActivateClusterStateVersionWaiter(externalWaiter); + + Target connection = getConnection(node); + if ( ! connection.isValid()) { + log.log(LogLevel.DEBUG, () -> String.format("Connection to '%s' could not be created.", node.getRpcAddress())); + return; + } + + var req = new Request(ACTIVATE_CLUSTER_STATE_VERSION_RPC_METHOD_NAME); + req.parameters().add(new Int32Value(clusterStateVersion)); + + log.log(LogLevel.DEBUG, () -> String.format("Sending '%s' RPC to %s for state version %d", + req.methodName(), node.getRpcAddress(), clusterStateVersion)); + var activationRequest = new RPCActivateClusterStateVersionRequest(node, req, clusterStateVersion); + waiter.setRequest(activationRequest); + + connection.invokeAsync(req, 60, waiter); + node.setClusterStateVersionActivationSent(clusterStateVersion); } // protected for testing. 
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlimeClusterStateBundleCodec.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlimeClusterStateBundleCodec.java index 1c391f9aacf..cb76f67038c 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlimeClusterStateBundleCodec.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlimeClusterStateBundleCodec.java @@ -30,6 +30,9 @@ public class SlimeClusterStateBundleCodec implements ClusterStateBundleCodec, En public EncodedClusterStateBundle encode(ClusterStateBundle stateBundle) { Slime slime = new Slime(); Cursor root = slime.setObject(); + if (stateBundle.deferredActivation()) { + root.setBool("deferred-activation", stateBundle.deferredActivation()); + } Cursor states = root.setObject("states"); // TODO add another function that is not toString for this..! states.setString("baseline", stateBundle.getBaselineClusterState().toString()); @@ -55,8 +58,9 @@ public class SlimeClusterStateBundleCodec implements ClusterStateBundleCodec, En spaces.traverse(((ObjectTraverser)(key, value) -> { derivedStates.put(key, AnnotatedClusterState.withoutAnnotations(ClusterState.stateFromString(value.asString()))); })); + boolean deferredActivation = root.field("deferred-activation").asBool(); // defaults to false if not present - return ClusterStateBundle.of(AnnotatedClusterState.withoutAnnotations(baseline), derivedStates); + return ClusterStateBundle.of(AnnotatedClusterState.withoutAnnotations(baseline), derivedStates, deferredActivation); } // Technically the Slime enveloping could be its own class that is bundle codec independent, but diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java 
b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java index 07195d05aa8..6d11c4f1239 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java @@ -65,7 +65,7 @@ public class LegacyIndexPageRequestHandler implements StatusPageServer.RequestHa new VdsClusterHtmlRendrer(), content, timer, - stateVersionTracker.getVersionedClusterState(), + stateVersionTracker.getVersionedClusterStateBundle(), stateVersionTracker.getAggregatedClusterStats(), data.getOptions().storageDistribution, data.getOptions(), diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRendrer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRendrer.java index 551eb34f8fa..0daa9ff7d37 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRendrer.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRendrer.java @@ -64,7 +64,7 @@ public class VdsClusterHtmlRendrer { final TreeMap<Integer, NodeInfo> storageNodeInfos, final TreeMap<Integer, NodeInfo> distributorNodeInfos, final Timer timer, - final ClusterState state, + final ClusterStateBundle state, final ClusterStatsAggregator statsAggregator, final double minMergeCompletionRatio, final int maxPrematureCrashes, @@ -161,7 +161,7 @@ public class VdsClusterHtmlRendrer { final TreeMap<Integer, NodeInfo> nodeInfos, final NodeType nodeType, final Timer timer, - final ClusterState state, + final ClusterStateBundle stateBundle, final ClusterStatsAggregator statsAggregator, final double minMergeCompletionRatio, final int maxPrematureCrashes, @@ 
-169,145 +169,180 @@ public class VdsClusterHtmlRendrer { final String pathPrefix, final String dominantVtag, final String name) { + final ClusterState state = stateBundle.getBaselineClusterState(); final long currentTime = timer.getCurrentTimeInMillis(); addTableHeader(name, nodeType); for (final NodeInfo nodeInfo : nodeInfos.values()) { HtmlTable.Row row = new HtmlTable.Row(); + long timeSinceContact = nodeInfo.getTimeOfFirstFailingConnectionAttempt() == 0 + ? 0 : currentTime - nodeInfo.getTimeOfFirstFailingConnectionAttempt(); - // Add node index - row.addCell(new HtmlTable.Cell("<a href=\"" + pathPrefix + "/node=" + nodeInfo.getNode() - + "\">" + nodeInfo.getNodeIndex() + "</a>")); + addNodeIndex(pathPrefix, nodeInfo, row); + addReportedState(nodeInfo, row); + addWantedState(nodeInfo, row); + addCurrentState(state, nodeInfo, row); + addBuildTagVersion(dominantVtag, nodeInfo, row); + addFailedConnectionAttemptCount(nodeInfo, row, timeSinceContact); + addTimeSinceFirstFailing(nodeInfo, row, timeSinceContact); + addStatePendingTime(currentTime, nodeInfo, row); + addClusterStateVersion(stateBundle, nodeInfo, row); + addPrematureCrashes(maxPrematureCrashes, nodeInfo, row); + addEventsLastWeek(eventLog, currentTime, nodeInfo, row); + addBucketSpacesStats(nodeType, statsAggregator, minMergeCompletionRatio, nodeInfo, row); + addStartTime(nodeInfo, row); + addRpcAddress(nodeInfo, row); - // Add reported state - NodeState reportedState = nodeInfo.getReportedState().clone().setStartTimestamp(0); - row.addCell(new HtmlTable.Cell(HtmlTable.escape(reportedState.toString(true)))); - if (!nodeInfo.getReportedState().getState().equals(State.UP)) { + table.addRow(row); + } + } + + private void addRpcAddress(NodeInfo nodeInfo, HtmlTable.Row row) { + if (nodeInfo.getRpcAddress() == null) { + row.addCell(new HtmlTable.Cell("-").addProperties(ERROR_PROPERTY)); + } else { + row.addCell(new HtmlTable.Cell(HtmlTable.escape(nodeInfo.getRpcAddress()))); + if 
(nodeInfo.isRpcAddressOutdated()) { row.getLastCell().addProperties(WARNING_PROPERTY); } + } + } - // Add wanted state - if (nodeInfo.getWantedState() == null || nodeInfo.getWantedState().getState().equals(State.UP)) { - row.addCell(new HtmlTable.Cell("-").addProperties(CENTERED_PROPERTY)); - } else { - row.addCell(new HtmlTable.Cell(HtmlTable.escape(nodeInfo.getWantedState().toString(true)))); - if (nodeInfo.getWantedState().toString(true).indexOf("Disabled by fleet controller") != -1) { - row.getLastCell().addProperties(ERROR_PROPERTY); - } else { - row.getLastCell().addProperties(WARNING_PROPERTY); - } - } + private void addStartTime(NodeInfo nodeInfo, HtmlTable.Row row) { + if (nodeInfo.getStartTimestamp() == 0) { + row.addCell(new HtmlTable.Cell("-").addProperties(ERROR_PROPERTY).addProperties(CENTERED_PROPERTY)); + } else { + String startTime = RealTimer.printDateNoMilliSeconds( + 1000 * nodeInfo.getStartTimestamp(), utcTimeZone); + row.addCell(new HtmlTable.Cell(HtmlTable.escape(startTime))); + } + } - // Add current state - NodeState ns = state.getNodeState(nodeInfo.getNode()).clone().setDescription("").setMinUsedBits(16); - if (state.getClusterState().oneOf("uir")) { - row.addCell(new HtmlTable.Cell(HtmlTable.escape(ns.toString(true)))); - if (ns.getState().equals(State.DOWN)) { - row.getLastCell().addProperties(ERROR_PROPERTY); - } else if (ns.getState().oneOf("mi")) { - row.getLastCell().addProperties(WARNING_PROPERTY); - } - } else { - row.addCell(new HtmlTable.Cell("Cluster " + - state.getClusterState().name().toLowerCase()).addProperties(ERROR_PROPERTY)); - } + private void addBucketSpacesStats(NodeType nodeType, ClusterStatsAggregator statsAggregator, double minMergeCompletionRatio, NodeInfo nodeInfo, HtmlTable.Row row) { + if (nodeType.equals(NodeType.STORAGE)) { + addBucketStats(row, getStatsForContentNode(statsAggregator, nodeInfo, FixedBucketSpaces.defaultSpace()), + minMergeCompletionRatio); + addBucketStats(row, 
getStatsForContentNode(statsAggregator, nodeInfo, FixedBucketSpaces.globalSpace()), + minMergeCompletionRatio); + } else { + addBucketStats(row, getStatsForDistributorNode(statsAggregator, nodeInfo, FixedBucketSpaces.defaultSpace()), + minMergeCompletionRatio); + addBucketStats(row, getStatsForDistributorNode(statsAggregator, nodeInfo, FixedBucketSpaces.globalSpace()), + minMergeCompletionRatio); + } + } - // Add build tag version. - final String buildTagText = - nodeInfo.getVtag() != null - ? nodeInfo.getVtag() - : TAG_NOT_SET; - row.addCell(new HtmlTable.Cell(buildTagText)); - if (! dominantVtag.equals(nodeInfo.getVtag())) { - row.getLastCell().addProperties(WARNING_PROPERTY); - } + private void addEventsLastWeek(EventLog eventLog, long currentTime, NodeInfo nodeInfo, HtmlTable.Row row) { + int nodeEvents = eventLog.getNodeEventsSince(nodeInfo.getNode(), + currentTime - eventLog.getRecentTimePeriod()); + row.addCell(new HtmlTable.Cell("" + nodeEvents)); + if (nodeEvents > 20) { + row.getLastCell().addProperties(ERROR_PROPERTY); + } else if (nodeEvents > 3) { + row.getLastCell().addProperties(WARNING_PROPERTY); + } + } - // Add failed connection attempt count - row.addCell(new HtmlTable.Cell("" + nodeInfo.getConnectionAttemptCount())); - long timeSinceContact = nodeInfo.getTimeOfFirstFailingConnectionAttempt() == 0 - ? 
0 : currentTime - nodeInfo.getTimeOfFirstFailingConnectionAttempt(); - if (timeSinceContact > 60 * 1000) { - row.getLastCell().addProperties(ERROR_PROPERTY); - } else if (nodeInfo.getConnectionAttemptCount() > 0) { - row.getLastCell().addProperties(WARNING_PROPERTY); - } + private void addPrematureCrashes(int maxPrematureCrashes, NodeInfo nodeInfo, HtmlTable.Row row) { + row.addCell(new HtmlTable.Cell("" + nodeInfo.getPrematureCrashCount())); + if (nodeInfo.getPrematureCrashCount() >= maxPrematureCrashes) { + row.getLastCell().addProperties(ERROR_PROPERTY); + } else if (nodeInfo.getPrematureCrashCount() > 0) { + row.getLastCell().addProperties(WARNING_PROPERTY); + } + } - // Add time since first failing - row.addCell(new HtmlTable.Cell((timeSinceContact / 1000) + " s")); - if (timeSinceContact > 60 * 1000) { - row.getLastCell().addProperties(ERROR_PROPERTY); - } else if (nodeInfo.getConnectionAttemptCount() > 0) { - row.getLastCell().addProperties(WARNING_PROPERTY); - } + private void addClusterStateVersion(ClusterStateBundle state, NodeInfo nodeInfo, HtmlTable.Row row) { + String cellContent = (nodeInfo.getClusterStateVersionActivationAcked() == state.getVersion() || !state.deferredActivation()) + ? 
String.format("%d", nodeInfo.getClusterStateVersionBundleAcknowledged()) + : String.format("%d (%d)", nodeInfo.getClusterStateVersionBundleAcknowledged(), + nodeInfo.getClusterStateVersionActivationAcked()); + row.addCell(new HtmlTable.Cell(cellContent)); + if (nodeInfo.getClusterStateVersionBundleAcknowledged() < state.getVersion() - 2) { + row.getLastCell().addProperties(ERROR_PROPERTY); + } else if (nodeInfo.getClusterStateVersionBundleAcknowledged() < state.getVersion()) { + row.getLastCell().addProperties(WARNING_PROPERTY); + } + } - // State pending time - if (nodeInfo.getLatestNodeStateRequestTime() == null) { - row.addCell(new HtmlTable.Cell("-").addProperties(CENTERED_PROPERTY)); - } else { - row.addCell(new HtmlTable.Cell(HtmlTable.escape(RealTimer.printDuration( - currentTime - nodeInfo.getLatestNodeStateRequestTime())))); - } + private void addStatePendingTime(long currentTime, NodeInfo nodeInfo, HtmlTable.Row row) { + if (nodeInfo.getLatestNodeStateRequestTime() == null) { + row.addCell(new HtmlTable.Cell("-").addProperties(CENTERED_PROPERTY)); + } else { + row.addCell(new HtmlTable.Cell(HtmlTable.escape(RealTimer.printDuration( + currentTime - nodeInfo.getLatestNodeStateRequestTime())))); + } + } - // System state version - row.addCell(new HtmlTable.Cell("" + nodeInfo.getSystemStateVersionAcknowledged())); - if (nodeInfo.getSystemStateVersionAcknowledged() < state.getVersion() - 2) { - row.getLastCell().addProperties(ERROR_PROPERTY); - } else if (nodeInfo.getSystemStateVersionAcknowledged() < state.getVersion()) { - row.getLastCell().addProperties(WARNING_PROPERTY); - } + private void addTimeSinceFirstFailing(NodeInfo nodeInfo, HtmlTable.Row row, long timeSinceContact) { + row.addCell(new HtmlTable.Cell((timeSinceContact / 1000) + " s")); + if (timeSinceContact > 60 * 1000) { + row.getLastCell().addProperties(ERROR_PROPERTY); + } else if (nodeInfo.getConnectionAttemptCount() > 0) { + row.getLastCell().addProperties(WARNING_PROPERTY); + } + } - // 
Premature crashes - row.addCell(new HtmlTable.Cell("" + nodeInfo.getPrematureCrashCount())); - if (nodeInfo.getPrematureCrashCount() >= maxPrematureCrashes) { - row.getLastCell().addProperties(ERROR_PROPERTY); - } else if (nodeInfo.getPrematureCrashCount() > 0) { - row.getLastCell().addProperties(WARNING_PROPERTY); - } + private void addFailedConnectionAttemptCount(NodeInfo nodeInfo, HtmlTable.Row row, long timeSinceContact) { + row.addCell(new HtmlTable.Cell("" + nodeInfo.getConnectionAttemptCount())); + if (timeSinceContact > 60 * 1000) { + row.getLastCell().addProperties(ERROR_PROPERTY); + } else if (nodeInfo.getConnectionAttemptCount() > 0) { + row.getLastCell().addProperties(WARNING_PROPERTY); + } + } + + private void addBuildTagVersion(String dominantVtag, NodeInfo nodeInfo, HtmlTable.Row row) { + final String buildTagText = + nodeInfo.getVtag() != null + ? nodeInfo.getVtag() + : TAG_NOT_SET; + row.addCell(new HtmlTable.Cell(buildTagText)); + if (! dominantVtag.equals(nodeInfo.getVtag())) { + row.getLastCell().addProperties(WARNING_PROPERTY); + } + } - // Events last week - int nodeEvents = eventLog.getNodeEventsSince(nodeInfo.getNode(), - currentTime - eventLog.getRecentTimePeriod()); - row.addCell(new HtmlTable.Cell("" + nodeEvents)); - if (nodeEvents > 20) { + private void addCurrentState(ClusterState state, NodeInfo nodeInfo, HtmlTable.Row row) { + NodeState ns = state.getNodeState(nodeInfo.getNode()).clone().setDescription("").setMinUsedBits(16); + if (state.getClusterState().oneOf("uir")) { + row.addCell(new HtmlTable.Cell(HtmlTable.escape(ns.toString(true)))); + if (ns.getState().equals(State.DOWN)) { row.getLastCell().addProperties(ERROR_PROPERTY); - } else if (nodeEvents > 3) { + } else if (ns.getState().oneOf("mi")) { row.getLastCell().addProperties(WARNING_PROPERTY); } + } else { + row.addCell(new HtmlTable.Cell("Cluster " + + state.getClusterState().name().toLowerCase()).addProperties(ERROR_PROPERTY)); + } + } - // Bucket stats for 'default' and 
'global' spaces - if (nodeType.equals(NodeType.STORAGE)) { - addBucketStats(row, getStatsForContentNode(statsAggregator, nodeInfo, FixedBucketSpaces.defaultSpace()), - minMergeCompletionRatio); - addBucketStats(row, getStatsForContentNode(statsAggregator, nodeInfo, FixedBucketSpaces.globalSpace()), - minMergeCompletionRatio); - } else { - addBucketStats(row, getStatsForDistributorNode(statsAggregator, nodeInfo, FixedBucketSpaces.defaultSpace()), - minMergeCompletionRatio); - addBucketStats(row, getStatsForDistributorNode(statsAggregator, nodeInfo, FixedBucketSpaces.globalSpace()), - minMergeCompletionRatio); - } - - // Start time - if (nodeInfo.getStartTimestamp() == 0) { - row.addCell(new HtmlTable.Cell("-").addProperties(ERROR_PROPERTY).addProperties(CENTERED_PROPERTY)); + private void addWantedState(NodeInfo nodeInfo, HtmlTable.Row row) { + if (nodeInfo.getWantedState() == null || nodeInfo.getWantedState().getState().equals(State.UP)) { + row.addCell(new HtmlTable.Cell("-").addProperties(CENTERED_PROPERTY)); + } else { + row.addCell(new HtmlTable.Cell(HtmlTable.escape(nodeInfo.getWantedState().toString(true)))); + if (nodeInfo.getWantedState().toString(true).indexOf("Disabled by fleet controller") != -1) { + row.getLastCell().addProperties(ERROR_PROPERTY); } else { - String startTime = RealTimer.printDateNoMilliSeconds( - 1000 * nodeInfo.getStartTimestamp(), utcTimeZone); - row.addCell(new HtmlTable.Cell(HtmlTable.escape(startTime))); + row.getLastCell().addProperties(WARNING_PROPERTY); } + } + } - // RPC address - if (nodeInfo.getRpcAddress() == null) { - row.addCell(new HtmlTable.Cell("-").addProperties(ERROR_PROPERTY)); - } else { - row.addCell(new HtmlTable.Cell(HtmlTable.escape(nodeInfo.getRpcAddress()))); - if (nodeInfo.isRpcAddressOutdated()) { - row.getLastCell().addProperties(WARNING_PROPERTY); - } - } - table.addRow(row); + private void addReportedState(NodeInfo nodeInfo, HtmlTable.Row row) { + NodeState reportedState = 
nodeInfo.getReportedState().clone().setStartTimestamp(0); + row.addCell(new HtmlTable.Cell(HtmlTable.escape(reportedState.toString(true)))); + if (!nodeInfo.getReportedState().getState().equals(State.UP)) { + row.getLastCell().addProperties(WARNING_PROPERTY); } } + private void addNodeIndex(String pathPrefix, NodeInfo nodeInfo, HtmlTable.Row row) { + row.addCell(new HtmlTable.Cell("<a href=\"" + pathPrefix + "/node=" + nodeInfo.getNode() + + "\">" + nodeInfo.getNodeIndex() + "</a>")); + } + private static ContentNodeStats.BucketSpaceStats getStatsForContentNode(ClusterStatsAggregator statsAggregator, NodeInfo nodeInfo, String bucketSpace) { @@ -355,7 +390,7 @@ public class VdsClusterHtmlRendrer { .append("3) SPT - State pending time - Time the current getNodeState request has been " + "pending.<br>\n") .append("4) SSV - System state version - The latest system state version the node has " + - "acknowledged.<br>\n") + "acknowledged (last <em>activated</em> state version in parentheses if this is not equal to SSV).<br>\n") .append("5) PC - Premature crashes - Number of times node has crashed since last time it had " + "been stable in up or down state for more than " + RealTimer.printDuration(stableStateTimePeriode) + ".<br>\n") diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleTest.java index 7dccae988df..339d305e823 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleTest.java @@ -4,8 +4,11 @@ package com.yahoo.vespa.clustercontroller.core; import com.yahoo.vdslib.state.*; import org.junit.Test; +import java.util.function.Function; + import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.MatcherAssert.assertThat; +import 
static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -19,7 +22,7 @@ public class ClusterStateBundleTest { return AnnotatedClusterState.withoutAnnotations(stateOf(state)); } - private static ClusterStateBundle createTestBundle(boolean modifyDefaultSpace) { + private static ClusterStateBundle.Builder createTestBundleBuilder(boolean modifyDefaultSpace) { return ClusterStateBundle .builder(annotatedStateOf("distributor:2 storage:2")) .bucketSpaces("default", "global", "narnia") @@ -33,8 +36,11 @@ public class ClusterStateBundleTest { .setNodeState(Node.ofDistributor(0), new NodeState(NodeType.DISTRIBUTOR, State.DOWN)); } return derived; - }) - .deriveAndBuild(); + }); + } + + private static ClusterStateBundle createTestBundle(boolean modifyDefaultSpace) { + return createTestBundleBuilder(modifyDefaultSpace).deriveAndBuild(); } private static ClusterStateBundle createTestBundle() { @@ -96,4 +102,75 @@ public class ClusterStateBundleTest { "narnia 'distributor:2 .0.s:d storage:2')")); } + @Test + public void toString_without_derived_states_specifies_deferred_activation_iff_set() { + var bundle = ClusterStateBundle.ofBaselineOnly(annotatedStateOf("distributor:2 storage:2"), true); + assertThat(bundle.toString(), equalTo("ClusterStateBundle('distributor:2 storage:2' (deferred activation))")); + } + + @Test + public void toString_without_derived_states_does_not_specify_deferred_activation_iff_not_set() { + var bundle = ClusterStateBundle.ofBaselineOnly(annotatedStateOf("distributor:2 storage:2"), false); + assertThat(bundle.toString(), equalTo("ClusterStateBundle('distributor:2 storage:2')")); + } + + @Test + public void toString_with_derived_states_specifies_deferred_activation_iff_set() { + var bundle = createTestBundleBuilder(true).deferredActivation(true).deriveAndBuild(); + assertThat(bundle.toString(), equalTo("ClusterStateBundle('distributor:2 storage:2', " + + "default 'distributor:2 
storage:2 .0.s:d', " + + "global 'distributor:2 storage:2', " + + "narnia 'distributor:2 .0.s:d storage:2' (deferred activation))")); + } + + @Test + public void toString_with_derived_states_does_not_specify_deferred_activation_iff_not_set() { + var bundle = createTestBundleBuilder(true).deferredActivation(false).deriveAndBuild(); + assertThat(bundle.toString(), equalTo("ClusterStateBundle('distributor:2 storage:2', " + + "default 'distributor:2 storage:2 .0.s:d', " + + "global 'distributor:2 storage:2', " + + "narnia 'distributor:2 .0.s:d storage:2')")); + } + + @Test + public void deferred_activation_is_disabled_by_default() { + ClusterStateBundle bundle = createTestBundle(); + assertFalse(bundle.deferredActivation()); + } + + @Test + public void can_build_bundle_with_deferred_activation_enabled() { + var bundle = createTestBundleBuilder(false).deferredActivation(true).deriveAndBuild(); + assertTrue(bundle.deferredActivation()); + } + + @Test + public void can_build_bundle_with_deferred_activation_disabled() { + var bundle = createTestBundleBuilder(false).deferredActivation(false).deriveAndBuild(); + assertFalse(bundle.deferredActivation()); + } + + @Test + public void simple_bundle_without_derived_states_propagates_deferred_activation_flag() { + var bundle = ClusterStateBundle + .builder(annotatedStateOf("distributor:2 storage:2")) + .deferredActivation(true) // defaults to false + .deriveAndBuild(); + assertTrue(bundle.deferredActivation()); + } + + @Test + public void cloning_preserves_false_deferred_activation_flag() { + var bundle = createTestBundleBuilder(true).deferredActivation(false).deriveAndBuild(); + var derived = bundle.cloneWithMapper(Function.identity()); + assertEquals(bundle, derived); + } + + @Test + public void cloning_preserves_true_deferred_activation_flag() { + var bundle = createTestBundleBuilder(true).deferredActivation(true).deriveAndBuild(); + var derived = bundle.cloneWithMapper(Function.identity()); + assertEquals(bundle, derived); + } 
+ } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleUtil.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleUtil.java index 00c2194205d..cceb6d6f03f 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleUtil.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleUtil.java @@ -12,6 +12,12 @@ import java.util.stream.Stream; */ public class ClusterStateBundleUtil { + public static ClusterStateBundle.Builder makeBundleBuilder(String baselineState, StateMapping... bucketSpaceStates) { + return ClusterStateBundle.builder(AnnotatedClusterState.withoutAnnotations(ClusterState.stateFromString(baselineState))) + .explicitDerivedStates(Stream.of(bucketSpaceStates).collect(Collectors.toMap(sm -> sm.bucketSpace, + sm -> AnnotatedClusterState.withoutAnnotations(sm.state)))); + } + public static ClusterStateBundle makeBundle(String baselineState, StateMapping... 
bucketSpaceStates) { return ClusterStateBundle.of(AnnotatedClusterState.withoutAnnotations(ClusterState.stateFromString(baselineState)), Stream.of(bucketSpaceStates).collect(Collectors.toMap(sm -> sm.bucketSpace, diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ContentClusterHtmlRendrerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ContentClusterHtmlRendrerTest.java index f5adf644c28..9a3edf8e681 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ContentClusterHtmlRendrerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ContentClusterHtmlRendrerTest.java @@ -29,7 +29,10 @@ public class ContentClusterHtmlRendrerTest { @Before public void before() throws JSONException, ParseException { - final ClusterState state = new ClusterState("version:34633 bits:24 distributor:211 storage:211"); + final ClusterStateBundle stateBundle = ClusterStateBundle.ofBaselineOnly( + AnnotatedClusterState.withoutAnnotations( + ClusterState.stateFromString("version:34633 bits:24 distributor:211 storage:211"))); + final ClusterState state = stateBundle.getBaselineClusterState(); final EventLog eventLog = new EventLog(new FakeTimer(), null); final VdsClusterHtmlRendrer.Table table = rendrer.createNewClusterHtmlTable(clusterName, slobrokGeneration); @@ -55,7 +58,7 @@ public class ContentClusterHtmlRendrerTest { storageNodeInfoByIndex, distributorNodeInfoByIndex, new FakeTimer(), - state, + stateBundle, statsAggregator, 1.0, 10, diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java index e54a333a15e..27dcd009c96 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java +++ 
b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java @@ -49,7 +49,7 @@ public class DatabaseTest extends FleetControllerTest { @Test public void testWantedStatesInZooKeeper() throws Exception { startingTest("DatabaseTest::testWantedStatesInZooKeeper"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.zooKeeperServerAddress = "127.0.0.1"; setUpFleetController(true, options); setUpVdsNodes(true, new DummyVdsNodeOptions()); @@ -105,7 +105,7 @@ public class DatabaseTest extends FleetControllerTest { @Test public void testWantedStateOfUnknownNode() throws Exception { startingTest("DatabaseTest::testWantedStatesOfUnknownNode"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.minRatioOfDistributorNodesUp = 0; options.minRatioOfStorageNodesUp = 0; options.zooKeeperServerAddress = "localhost"; diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java index ae59336b5ef..77164b678c6 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java @@ -20,7 +20,7 @@ public class DistributionBitCountTest extends FleetControllerTest { for (int i = 0 ; i < 10; i++) { configuredNodes.add(new ConfiguredNode(i, false)); } - FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes); + FleetControllerOptions options = defaultOptions("mycluster", configuredNodes); options.distributionBits = 17; setUpFleetController(false, options); startingTest(testName); diff --git 
a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java index 5d200d65516..8314839336e 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java @@ -24,7 +24,7 @@ public class DummyCommunicator implements Communicator, NodeLookup { this.shouldDeferDistributorClusterStateAcks = shouldDeferDistributorClusterStateAcks; } - public class DummyGetNodeStateRequest extends GetNodeStateRequest { + class DummyGetNodeStateRequest extends GetNodeStateRequest { Waiter<GetNodeStateRequest> waiter; public DummyGetNodeStateRequest(NodeInfo nodeInfo, Waiter<GetNodeStateRequest> waiter) { @@ -47,6 +47,14 @@ public class DummyCommunicator implements Communicator, NodeLookup { } + public class DummyActivateClusterStateVersionRequest extends ActivateClusterStateVersionRequest { + + public DummyActivateClusterStateVersionRequest(NodeInfo nodeInfo, int stateVersion) { + super(nodeInfo, stateVersion); + } + + } + private Map<Node, DummyGetNodeStateRequest> getNodeStateRequests = new TreeMap<>(); public DummyCommunicator(List<Node> nodeList, Timer timer) { @@ -89,7 +97,7 @@ public class DummyCommunicator implements Communicator, NodeLookup { public void setSystemState(ClusterStateBundle stateBundle, NodeInfo node, Waiter<SetClusterStateRequest> waiter) { ClusterState baselineState = stateBundle.getBaselineClusterState(); DummySetClusterStateRequest req = new DummySetClusterStateRequest(node, baselineState); - node.setSystemStateVersionSent(baselineState); + node.setClusterStateVersionBundleSent(stateBundle); req.setReply(new SetClusterStateRequest.Reply()); if (node.isStorage() || !shouldDeferDistributorClusterStateAcks) { waiter.done(req); @@ -98,6 +106,13 @@ public class DummyCommunicator 
implements Communicator, NodeLookup { } } + @Override + public void activateClusterStateVersion(int clusterStateVersion, NodeInfo node, Waiter<ActivateClusterStateVersionRequest> waiter) { + var req = new DummyActivateClusterStateVersionRequest(node, clusterStateVersion); + req.setReply(ActivateClusterStateVersionRequest.Reply.withActualVersion(clusterStateVersion)); + waiter.done(req); + } + public void sendAllDeferredDistributorClusterStateAcks() { deferredClusterStateAcks.forEach(reqAndWaiter -> reqAndWaiter.getFirst().done(reqAndWaiter.getSecond())); deferredClusterStateAcks.clear(); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java index 6d59a672e86..bd68f0fa343 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java @@ -68,6 +68,7 @@ public class DummyVdsNode { * Any access to this list or to its members must be synchronized on the timer variable. */ private List<ClusterStateBundle> clusterStateBundles = new LinkedList<>(); + private int activatedClusterStateVersion = 0; private Thread messageResponder = new Thread() { public void run() { @@ -220,6 +221,12 @@ public class DummyVdsNode { } } + public int getActivatedClusterStateVersion() { + synchronized (timer) { + return activatedClusterStateVersion; + } + } + public boolean hasPendingGetNodeStateRequest() { synchronized (timer) { return !waitingRequests.isEmpty(); @@ -300,14 +307,21 @@ public class DummyVdsNode { public ClusterStateBundle getClusterStateBundle() { synchronized(timer) { - return (clusterStateBundles.isEmpty() ? null : clusterStateBundles.get(0)); + // In a two-phase state activation scenario, bundles are added to `clusterStateBundles` _before_ + // the version has been activated. 
Since we want this method to only return _activated_ bundles + // we filter out versions that are not yet activated. In a non two-phase scenario the activated + // version is implicitly the same as the most recently received bundle, so the filter is a no-op. + return clusterStateBundles.stream() + .filter(b -> b.getVersion() <= activatedClusterStateVersion) + .findFirst() // Most recent cluster state bundle first in list + .orElse(null); } } public ClusterState getClusterState() { - synchronized(timer) { - return (clusterStateBundles.isEmpty() ? null : clusterStateBundles.get(0).getBaselineClusterState()); - } + return Optional.ofNullable(getClusterStateBundle()) + .map(b -> b.getBaselineClusterState()) + .orElse(null); } public String getSlobrokName() { @@ -369,6 +383,13 @@ public class DummyVdsNode { m.paramDesc(2, "payload", "Slime format payload"); supervisor.addMethod(m); } + if (stateCommunicationVersion >= RPCCommunicator.ACTIVATE_CLUSTER_STATE_VERSION_RPC_VERSION) { + m = new Method(RPCCommunicator.ACTIVATE_CLUSTER_STATE_VERSION_RPC_METHOD_NAME, "i", "i", this, "rpc_activateClusterStateVersion"); + m.methodDesc("Activate a given cluster state version"); + m.paramDesc(0, "stateVersion", "Cluster state version to activate"); + m.returnDesc(0, "actualVersion", "Actual cluster state version on node"); + supervisor.addMethod(m); + } } public void rpc_storageConnect(Request req) { @@ -439,7 +460,7 @@ public class DummyVdsNode { } } } catch (Exception e) { - log.log(LogLevel.ERROR, "Dummy node " + this + ": An error occured when answering " + req.methodName() + " request: " + e.getMessage()); + log.log(LogLevel.ERROR, "Dummy node " + this + ": An error occurred when answering " + req.methodName() + " request: " + e.getMessage()); e.printStackTrace(System.err); req.setError(ErrorCode.METHOD_FAILED, e.getMessage()); } @@ -499,7 +520,7 @@ public class DummyVdsNode { req.returnValues().add(new StringValue("OK")); log.log(LogLevel.DEBUG, "Dummy node " + this + ": Got 
new system state (through old setsystemstate call) " + newState); } catch (Exception e) { - log.log(LogLevel.ERROR, "Dummy node " + this + ": An error occured when answering setsystemstate request: " + e.getMessage()); + log.log(LogLevel.ERROR, "Dummy node " + this + ": An error occurred when answering setsystemstate request: " + e.getMessage()); e.printStackTrace(System.err); req.returnValues().add(new Int32Value(ErrorCode.METHOD_FAILED)); req.returnValues().add(new StringValue(e.getMessage())); @@ -516,11 +537,14 @@ public class DummyVdsNode { synchronized(timer) { updateStartTimestamps(newState); clusterStateBundles.add(0, ClusterStateBundle.ofBaselineOnly(AnnotatedClusterState.withoutAnnotations(newState))); + if (stateCommunicationVersion < RPCCommunicator.ACTIVATE_CLUSTER_STATE_VERSION_RPC_VERSION) { + activatedClusterStateVersion = newState.getVersion(); // Simulate node that does not know of activation + } timer.notifyAll(); } log.log(LogLevel.DEBUG, "Dummy node " + this + ": Got new system state " + newState); } catch (Exception e) { - log.log(LogLevel.ERROR, "Dummy node " + this + ": An error occured when answering setsystemstate request: " + e.getMessage()); + log.log(LogLevel.ERROR, "Dummy node " + this + ": An error occurred when answering setsystemstate request: " + e.getMessage()); e.printStackTrace(System.err); req.setError(ErrorCode.METHOD_FAILED, e.getMessage()); } @@ -536,11 +560,41 @@ public class DummyVdsNode { synchronized(timer) { updateStartTimestamps(stateBundle.getBaselineClusterState()); clusterStateBundles.add(0, stateBundle); + if (stateCommunicationVersion < RPCCommunicator.ACTIVATE_CLUSTER_STATE_VERSION_RPC_VERSION) { + activatedClusterStateVersion = stateBundle.getVersion(); // Simulate node that does not know of activation + } timer.notifyAll(); } log.log(LogLevel.DEBUG, "Dummy node " + this + ": Got new cluster state " + stateBundle); } catch (Exception e) { - log.log(LogLevel.ERROR, "Dummy node " + this + ": An error occured when 
answering setdistributionstates request: " + e.getMessage()); + log.log(LogLevel.ERROR, "Dummy node " + this + ": An error occurred when answering setdistributionstates request: " + e.getMessage()); + e.printStackTrace(System.err); + req.setError(ErrorCode.METHOD_FAILED, e.getMessage()); + } + } + + public void rpc_activateClusterStateVersion(Request req) { + try { + if (shouldFailSetSystemStateRequests()) { + // We assume that failing setDistributionStates also implies failing version activations + req.setError(ErrorCode.GENERAL_ERROR, "Dummy node configured to fail activateClusterStateVersion() calls"); + return; + } + int activateVersion = req.parameters().get(0).asInt32(); + synchronized(timer) { + int actualVersion = getLatestSystemStateVersion().orElse(0); + req.returnValues().add(new Int32Value(actualVersion)); + if (activateVersion == actualVersion) { + activatedClusterStateVersion = activateVersion; + timer.notifyAll(); + } else { + log.log(LogLevel.DEBUG, () -> String.format("Dummy node %s: got a mismatching activation (request version %d, " + + "actual %d), not marking version as active", this, activateVersion, actualVersion)); + } + } + log.log(LogLevel.DEBUG, "Dummy node " + this + ": Activating cluster state version " + activateVersion); + } catch (Exception e) { + log.log(LogLevel.ERROR, "Dummy node " + this + ": An error occurred when answering activate_cluster_state_version request: " + e.getMessage()); e.printStackTrace(System.err); req.setError(ErrorCode.METHOD_FAILED, e.getMessage()); } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNodeOptions.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNodeOptions.java index bda06248d9e..bf63aebe022 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNodeOptions.java +++ 
b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNodeOptions.java @@ -8,6 +8,6 @@ import com.yahoo.vespa.clustercontroller.core.rpc.RPCCommunicator; * over regular RPC. */ public class DummyVdsNodeOptions { - // 0 - 4.1, 1 - 4.2-5.0.10, 2 - 5.0.11+, 3 - 6.220+ - public int stateCommunicationVersion = RPCCommunicator.SET_DISTRIBUTION_STATES_RPC_VERSION; + // 0 - 4.1, 1 - 4.2-5.0.10, 2 - 5.0.11+, 3 - 6.220+, 4 - 7.24+ + public int stateCommunicationVersion = RPCCommunicator.ACTIVATE_CLUSTER_STATE_VERSION_RPC_VERSION; } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java index 21d9b0a7a1f..5ecc33ae964 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java @@ -119,6 +119,18 @@ public abstract class FleetControllerTest implements Waiter { testName = name; } + static protected FleetControllerOptions defaultOptions(String clusterName) { + var opts = new FleetControllerOptions(clusterName); + opts.enableTwoPhaseClusterStateActivation = true; // Enable by default, tests can explicitly disable. + return opts; + } + + static protected FleetControllerOptions defaultOptions(String clusterName, Collection<ConfiguredNode> nodes) { + var opts = new FleetControllerOptions(clusterName, nodes); + opts.enableTwoPhaseClusterStateActivation = true; // Enable by default, tests can explicitly disable. 
+ return opts; + } + protected void setUpSystem(boolean useFakeTimer, FleetControllerOptions options) throws Exception { log.log(LogLevel.DEBUG, "Setting up system"); slobrok = new Slobrok(); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownLiveConfigTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownLiveConfigTest.java index 4a3eef559aa..4805257ea7a 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownLiveConfigTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownLiveConfigTest.java @@ -18,7 +18,7 @@ public class GroupAutoTakedownLiveConfigTest extends FleetControllerTest { private static FleetControllerOptions createOptions( DistributionBuilder.GroupBuilder groupBuilder, double minNodeRatio) { - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.setStorageDistribution(DistributionBuilder.forHierarchicCluster(groupBuilder)); options.nodes = DistributionBuilder.buildConfiguredNodes(groupBuilder.totalNodeCount()) .stream().collect(Collectors.toSet()); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java index 9d6e39f244a..23389de3fad 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java @@ -119,7 +119,7 @@ public class MasterElectionTest extends FleetControllerTest { public void testMasterElection() throws Exception { startingTest("MasterElectionTest::testMasterElection"); log.log(LogLevel.INFO, "STARTING TEST: 
MasterElectionTest::testMasterElection()"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.masterZooKeeperCooldownPeriod = 1; setUpFleetController(5, true, options); waitForMaster(0); @@ -223,7 +223,7 @@ public class MasterElectionTest extends FleetControllerTest { @Test public void testClusterStateVersionIncreasesAcrossMasterElections() throws Exception { startingTest("MasterElectionTest::testClusterStateVersionIncreasesAcrossMasterElections"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.masterZooKeeperCooldownPeriod = 1; setUpFleetController(5, true, options); // Currently need to have content nodes present for the cluster controller to even bother @@ -248,7 +248,7 @@ public class MasterElectionTest extends FleetControllerTest { @Test public void testVotingCorrectnessInFaceOfZKDisconnect() throws Exception { startingTest("MasterElectionTest::testVotingCorrectnessInFaceOfZKDisconnect"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); // "Magic" port value is in range allocated to module for testing. 
zooKeeperServer = ZooKeeperTestServer.createWithFixedPort(18342); options.zooKeeperSessionTimeout = 100; @@ -272,7 +272,7 @@ public class MasterElectionTest extends FleetControllerTest { @Test public void testZooKeeperUnavailable() throws Exception { startingTest("MasterElectionTest::testZooKeeperUnavailable"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.zooKeeperSessionTimeout = 100; options.masterZooKeeperCooldownPeriod = 100; options.zooKeeperServerAddress = "localhost"; @@ -308,7 +308,7 @@ public class MasterElectionTest extends FleetControllerTest { @Ignore public void testMasterZooKeeperCooldown() throws Exception { startingTest("MasterElectionTest::testMasterZooKeeperCooldown"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.masterZooKeeperCooldownPeriod = 3600 * 1000; // An hour setUpFleetController(3, true, options); waitForMaster(0); @@ -349,7 +349,7 @@ public class MasterElectionTest extends FleetControllerTest { @Ignore public void testGetMaster() throws Exception { startingTest("MasterElectionTest::testGetMaster"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.masterZooKeeperCooldownPeriod = 3600 * 1000; // An hour setUpFleetController(3, true, options); waitForMaster(0); @@ -429,7 +429,7 @@ public class MasterElectionTest extends FleetControllerTest { @Test public void testReconfigure() throws Exception { startingTest("MasterElectionTest::testReconfigure"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.masterZooKeeperCooldownPeriod = 1; setUpFleetController(3, true, options); waitForMaster(0); @@ -454,7 +454,7 @@ public class MasterElectionTest 
extends FleetControllerTest { @Test public void cluster_state_version_written_to_zookeeper_even_with_empty_send_set() throws Exception { startingTest("MasterElectionTest::cluster_state_version_written_to_zookeeper_even_with_empty_send_set"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.masterZooKeeperCooldownPeriod = 1; options.minRatioOfDistributorNodesUp = 0; options.minRatioOfStorageNodesUp = 0; @@ -500,7 +500,7 @@ public class MasterElectionTest extends FleetControllerTest { @Test public void previously_published_state_is_taken_into_account_for_default_space_when_controller_bootstraps() throws Exception { startingTest("MasterElectionTest::previously_published_state_is_taken_into_account_for_default_space_when_controller_bootstraps"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.clusterHasGlobalDocumentTypes = true; options.masterZooKeeperCooldownPeriod = 1; options.minTimeBeforeFirstSystemStateBroadcast = 100000; @@ -543,7 +543,7 @@ public class MasterElectionTest extends FleetControllerTest { @Test public void default_space_nodes_not_marked_as_maintenance_when_cluster_has_no_global_document_types() throws Exception { startingTest("MasterElectionTest::default_space_nodes_not_marked_as_maintenance_when_cluster_has_no_global_document_types"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.clusterHasGlobalDocumentTypes = false; options.masterZooKeeperCooldownPeriod = 1; options.minTimeBeforeFirstSystemStateBroadcast = 100000; diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java index 22831a04527..dff33d7a6fc 
100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java @@ -9,7 +9,7 @@ public class NoZooKeeperTest extends FleetControllerTest { @Test public void testWantedStatesInZooKeeper() throws Exception { startingTest("NoZooKeeperTest::testWantedStatesInZooKeeper"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.zooKeeperServerAddress = null; setUpFleetController(true, options); setUpVdsNodes(true, new DummyVdsNodeOptions()); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java index a2b21f10741..9003e369b59 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java @@ -28,7 +28,7 @@ public class NodeSlobrokConfigurationMembershipTest extends FleetControllerTest } private FleetControllerOptions optionsForConfiguredNodes(Set<ConfiguredNode> configuredNodes) { - FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes); + FleetControllerOptions options = defaultOptions("mycluster", configuredNodes); options.maxSlobrokDisconnectGracePeriod = 60 * 1000; options.nodeStateRequestTimeoutMS = 10000 * 60 * 1000; options.maxTransitionTime = transitionTimes(0); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java index fadafdc7d32..214ccfa1c5b 100644 --- 
a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java @@ -85,7 +85,7 @@ public class RpcServerTest extends FleetControllerTest { public void testFailOccasionallyAndIgnoreToSeeIfOtherTestsThenWork() { try{ startingTest("RpcServerTest::testFailOccasionallyAndIgnoreToSeeIfOtherTestsThenWork"); - setUpFleetController(true, new FleetControllerOptions("mycluster")); + setUpFleetController(true, defaultOptions("mycluster")); setUpVdsNodes(true, new DummyVdsNodeOptions()); waitForStableSystem(); } catch (Throwable t) {} @@ -95,7 +95,7 @@ public class RpcServerTest extends FleetControllerTest { public void testGetSystemState() throws Exception { LogFormatter.initializeLogging(); startingTest("RpcServerTest::testGetSystemState"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); setUpFleetController(true, options); setUpVdsNodes(true, new DummyVdsNodeOptions()); waitForStableSystem(); @@ -165,7 +165,7 @@ public class RpcServerTest extends FleetControllerTest { Set<ConfiguredNode> configuredNodes = new TreeSet<>(); for (int i = 0; i < 10; i++) configuredNodes.add(new ConfiguredNode(i, false)); - FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes); + FleetControllerOptions options = defaultOptions("mycluster", configuredNodes); options.minRatioOfStorageNodesUp = 0; options.maxInitProgressTime = 30000; options.stableStateTimePeriod = 60000; @@ -284,7 +284,7 @@ public class RpcServerTest extends FleetControllerTest { for (int i = 0; i < 9; i++) configuredNodes.add(new ConfiguredNode(i, false)); configuredNodes.add(new ConfiguredNode(9, true)); // Last node is configured retired - FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes); + FleetControllerOptions options = 
defaultOptions("mycluster", configuredNodes); options.minRatioOfStorageNodesUp = 0; options.maxInitProgressTime = 30000; options.stableStateTimePeriod = 60000; @@ -319,7 +319,7 @@ public class RpcServerTest extends FleetControllerTest { List<ConfiguredNode> configuredNodes = new ArrayList<>(); for (int i = 0; i < 5; i++) configuredNodes.add(new ConfiguredNode(i, false)); - FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes); + FleetControllerOptions options = defaultOptions("mycluster", configuredNodes); options.maxInitProgressTime = 30000; options.stableStateTimePeriod = 60000; setUpFleetController(true, options); @@ -343,7 +343,7 @@ public class RpcServerTest extends FleetControllerTest { configuredNodes.add(new ConfiguredNode(i, true)); configuredNodes.add(new ConfiguredNode(5, false)); configuredNodes.add(new ConfiguredNode(6, false)); - FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes); + FleetControllerOptions options = defaultOptions("mycluster", configuredNodes); options.slobrokConnectionSpecs = this.options.slobrokConnectionSpecs; this.options.maxInitProgressTime = 30000; this.options.stableStateTimePeriod = 60000; @@ -373,7 +373,7 @@ public class RpcServerTest extends FleetControllerTest { Set<ConfiguredNode> configuredNodes = new TreeSet<>(); for (int i = 0; i < 7; i++) configuredNodes.add(new ConfiguredNode(i, false)); - FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes); + FleetControllerOptions options = defaultOptions("mycluster", configuredNodes); options.slobrokConnectionSpecs = this.options.slobrokConnectionSpecs; this.options.maxInitProgressTime = 30000; this.options.stableStateTimePeriod = 60000; @@ -400,7 +400,7 @@ public class RpcServerTest extends FleetControllerTest { List<ConfiguredNode> configuredNodes = new ArrayList<>(); for (int i = 0; i < 5; i++) configuredNodes.add(new ConfiguredNode(i, false)); - FleetControllerOptions 
options = new FleetControllerOptions("mycluster", configuredNodes); + FleetControllerOptions options = defaultOptions("mycluster", configuredNodes); options.maxInitProgressTime = 30000; options.stableStateTimePeriod = 60000; setUpFleetController(true, options); @@ -412,7 +412,7 @@ public class RpcServerTest extends FleetControllerTest { Set<ConfiguredNode> configuredNodes = new TreeSet<>(); for (int i = 0; i < 5; i++) configuredNodes.add(new ConfiguredNode(i, false)); - FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes); + FleetControllerOptions options = defaultOptions("mycluster", configuredNodes); options.slobrokConnectionSpecs = this.options.slobrokConnectionSpecs; this.options.maxInitProgressTime = 30000; this.options.stableStateTimePeriod = 60000; @@ -427,7 +427,7 @@ public class RpcServerTest extends FleetControllerTest { configuredNodes.add(new ConfiguredNode(i, true)); configuredNodes.add(new ConfiguredNode(5, false)); configuredNodes.add(new ConfiguredNode(6, false)); - FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes); + FleetControllerOptions options = defaultOptions("mycluster", configuredNodes); options.slobrokConnectionSpecs = this.options.slobrokConnectionSpecs; this.options.maxInitProgressTime = 30000; this.options.stableStateTimePeriod = 60000; @@ -441,7 +441,7 @@ public class RpcServerTest extends FleetControllerTest { configuredNodes.add(new ConfiguredNode(i, true)); configuredNodes.add(new ConfiguredNode(5, false)); configuredNodes.add(new ConfiguredNode(6, false)); - FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes); + FleetControllerOptions options = defaultOptions("mycluster", configuredNodes); options.slobrokConnectionSpecs = this.options.slobrokConnectionSpecs; this.options.maxInitProgressTime = 30000; this.options.stableStateTimePeriod = 60000; @@ -489,7 +489,7 @@ public class RpcServerTest extends FleetControllerTest { 
@Test public void testSetNodeState() throws Exception { startingTest("RpcServerTest::testSetNodeState"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); Set<Integer> nodeIndexes = new TreeSet<>(Arrays.asList(new Integer[]{4, 6, 9, 10, 14, 16, 21, 22, 23, 25})); options.setStorageDistribution(new Distribution(getDistConfig(nodeIndexes))); setUpFleetController(true, options); @@ -535,7 +535,7 @@ public class RpcServerTest extends FleetControllerTest { @Test public void testSetNodeStateOutOfRange() throws Exception { startingTest("RpcServerTest::testSetNodeStateOutOfRange"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(2, 10))); setUpFleetController(true, options); setUpVdsNodes(true, new DummyVdsNodeOptions()); @@ -572,7 +572,7 @@ public class RpcServerTest extends FleetControllerTest { @Test public void testGetMaster() throws Exception { startingTest("RpcServerTest::testGetMaster"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(2, 10))); setUpFleetController(true, options); setUpVdsNodes(true, new DummyVdsNodeOptions()); @@ -594,7 +594,7 @@ public class RpcServerTest extends FleetControllerTest { @Test public void testGetNodeList() throws Exception { startingTest("RpcServerTest::testGetNodeList"); - setUpFleetController(true, new FleetControllerOptions("mycluster")); + setUpFleetController(true, defaultOptions("mycluster")); setUpVdsNodes(true, new DummyVdsNodeOptions()); waitForStableSystem(); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcVersionAutoDowngradeTest.java 
b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcVersionAutoDowngradeTest.java index d59dbb4933a..b76f1fc20bf 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcVersionAutoDowngradeTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcVersionAutoDowngradeTest.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.clustercontroller.core; import com.yahoo.vdslib.distribution.ConfiguredNode; import com.yahoo.vdslib.state.NodeState; import com.yahoo.vdslib.state.State; +import com.yahoo.vespa.clustercontroller.core.testutils.StateWaiter; import org.junit.Test; import java.util.ArrayList; @@ -16,7 +17,7 @@ public class RpcVersionAutoDowngradeTest extends FleetControllerTest { for (int i = 0 ; i < 10; i++) { configuredNodes.add(new ConfiguredNode(i, false)); } - FleetControllerOptions options = new FleetControllerOptions("mycluster", configuredNodes); + FleetControllerOptions options = defaultOptions("mycluster", configuredNodes); setUpFleetController(false, options); DummyVdsNodeOptions nodeOptions = new DummyVdsNodeOptions(); nodeOptions.stateCommunicationVersion = nodeRpcVersion; @@ -29,8 +30,16 @@ public class RpcVersionAutoDowngradeTest extends FleetControllerTest { @Test public void cluster_state_rpc_version_is_auto_downgraded_and_retried_for_older_nodes() throws Exception { - setUpFakeCluster(2); // HEAD is at v3 + setUpFakeCluster(2); // HEAD is at v4 waitForState("version:\\d+ distributor:10 storage:10"); } + @Test + public void implicit_activation_for_nodes_that_return_not_found_for_version_activation_rpc() throws Exception { + setUpFakeCluster(3); // HEAD is at v4 + waitForState("version:\\d+ distributor:10 storage:10"); + } + + // TODO partial version setup for simulating upgrades + } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java 
b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java index 1de5848fc06..209da523705 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java @@ -33,7 +33,7 @@ public class SlobrokTest extends FleetControllerTest { @Test public void testSingleSlobrokRestart() throws Exception { startingTest("SlobrokTest::testSingleSlobrokRestart"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.nodeStateRequestTimeoutMS = 60 * 60 * 1000; options.maxSlobrokDisconnectGracePeriod = 60 * 60 * 1000; setUpFleetController(true, options); @@ -87,7 +87,7 @@ public class SlobrokTest extends FleetControllerTest { @Test public void testNodeTooLongOutOfSlobrok() throws Exception { startingTest("SlobrokTest::testNodeTooLongOutOfSlobrok"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.maxSlobrokDisconnectGracePeriod = 60 * 1000; options.nodeStateRequestTimeoutMS = 10000 * 60 * 1000; setUpFleetController(true, options); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java index 32de3591f2d..ca246b3549f 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java @@ -95,7 +95,7 @@ public class StateChangeTest extends FleetControllerTest { @Test public void testNormalStartup() throws Exception { - FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions 
options = defaultOptions("mycluster", createNodes(10)); options.maxInitProgressTime = 50000; initialize(options); @@ -167,10 +167,13 @@ public class StateChangeTest extends FleetControllerTest { @Test public void testNodeGoingDownAndUp() throws Exception { - FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); options.nodeStateRequestTimeoutMS = 60 * 60 * 1000; options.minTimeBetweenNewSystemStates = 0; options.maxInitProgressTime = 50000; + // This test makes very specific assumptions about the amount of work done in a single tick. + // Two-phase cluster state activation changes this quite a bit, so disable it. At least for now. + options.enableTwoPhaseClusterStateActivation = false; initialize(options); @@ -254,7 +257,7 @@ public class StateChangeTest extends FleetControllerTest { @Test public void testNodeGoingDownAndUpNotifying() throws Exception { // Same test as above, but node manages to notify why it is going down first. 
- FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); options.nodeStateRequestTimeoutMS = 60 * 60 * 1000; options.maxSlobrokDisconnectGracePeriod = 100000; @@ -325,7 +328,7 @@ public class StateChangeTest extends FleetControllerTest { @Test public void testNodeGoingDownAndUpFast() throws Exception { - FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); options.maxSlobrokDisconnectGracePeriod = 60 * 1000; initialize(options); @@ -366,7 +369,7 @@ public class StateChangeTest extends FleetControllerTest { @Test public void testMaintenanceWhileNormalStorageNodeRestart() throws Exception { - FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); options.maxSlobrokDisconnectGracePeriod = 60 * 1000; initialize(options); @@ -426,7 +429,7 @@ public class StateChangeTest extends FleetControllerTest { nodes.add(new ConfiguredNode(i, retired)); } - FleetControllerOptions options = new FleetControllerOptions("mycluster", nodes); + FleetControllerOptions options = defaultOptions("mycluster", nodes); options.maxSlobrokDisconnectGracePeriod = 60 * 1000; initialize(options); @@ -485,7 +488,7 @@ public class StateChangeTest extends FleetControllerTest { nodes.add(new ConfiguredNode(i, retired)); } - FleetControllerOptions options = new FleetControllerOptions("mycluster", nodes); + FleetControllerOptions options = defaultOptions("mycluster", nodes); options.maxSlobrokDisconnectGracePeriod = 60 * 1000; initialize(options); @@ -508,7 +511,7 @@ public class StateChangeTest extends FleetControllerTest { @Test public void testDownNodeInitializing() throws Exception { // Actually report initializing state if node has been down steadily for 
a while - FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); options.maxTransitionTime.put(NodeType.STORAGE, 5000); options.maxInitProgressTime = 5000; options.stableStateTimePeriod = 20000; @@ -571,7 +574,7 @@ public class StateChangeTest extends FleetControllerTest { @Test public void testNodeInitializationStalled() throws Exception { // Node should eventually be marked down, and not become initializing next time, but stay down until up - FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); options.maxTransitionTime.put(NodeType.STORAGE, 5000); options.maxInitProgressTime = 5000; options.stableStateTimePeriod = 1000000; @@ -657,7 +660,7 @@ public class StateChangeTest extends FleetControllerTest { @Test public void testBackwardsInitializationProgress() throws Exception { // Same as stalled. Mark down, keep down until up - FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); options.maxTransitionTime.put(NodeType.STORAGE, 5000); options.maxInitProgressTime = 5000; options.stableStateTimePeriod = 1000000; @@ -700,7 +703,7 @@ public class StateChangeTest extends FleetControllerTest { @Test public void testNodeGoingDownWhileInitializing() throws Exception { // Same as stalled. 
Mark down, keep down until up - FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); options.maxTransitionTime.put(NodeType.STORAGE, 5000); options.maxInitProgressTime = 5000; options.stableStateTimePeriod = 1000000; @@ -759,7 +762,7 @@ public class StateChangeTest extends FleetControllerTest { public void testContinuousCrashRightAfterInit() throws Exception { startingTest("StateChangeTest::testContinuousCrashRightAfterInit"); // If node does this too many times, take it out of service - FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); options.maxTransitionTime.put(NodeType.STORAGE, 5000); options.maxInitProgressTime = 5000; options.maxPrematureCrashes = 2; @@ -813,7 +816,7 @@ public class StateChangeTest extends FleetControllerTest { public void testClusterStateMinNodes() throws Exception { startingTest("StateChangeTest::testClusterStateMinNodes"); // If node does this too many times, take it out of service - FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); options.maxTransitionTime.put(NodeType.STORAGE, 0); options.maxInitProgressTime = 0; options.minDistributorNodesUp = 6; @@ -868,7 +871,7 @@ public class StateChangeTest extends FleetControllerTest { public void testClusterStateMinFactor() throws Exception { startingTest("StateChangeTest::testClusterStateMinFactor"); // If node does this too many times, take it out of service - FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); options.maxTransitionTime.put(NodeType.STORAGE, 0); options.maxInitProgressTime = 0; 
options.minDistributorNodesUp = 0; @@ -941,7 +944,7 @@ public class StateChangeTest extends FleetControllerTest { @Test public void testNoSystemStateBeforeInitialTimePeriod() throws Exception { startingTest("StateChangeTest::testNoSystemStateBeforeInitialTimePeriod()"); - FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); options.minTimeBeforeFirstSystemStateBroadcast = 3 * 60 * 1000; setUpSystem(true, options); setUpVdsNodes(true, new DummyVdsNodeOptions(), true); @@ -982,7 +985,7 @@ public class StateChangeTest extends FleetControllerTest { @Test public void testSystemStateSentWhenNodesReplied() throws Exception { startingTest("StateChangeTest::testSystemStateSentWhenNodesReplied()"); - final FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + final FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); options.minTimeBeforeFirstSystemStateBroadcast = 300 * 60 * 1000; setUpSystem(true, options); @@ -1016,7 +1019,7 @@ public class StateChangeTest extends FleetControllerTest { @Test public void testDontTagFailingSetSystemStateOk() throws Exception { startingTest("StateChangeTest::testDontTagFailingSetSystemStateOk()"); - FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); setUpFleetController(true, options); setUpVdsNodes(true, new DummyVdsNodeOptions()); waitForStableSystem(); @@ -1035,7 +1038,7 @@ public class StateChangeTest extends FleetControllerTest { // Assert that the failed node has not acknowledged the latest version. 
// (The version may still be larger than versionBeforeChange if the fleet controller sends a // "stable system" update without timestamps in the meantime - assertTrue(fleetController.getCluster().getNodeInfo(nodes.get(1).getNode()).getSystemStateVersionAcknowledged() < versionAfterChange); + assertTrue(fleetController.getCluster().getNodeInfo(nodes.get(1).getNode()).getClusterStateVersionBundleAcknowledged() < versionAfterChange); // Ensure non-concurrent access to getNewestSystemStateVersionSent synchronized(timer) { @@ -1047,7 +1050,7 @@ public class StateChangeTest extends FleetControllerTest { @Test public void testAlteringDistributionSplitCount() throws Exception { startingTest("StateChangeTest::testAlteringDistributionSplitCount"); - FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); options.distributionBits = 17; initialize(options); @@ -1094,7 +1097,7 @@ public class StateChangeTest extends FleetControllerTest { @Test public void testSetAllTimestampsAfterDowntime() throws Exception { startingTest("StateChangeTest::testSetAllTimestampsAfterDowntime"); - FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); setUpFleetController(true, options); setUpVdsNodes(true, new DummyVdsNodeOptions()); waitForStableSystem(); @@ -1143,7 +1146,7 @@ public class StateChangeTest extends FleetControllerTest { @Test public void consolidated_cluster_state_reflects_node_changes_when_cluster_is_down() throws Exception { - FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); options.maxTransitionTime.put(NodeType.STORAGE, 0); options.minStorageNodesUp = 10; options.minDistributorNodesUp = 10; @@ -1177,7 +1180,7 @@ public class 
StateChangeTest extends FleetControllerTest { // of previous timer invocations (with subsequent state generation) would not be visible. @Test public void timer_events_during_cluster_down_observe_most_recent_node_changes() throws Exception { - FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); options.maxTransitionTime.put(NodeType.STORAGE, 1000); options.minStorageNodesUp = 10; options.minDistributorNodesUp = 10; @@ -1209,7 +1212,7 @@ public class StateChangeTest extends FleetControllerTest { @Test public void do_not_emit_multiple_events_when_node_state_does_not_match_versioned_state() throws Exception { - FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); initialize(options); ctrl.tick(); @@ -1343,11 +1346,19 @@ public class StateChangeTest extends FleetControllerTest { void sendAllDeferredDistributorClusterStateAcks() throws Exception { communicator.sendAllDeferredDistributorClusterStateAcks(); - ctrl.tick(); + ctrl.tick(); // Process cluster state bundle ACKs + if (ctrl.getOptions().enableTwoPhaseClusterStateActivation) { + ctrl.tick(); // Send activations + ctrl.tick(); // Process activation ACKs + } } void processScheduledTask() throws Exception { ctrl.tick(); // Cluster state recompute iteration and send + if (ctrl.getOptions().enableTwoPhaseClusterStateActivation) { + ctrl.tick(); // Send activations + ctrl.tick(); // Process activation ACKs + } ctrl.tick(); // Iff ACKs were received, process version dependent task(s) } @@ -1373,11 +1384,11 @@ public class StateChangeTest extends FleetControllerTest { } private static FleetControllerOptions defaultOptions() { - return new FleetControllerOptions("mycluster", createNodes(10)); + return defaultOptions("mycluster", createNodes(10)); } private static FleetControllerOptions 
optionsWithZeroTransitionTime() { - FleetControllerOptions options = new FleetControllerOptions("mycluster", createNodes(10)); + FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); options.maxTransitionTime.put(NodeType.STORAGE, 0); return options; } @@ -1440,7 +1451,7 @@ public class StateChangeTest extends FleetControllerTest { @Test public void no_op_synchronous_remote_task_waits_until_current_state_is_acked() throws Exception { - RemoteTaskFixture fixture = createFixtureWith(optionsWithZeroTransitionTime()); + RemoteTaskFixture fixture = createFixtureWith(optionsWithZeroTransitionTime()); communicator.setShouldDeferDistributorClusterStateAcks(true); fixture.markStorageNodeDown(0); @@ -1524,6 +1535,7 @@ public class StateChangeTest extends FleetControllerTest { FleetControllerOptions options = defaultOptions(); options.minTimeBetweenNewSystemStates = 10_000; RemoteTaskFixture fixture = createFixtureWith(options); + // Have to increment timer here to be able to send state generated by the scheduled task timer.advanceTime(10_000); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java index f208003e46e..007bda04f4e 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java @@ -27,7 +27,7 @@ public class StateGatherTest extends FleetControllerTest { public void testAlwaysHavePendingGetNodeStateRequestTowardsNodes() throws Exception { Logger.getLogger(NodeStateGatherer.class.getName()).setLevel(LogLevel.SPAM); startingTest("StateGatherTest::testOverlappingGetNodeStateRequests"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); 
options.nodeStateRequestTimeoutMS = 10 * 60 * 1000; // Force actual message timeout to be lower than request timeout. options.nodeStateRequestTimeoutEarliestPercentage = 80; diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatusPagesTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatusPagesTest.java index 45f11a2a0f3..fc18a1b0b8b 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatusPagesTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StatusPagesTest.java @@ -78,7 +78,7 @@ public class StatusPagesTest extends FleetControllerTest { @Test public void testStatusThroughContainer() throws Exception { startingTest("StatusPagesTest::testStatusThroughContainer()"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, 10))); final StatusHandler.ContainerStatusPageServer statusServer = new StatusHandler.ContainerStatusPageServer(); setUpFleetController(true, options, true, statusServer); @@ -176,7 +176,7 @@ public class StatusPagesTest extends FleetControllerTest { // Set this to true temporary if you want to check status page from browser. Should be false in checked in code always. 
boolean haltTestToViewStatusPage = false; startingTest("StatusPagesTest::testSimpleConnectionWithSomeContent()"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, 10))); //options.minRatioOfStorageNodesUp = 0.99; if (haltTestToViewStatusPage) { @@ -211,7 +211,7 @@ public class StatusPagesTest extends FleetControllerTest { @Test public void testNodePage() throws Exception { startingTest("StatusPagesTest::testNodePage()"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, 10))); setUpFleetController(true, options); setUpVdsNodes(true, new DummyVdsNodeOptions()); @@ -231,7 +231,7 @@ public class StatusPagesTest extends FleetControllerTest { @Test public void testErrorResponseCode() throws Exception { startingTest("StatusPagesTest::testNodePage()"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(3, 10))); setUpFleetController(true, options); setUpVdsNodes(true, new DummyVdsNodeOptions()); @@ -348,7 +348,7 @@ public class StatusPagesTest extends FleetControllerTest { @Test public void testStateServing() throws Exception { startingTest("StatusPagesTest::testStateServing()"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); setUpFleetController(true, options); fleetController.updateOptions(options, 5); waitForCompleteCycle(); @@ -376,7 +376,7 @@ public class StatusPagesTest extends FleetControllerTest { @Test public void 
testClusterStateServing() throws Exception { startingTest("StatusPagesTest::testClusterStateServing()"); - FleetControllerOptions options = new FleetControllerOptions("mycluster"); + FleetControllerOptions options = defaultOptions("mycluster"); setUpFleetController(true, options); fleetController.updateOptions(options, 5); waitForCompleteCycle(); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcasterTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcasterTest.java index aa5219147ce..f99df6a25b2 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcasterTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcasterTest.java @@ -6,12 +6,17 @@ import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler; import com.yahoo.vespa.clustercontroller.core.listeners.NodeAddedOrRemovedListener; import com.yahoo.vespa.clustercontroller.core.listeners.NodeStateOrHostInfoChangeHandler; import org.junit.Test; +import org.mockito.ArgumentCaptor; import java.util.stream.Stream; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNull; import static org.mockito.Matchers.any; import static org.mockito.Matchers.eq; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; public class SystemStateBroadcasterTest { @@ -21,6 +26,8 @@ public class SystemStateBroadcasterTest { final Object monitor = new Object(); SystemStateBroadcaster broadcaster = new SystemStateBroadcaster(timer, monitor); Communicator mockCommunicator = mock(Communicator.class); + DatabaseHandler mockDatabaseHandler = mock(DatabaseHandler.class); + FleetController mockFleetController = mock(FleetController.class); void 
simulateNodePartitionedAwaySilently(ClusterFixture cf) { cf.cluster().getNodeInfo(Node.ofStorage(0)).setStartTimestamp(600); @@ -32,6 +39,18 @@ public class SystemStateBroadcasterTest { cf.cluster().getNodeInfo(Node.ofDistributor(0)).setReportedState(new NodeState(NodeType.DISTRIBUTOR, State.DOWN).setStartTimestamp(500), 2000); cf.cluster().getNodeInfo(Node.ofDistributor(0)).setReportedState(new NodeState(NodeType.DISTRIBUTOR, State.UP).setStartTimestamp(500), 3000); } + + void simulateBroadcastTick(ClusterFixture cf) { + broadcaster.processResponses(); + broadcaster.broadcastNewStateBundleIfRequired(dbContextFrom(cf.cluster()), mockCommunicator); + try { + broadcaster.checkIfClusterStateIsAckedByAllDistributors( + mockDatabaseHandler, dbContextFrom(cf.cluster()), mockFleetController); + } catch (Exception e) { + throw new RuntimeException(e); + } + broadcaster.broadcastStateActivationsIfRequired(dbContextFrom(cf.cluster()), mockCommunicator); + } } private static DatabaseHandler.Context dbContextFrom(ContentCluster cluster) { @@ -68,7 +87,7 @@ public class SystemStateBroadcasterTest { ClusterStateBundle stateBundle = ClusterStateBundleUtil.makeBundle("distributor:2 storage:2"); ClusterFixture cf = ClusterFixture.forFlatCluster(2).bringEntireClusterUp().assignDummyRpcAddresses(); f.broadcaster.handleNewClusterStates(stateBundle); - f.broadcaster.broadcastNewState(dbContextFrom(cf.cluster()), f.mockCommunicator); + f.broadcaster.broadcastNewStateBundleIfRequired(dbContextFrom(cf.cluster()), f.mockCommunicator); cf.cluster().getNodeInfo().forEach(nodeInfo -> verify(f.mockCommunicator).setSystemState(eq(stateBundle), eq(nodeInfo), any())); } @@ -79,7 +98,7 @@ public class SystemStateBroadcasterTest { ClusterFixture cf = ClusterFixture.forFlatCluster(2).bringEntireClusterUp().assignDummyRpcAddresses(); f.simulateNodePartitionedAwaySilently(cf); f.broadcaster.handleNewClusterStates(stateBundle); - f.broadcaster.broadcastNewState(dbContextFrom(cf.cluster()), 
f.mockCommunicator); + f.broadcaster.broadcastNewStateBundleIfRequired(dbContextFrom(cf.cluster()), f.mockCommunicator); clusterNodeInfos(cf.cluster(), Node.ofDistributor(1), Node.ofStorage(0), Node.ofStorage(1)).forEach(nodeInfo -> { // Only distributor 0 should observe startup timestamps @@ -97,7 +116,7 @@ public class SystemStateBroadcasterTest { StateMapping.of("upsidedown", "distributor:2 .0.s:d storage:2")); ClusterFixture cf = ClusterFixture.forFlatCluster(2).bringEntireClusterUp().assignDummyRpcAddresses(); f.broadcaster.handleNewClusterStates(stateBundle); - f.broadcaster.broadcastNewState(dbContextFrom(cf.cluster()), f.mockCommunicator); + f.broadcaster.broadcastNewStateBundleIfRequired(dbContextFrom(cf.cluster()), f.mockCommunicator); cf.cluster().getNodeInfo().forEach(nodeInfo -> verify(f.mockCommunicator).setSystemState(eq(stateBundle), eq(nodeInfo), any())); } @@ -111,7 +130,7 @@ public class SystemStateBroadcasterTest { ClusterFixture cf = ClusterFixture.forFlatCluster(2).bringEntireClusterUp().assignDummyRpcAddresses(); f.simulateNodePartitionedAwaySilently(cf); f.broadcaster.handleNewClusterStates(stateBundle); - f.broadcaster.broadcastNewState(dbContextFrom(cf.cluster()), f.mockCommunicator); + f.broadcaster.broadcastNewStateBundleIfRequired(dbContextFrom(cf.cluster()), f.mockCommunicator); clusterNodeInfos(cf.cluster(), Node.ofDistributor(1), Node.ofStorage(0), Node.ofStorage(1)).forEach(nodeInfo -> { // Only distributor 0 should observe startup timestamps @@ -122,4 +141,200 @@ public class SystemStateBroadcasterTest { StateMapping.of("upsidedown", "distributor:2 .0.s:d storage:2 .0.t:600 .1.t:700")); verify(f.mockCommunicator).setSystemState(eq(expectedDistr0Bundle), eq(cf.cluster().getNodeInfo(Node.ofDistributor(0))), any()); } + + private static class MockSetClusterStateRequest extends SetClusterStateRequest { + public MockSetClusterStateRequest(NodeInfo nodeInfo, int clusterStateVersion) { + super(nodeInfo, clusterStateVersion); + } + } + + 
private static class MockActivateClusterStateVersionRequest extends ActivateClusterStateVersionRequest { + public MockActivateClusterStateVersionRequest(NodeInfo nodeInfo, int systemStateVersion) { + super(nodeInfo, systemStateVersion); + } + } + + private static void respondToSetClusterStateBundle(NodeInfo nodeInfo, + ClusterStateBundle stateBundle, + Communicator.Waiter<SetClusterStateRequest> waiter) { + // Have to patch in that we've actually sent the bundle in the first place... + nodeInfo.setClusterStateVersionBundleSent(stateBundle); + + var req = new MockSetClusterStateRequest(nodeInfo, stateBundle.getVersion()); + req.setReply(new ClusterStateVersionSpecificRequest.Reply()); + waiter.done(req); + } + + private static void respondToActivateClusterStateVersion(NodeInfo nodeInfo, + ClusterStateBundle stateBundle, + int actualVersion, + Communicator.Waiter<ActivateClusterStateVersionRequest> waiter) { + // Have to patch in that we've actually sent the bundle in the first place... + nodeInfo.setClusterStateVersionActivationSent(stateBundle.getVersion()); + + var req = new MockActivateClusterStateVersionRequest(nodeInfo, stateBundle.getVersion()); + req.setReply(ClusterStateVersionSpecificRequest.Reply.withActualVersion(actualVersion)); + waiter.done(req); + } + + private static void respondToActivateClusterStateVersion(NodeInfo nodeInfo, + ClusterStateBundle stateBundle, + Communicator.Waiter<ActivateClusterStateVersionRequest> waiter) { + respondToActivateClusterStateVersion(nodeInfo, stateBundle, stateBundle.getVersion(), waiter); + } + + private static class StateActivationFixture extends Fixture { + ClusterStateBundle stateBundle; + ClusterFixture cf; + + @SuppressWarnings("rawtypes") // Java generics <3 + final ArgumentCaptor<Communicator.Waiter> d0Waiter; + @SuppressWarnings("rawtypes") + final ArgumentCaptor<Communicator.Waiter> d1Waiter; + + private StateActivationFixture(boolean enableDeferred) { + super(); + stateBundle = ClusterStateBundleUtil + 
.makeBundleBuilder("version:123 distributor:2 storage:2") + .deferredActivation(enableDeferred) + .deriveAndBuild(); + cf = ClusterFixture.forFlatCluster(2).bringEntireClusterUp().assignDummyRpcAddresses(); + broadcaster.handleNewClusterStates(stateBundle); + broadcaster.broadcastNewStateBundleIfRequired(dbContextFrom(cf.cluster()), mockCommunicator); + + d0Waiter = ArgumentCaptor.forClass(Communicator.Waiter.class); + d1Waiter = ArgumentCaptor.forClass(Communicator.Waiter.class); + } + + @SuppressWarnings("unchecked") // Type erasure of Waiter in mocked argument capture + void expectSetSystemStateInvocationsToBothDistributors() { + clusterNodeInfos(cf.cluster(), Node.ofDistributor(0), Node.ofDistributor(1)).forEach(nodeInfo -> { + verify(mockCommunicator).setSystemState(eq(stateBundle), eq(nodeInfo), + (nodeInfo.getNodeIndex() == 0 ? d0Waiter : d1Waiter).capture()); + }); + } + + @SuppressWarnings("unchecked") // Type erasure of Waiter in mocked argument capture + void ackStateBundleFromBothDistributors() { + expectSetSystemStateInvocationsToBothDistributors(); + simulateBroadcastTick(cf); + + respondToSetClusterStateBundle(cf.cluster.getNodeInfo(Node.ofDistributor(0)), stateBundle, d0Waiter.getValue()); + respondToSetClusterStateBundle(cf.cluster.getNodeInfo(Node.ofDistributor(1)), stateBundle, d1Waiter.getValue()); + simulateBroadcastTick(cf); + } + + static StateActivationFixture withTwoPhaseEnabled() { + return new StateActivationFixture(true); + } + + static StateActivationFixture withTwoPhaseDisabled() { + return new StateActivationFixture(false); + } + } + + @Test + @SuppressWarnings("unchecked") // Type erasure of Waiter in mocked argument capture + public void activation_not_sent_before_all_distributors_have_acked_state_bundle() { + var f = StateActivationFixture.withTwoPhaseEnabled(); + var cf = f.cf; + + f.expectSetSystemStateInvocationsToBothDistributors(); + f.simulateBroadcastTick(cf); + + // Respond from distributor 0, but not yet from distributor 1 
+ respondToSetClusterStateBundle(cf.cluster.getNodeInfo(Node.ofDistributor(0)), f.stateBundle, f.d0Waiter.getValue()); + f.simulateBroadcastTick(cf); + + // No activations should be sent yet + cf.cluster().getNodeInfo().forEach(nodeInfo -> { + verify(f.mockCommunicator, times(0)).activateClusterStateVersion(eq(123), eq(nodeInfo), any()); + }); + assertNull(f.broadcaster.getLastClusterStateBundleConverged()); + + respondToSetClusterStateBundle(cf.cluster.getNodeInfo(Node.ofDistributor(1)), f.stateBundle, f.d1Waiter.getValue()); + f.simulateBroadcastTick(cf); + + // Activation should now be sent to _all_ nodes (distributor and storage) + cf.cluster().getNodeInfo().forEach(nodeInfo -> { + verify(f.mockCommunicator).activateClusterStateVersion(eq(123), eq(nodeInfo), any()); + }); + // But not converged yet, as activations have not been ACKed + assertNull(f.broadcaster.getLastClusterStateBundleConverged()); + } + + @Test + @SuppressWarnings("unchecked") // Type erasure of Waiter in mocked argument capture + public void state_bundle_not_considered_converged_until_activation_acked_by_all_distributors() { + var f = StateActivationFixture.withTwoPhaseEnabled(); + var cf = f.cf; + + f.ackStateBundleFromBothDistributors(); + + final var d0ActivateWaiter = ArgumentCaptor.forClass(Communicator.Waiter.class); + final var d1ActivateWaiter = ArgumentCaptor.forClass(Communicator.Waiter.class); + + clusterNodeInfos(cf.cluster(), Node.ofDistributor(0), Node.ofDistributor(1)).forEach(nodeInfo -> { + verify(f.mockCommunicator).activateClusterStateVersion(eq(123), eq(nodeInfo), + (nodeInfo.getNodeIndex() == 0 ? 
d0ActivateWaiter : d1ActivateWaiter).capture()); + }); + + respondToActivateClusterStateVersion(cf.cluster.getNodeInfo(Node.ofDistributor(0)), + f.stateBundle, d0ActivateWaiter.getValue()); + f.simulateBroadcastTick(cf); + + assertNull(f.broadcaster.getLastClusterStateBundleConverged()); // Not yet converged + + respondToActivateClusterStateVersion(cf.cluster.getNodeInfo(Node.ofDistributor(1)), + f.stateBundle, d1ActivateWaiter.getValue()); + f.simulateBroadcastTick(cf); + + // Finally, all distributors have ACKed the version! State is marked as converged. + assertEquals(f.stateBundle, f.broadcaster.getLastClusterStateBundleConverged()); + } + + @Test + @SuppressWarnings("unchecked") // Type erasure of Waiter in mocked argument capture + public void activation_not_sent_if_deferred_activation_is_disabled_in_state_bundle() { + var f = StateActivationFixture.withTwoPhaseDisabled(); + var cf = f.cf; + + f.ackStateBundleFromBothDistributors(); + + // At this point the cluster state shall be considered converged. + assertEquals(f.stateBundle, f.broadcaster.getLastClusterStateBundleConverged()); + + // No activations shall have been sent. 
+ clusterNodeInfos(cf.cluster(), Node.ofDistributor(0), Node.ofDistributor(1)).forEach(nodeInfo -> { + verify(f.mockCommunicator, times(0)).activateClusterStateVersion(eq(123), eq(nodeInfo), any()); + }); + } + + @Test + @SuppressWarnings("unchecked") // Type erasure of Waiter in mocked argument capture + public void activation_convergence_considers_actual_version_returned_from_node() { + var f = StateActivationFixture.withTwoPhaseEnabled(); + var cf = f.cf; + + f.ackStateBundleFromBothDistributors(); + + final var d0ActivateWaiter = ArgumentCaptor.forClass(Communicator.Waiter.class); + final var d1ActivateWaiter = ArgumentCaptor.forClass(Communicator.Waiter.class); + + clusterNodeInfos(cf.cluster(), Node.ofDistributor(0), Node.ofDistributor(1)).forEach(nodeInfo -> { + verify(f.mockCommunicator).activateClusterStateVersion(eq(123), eq(nodeInfo), + (nodeInfo.getNodeIndex() == 0 ? d0ActivateWaiter : d1ActivateWaiter).capture()); + }); + + respondToActivateClusterStateVersion(cf.cluster.getNodeInfo(Node.ofDistributor(0)), + f.stateBundle, d0ActivateWaiter.getValue()); + // Distributor 1 reports higher actual version, should not cause this version to be + // considered converged since it's not an exact version match. 
+ respondToActivateClusterStateVersion(cf.cluster.getNodeInfo(Node.ofDistributor(1)), + f.stateBundle, 124, d1ActivateWaiter.getValue()); + f.simulateBroadcastTick(cf); + + assertNull(f.broadcaster.getLastClusterStateBundleConverged()); + } + } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java index 6e5b40aa7d9..bc7ee6adee1 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java @@ -10,7 +10,7 @@ public class WantedStateTest extends FleetControllerTest { @Test public void testSettingStorageNodeMaintenanceAndBack() throws Exception { startingTest("WantedStateTest::testSettingStorageNodeMaintenanceAndBack()"); - setUpFleetController(true, new FleetControllerOptions("mycluster")); + setUpFleetController(true, defaultOptions("mycluster")); setUpVdsNodes(true, new DummyVdsNodeOptions()); waitForStableSystem(); @@ -24,7 +24,7 @@ public class WantedStateTest extends FleetControllerTest { @Test public void testOverridingWantedStateOtherReason() throws Exception { startingTest("WantedStateTest::testOverridingWantedStateOtherReason()"); - setUpFleetController(true, new FleetControllerOptions("mycluster")); + setUpFleetController(true, defaultOptions("mycluster")); setUpVdsNodes(true, new DummyVdsNodeOptions()); waitForStableSystem(); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicatorTest.java index 7602f0c83a2..9eb98f4f045 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicatorTest.java +++ 
b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/RPCCommunicatorTest.java @@ -99,7 +99,7 @@ public class RPCCommunicatorTest { (RequestWaiter)any()); } - private static class Fixture { + private static class Fixture<RequestType> { final Supervisor mockSupervisor = mock(Supervisor.class); final Target mockTarget = mock(Target.class); final Timer timer = new FakeTimer(); @@ -107,7 +107,7 @@ public class RPCCommunicatorTest { final AtomicReference<Request> receivedRequest = new AtomicReference<>(); final AtomicReference<RequestWaiter> receivedWaiter = new AtomicReference<>(); @SuppressWarnings("unchecked") // Cannot mock with "compiler-obvious" type safety for generics - final Communicator.Waiter<SetClusterStateRequest> mockWaiter = mock(Communicator.Waiter.class); + final Communicator.Waiter<RequestType> mockWaiter = mock(Communicator.Waiter.class); Fixture() { communicator = new RPCCommunicator( @@ -131,9 +131,9 @@ public class RPCCommunicatorTest { @Test public void setSystemState_v3_sends_distribution_states_rpc() { - Fixture f = new Fixture(); - ClusterFixture cf = ClusterFixture.forFlatCluster(3).bringEntireClusterUp().assignDummyRpcAddresses(); - ClusterStateBundle sentBundle = ClusterStateBundleUtil.makeBundle("distributor:3 storage:3"); + var f = new Fixture<SetClusterStateRequest>(); + var cf = ClusterFixture.forFlatCluster(3).bringEntireClusterUp().assignDummyRpcAddresses(); + var sentBundle = ClusterStateBundleUtil.makeBundle("distributor:3 storage:3"); f.communicator.setSystemState(sentBundle, cf.cluster().getNodeInfo(Node.ofStorage(1)), f.mockWaiter); Request req = f.receivedRequest.get(); @@ -147,9 +147,9 @@ public class RPCCommunicatorTest { @Test public void set_distribution_states_v3_rpc_auto_downgrades_to_v2_on_unknown_method_error() { - Fixture f = new Fixture(); - ClusterFixture cf = ClusterFixture.forFlatCluster(3).bringEntireClusterUp().assignDummyRpcAddresses(); - ClusterStateBundle sentBundle = 
ClusterStateBundleUtil.makeBundle("version:123 distributor:3 storage:3"); + var f = new Fixture<SetClusterStateRequest>(); + var cf = ClusterFixture.forFlatCluster(3).bringEntireClusterUp().assignDummyRpcAddresses(); + var sentBundle = ClusterStateBundleUtil.makeBundle("version:123 distributor:3 storage:3"); f.communicator.setSystemState(sentBundle, cf.cluster().getNodeInfo(Node.ofStorage(1)), f.mockWaiter); RequestWaiter waiter = f.receivedWaiter.get(); @@ -161,7 +161,7 @@ public class RPCCommunicatorTest { waiter.handleRequestDone(req); // This would normally be done in processResponses(), but that code path is not invoked in this test. - cf.cluster().getNodeInfo(Node.ofStorage(1)).setSystemStateVersionAcknowledged(123, false); + cf.cluster().getNodeInfo(Node.ofStorage(1)).setClusterStateBundleVersionAcknowledged(123, false); f.receivedRequest.set(null); // Now when we try again, we should have been downgraded to the legacy setsystemstate2 RPC @@ -171,4 +171,17 @@ public class RPCCommunicatorTest { assertThat(req.methodName(), equalTo(RPCCommunicator.LEGACY_SET_SYSTEM_STATE2_RPC_METHOD_NAME)); } + @Test + public void activateClusterStateVersion_sends_version_activation_rpc() { + var f = new Fixture<ActivateClusterStateVersionRequest>(); + var cf = ClusterFixture.forFlatCluster(3).bringEntireClusterUp().assignDummyRpcAddresses(); + f.communicator.activateClusterStateVersion(12345, cf.cluster().getNodeInfo(Node.ofDistributor(1)), f.mockWaiter); + + Request req = f.receivedRequest.get(); + assertThat(req, notNullValue()); + assertThat(req.methodName(), equalTo(RPCCommunicator.ACTIVATE_CLUSTER_STATE_VERSION_RPC_METHOD_NAME)); + assertTrue(req.parameters().satisfies("i")); // <cluster state version> + assertThat(req.parameters().get(0).asInt32(), equalTo(12345)); + } + } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/SlimeClusterStateBundleCodecTest.java 
b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/SlimeClusterStateBundleCodecTest.java index b19b1d780bf..3dce1153685 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/SlimeClusterStateBundleCodecTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/rpc/SlimeClusterStateBundleCodecTest.java @@ -72,4 +72,20 @@ public class SlimeClusterStateBundleCodecTest { assertThat(roundtripEncodeWithEnvelope(stateBundle), equalTo(stateBundle)); } + @Test + public void can_roundtrip_encode_bundle_with_deferred_activation_enabled() { + var stateBundle = ClusterStateBundleUtil.makeBundleBuilder("distributor:2 storage:2") + .deferredActivation(true) + .deriveAndBuild(); + assertThat(roundtripEncode(stateBundle), equalTo(stateBundle)); + } + + @Test + public void can_roundtrip_encode_bundle_with_deferred_activation_disabled() { + var stateBundle = ClusterStateBundleUtil.makeBundleBuilder("distributor:2 storage:2") + .deferredActivation(false) + .deriveAndBuild(); + assertThat(roundtripEncode(stateBundle), equalTo(stateBundle)); + } + } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java index d140ef998b6..9734156b13f 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java @@ -29,6 +29,7 @@ public interface WaitCondition { abstract class StateWait implements WaitCondition { private final Object monitor; protected ClusterState currentState; + protected ClusterState convergedState; private final SystemStateListener listener = new SystemStateListener() { @Override public void handleNewPublishedState(ClusterStateBundle state) { @@ -37,6 
+38,14 @@ public interface WaitCondition { monitor.notifyAll(); } } + + @Override + public void handleStateConvergedInCluster(ClusterStateBundle states) { + synchronized (monitor) { + currentState = convergedState = states.getBaselineClusterState(); + monitor.notifyAll(); + } + } }; public StateWait(FleetController fc, Object monitor) { @@ -90,8 +99,8 @@ public interface WaitCondition { @Override public String isConditionMet() { - if (currentState != null) { - lastCheckedState = currentState; + if (convergedState != null) { + lastCheckedState = convergedState; Matcher m = pattern.matcher(lastCheckedState.toString()); if (m.matches() || !checkSpaceSubset.isEmpty()) { if (nodesToCheck != null) { diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/MapEvaluationTypeContext.java b/config-model/src/main/java/com/yahoo/searchdefinition/MapEvaluationTypeContext.java index 0d9ea00bf73..a0f35dbefe6 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/MapEvaluationTypeContext.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/MapEvaluationTypeContext.java @@ -37,6 +37,8 @@ public class MapEvaluationTypeContext extends FunctionReferenceContext implement private final Map<Reference, TensorType> featureTypes = new HashMap<>(); + private final Map<Reference, TensorType> resolvedTypes = new HashMap<>(); + /** For invocation loop detection */ private final Deque<Reference> currentResolutionCallStack; @@ -63,8 +65,24 @@ public class MapEvaluationTypeContext extends FunctionReferenceContext implement throw new UnsupportedOperationException("Not able to parse gereral references from string form"); } + public void forgetResolvedTypes() { + resolvedTypes.clear(); + } + @Override public TensorType getType(Reference reference) { + // computeIfAbsent without concurrent modification due to resolve adding more resolved entries: + TensorType resolvedType = resolvedTypes.get(reference); + if (resolvedType != null) return resolvedType; + + resolvedType 
= resolveType(reference); + if (resolvedType == null) + return defaultTypeOf(reference); // Don't store fallback to default as we may know more later + resolvedTypes.put(reference, resolvedType); + return resolvedType; + } + + private TensorType resolveType(Reference reference) { if (currentResolutionCallStack.contains(reference)) throw new IllegalArgumentException("Invocation loop: " + currentResolutionCallStack.stream().map(Reference::toString).collect(Collectors.joining(" -> ")) + @@ -90,7 +108,7 @@ public class MapEvaluationTypeContext extends FunctionReferenceContext implement // The argument may be a local identifier bound to the actual value String argument = reference.simpleArgument().get(); reference = Reference.simple(reference.name(), bindings.getOrDefault(argument, argument)); - return featureTypes.getOrDefault(reference, defaultTypeOf(reference)); + return featureTypes.get(reference); } // A reference to a function? diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/RankProfile.java b/config-model/src/main/java/com/yahoo/searchdefinition/RankProfile.java index bc49c40e4e1..b3853b36aa5 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/RankProfile.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/RankProfile.java @@ -738,7 +738,7 @@ public class RankProfile implements Serializable, Cloneable { * Creates a context containing the type information of all constants, attributes and query profiles * referable from this rank profile. 
*/ - public TypeContext<Reference> typeContext(QueryProfileRegistry queryProfiles) { + public MapEvaluationTypeContext typeContext(QueryProfileRegistry queryProfiles) { MapEvaluationTypeContext context = new MapEvaluationTypeContext(getFunctions().values().stream() .map(RankingExpressionFunction::function) .collect(Collectors.toList())); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/ml/ConvertedModel.java b/config-model/src/main/java/com/yahoo/vespa/model/ml/ConvertedModel.java index 93848c067e0..f197e2dfe6d 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/ml/ConvertedModel.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/ml/ConvertedModel.java @@ -12,6 +12,7 @@ import com.yahoo.io.IOUtils; import com.yahoo.path.Path; import com.yahoo.search.query.profile.QueryProfileRegistry; import com.yahoo.searchdefinition.FeatureNames; +import com.yahoo.searchdefinition.MapEvaluationTypeContext; import com.yahoo.searchdefinition.RankProfile; import com.yahoo.searchdefinition.RankingConstant; import com.yahoo.searchdefinition.expressiontransforms.RankProfileTransformContext; @@ -371,7 +372,7 @@ public class ConvertedModel { */ private static void reduceBatchDimensions(RankingExpression expression, ImportedMlModel model, RankProfile profile, QueryProfileRegistry queryProfiles) { - TypeContext<Reference> typeContext = profile.typeContext(queryProfiles); + MapEvaluationTypeContext typeContext = profile.typeContext(queryProfiles); TensorType typeBeforeReducing = expression.getRoot().type(typeContext); // Check generated functions for inputs to reduce @@ -398,7 +399,7 @@ public class ConvertedModel { } private static ExpressionNode reduceBatchDimensionsAtInput(ExpressionNode node, ImportedMlModel model, - TypeContext<Reference> typeContext) { + MapEvaluationTypeContext typeContext) { if (node instanceof TensorFunctionNode) { TensorFunction tensorFunction = ((TensorFunctionNode) node).function(); if (tensorFunction instanceof Rename) { @@ 
-428,7 +429,7 @@ public class ConvertedModel { return node; } - private static ExpressionNode reduceBatchDimensionExpression(TensorFunction function, TypeContext<Reference> context) { + private static ExpressionNode reduceBatchDimensionExpression(TensorFunction function, MapEvaluationTypeContext context) { TensorFunction result = function; TensorType type = function.type(context); if (type.dimensions().size() > 1) { @@ -440,6 +441,7 @@ public class ConvertedModel { } if (reduceDimensions.size() > 0) { result = new Reduce(function, Reduce.Aggregator.sum, reduceDimensions); + context.forgetResolvedTypes(); // We changed types } } return new TensorFunctionNode(result); diff --git a/config-model/src/test/java/com/yahoo/searchdefinition/derived/GeminiTestCase.java b/config-model/src/test/java/com/yahoo/searchdefinition/derived/GeminiTestCase.java index 4bc61f20d95..992e52a9e5b 100644 --- a/config-model/src/test/java/com/yahoo/searchdefinition/derived/GeminiTestCase.java +++ b/config-model/src/test/java/com/yahoo/searchdefinition/derived/GeminiTestCase.java @@ -23,7 +23,7 @@ public class GeminiTestCase extends AbstractExportingTestCase { Map<String, String> ranking = removePartKeySuffixes(asMap(p.configProperties())); assertEquals("attribute(right)", resolve(lookup("toplevel", ranking), ranking)); } - + private Map<String, String> asMap(List<Pair<String, String>> properties) { Map<String, String> map = new HashMap<>(); for (Pair<String, String> property : properties) @@ -45,7 +45,7 @@ public class GeminiTestCase extends AbstractExportingTestCase { } /** - * Recurively resolves references to other ranking expressions - rankingExpression(name) - + * Recursively resolves references to other ranking expressions - rankingExpression(name) - * and replaces the reference by the expression */ private String resolve(String expression, Map<String, String> ranking) { diff --git a/configdefinitions/src/vespa/fleetcontroller.def b/configdefinitions/src/vespa/fleetcontroller.def index 
04c9e3b7c73..62f3b6759c3 100644 --- a/configdefinitions/src/vespa/fleetcontroller.def +++ b/configdefinitions/src/vespa/fleetcontroller.def @@ -166,3 +166,15 @@ cluster_has_global_document_types bool default=false ## Bucket merges are considered complete when: ## ((buckets_total - buckets_pending) / buckets_total)) >= min_merge_completion_ratio min_merge_completion_ratio double default=1.0 + +## If enabled, cluster state transitions are performed as two distinct phases: +## +## 1) state bundle propagation and bucket info gathering phase +## 2) state activation phase, which is not performed until all nodes have completed phase 1 +## +## This is to enable read-only operations to pass through the system during phase 1 +## while nodes await phase 2. If this feature is disabled, nodes will implicitly do +## phase 2 as part of phase 1 at their own leisure, which means that actual state +## activation may happen at wildly different times throughout the cluster. The 2 phase +## transition logic aims to minimize the window of time where active states diverge. 
+enable_two_phase_cluster_state_transitions bool default=false diff --git a/container-core/src/main/java/com/yahoo/restapi/Path.java b/container-core/src/main/java/com/yahoo/restapi/Path.java index 3aa23fbc916..79f70168696 100644 --- a/container-core/src/main/java/com/yahoo/restapi/Path.java +++ b/container-core/src/main/java/com/yahoo/restapi/Path.java @@ -54,7 +54,7 @@ public class Path { values.clear(); String[] specElements = pathSpec.split("/"); boolean matchPrefix = false; - if (specElements[specElements.length-1].equals("{*}")) { + if (specElements.length > 1 && specElements[specElements.length-1].equals("{*}")) { matchPrefix = true; specElements = Arrays.copyOf(specElements, specElements.length-1); } diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/zone/ZoneRegistry.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/zone/ZoneRegistry.java index b7303480701..d085d00baaa 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/zone/ZoneRegistry.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/zone/ZoneRegistry.java @@ -40,9 +40,6 @@ public interface ZoneRegistry { /** Returns all possible API endpoints of all known config servers and config server VIPs in the given zone */ List<URI> getConfigServerApiUris(ZoneId zoneId); - /** Returns a URL with the logs for the given deployment, if logging is configured for its zone */ - default Optional<URI> getLogServerUri(DeploymentId deploymentId) { return Optional.empty(); }; - /** Returns the time to live for deployments in the given zone, or empty if this is infinite */ Optional<Duration> getDeploymentTimeToLive(ZoneId zoneId); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java index aacc61655fc..de7f12efcaf 100644 --- 
a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java @@ -70,9 +70,10 @@ public class Upgrader extends Maintainer { cancelUpgradesOf(applications().with(UpgradePolicy.conservative).upgrading().failing().notUpgradingTo(conservativeTargets), reason); // Schedule the right upgrades - canaryTarget.ifPresent(target -> upgrade(applications().with(UpgradePolicy.canary), target)); - defaultTargets.forEach(target -> upgrade(applications().with(UpgradePolicy.defaultPolicy), target)); - conservativeTargets.forEach(target -> upgrade(applications().with(UpgradePolicy.conservative), target)); + ApplicationList applications = applications(); + canaryTarget.ifPresent(target -> upgrade(applications.with(UpgradePolicy.canary), target)); + defaultTargets.forEach(target -> upgrade(applications.with(UpgradePolicy.defaultPolicy), target)); + conservativeTargets.forEach(target -> upgrade(applications.with(UpgradePolicy.conservative), target)); } /** Returns the target versions for given confidence, one per major version in the system */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/proxy/ConfigServerRestExecutorImpl.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/proxy/ConfigServerRestExecutorImpl.java index a75d0afbad0..50c70f3e55b 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/proxy/ConfigServerRestExecutorImpl.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/proxy/ConfigServerRestExecutorImpl.java @@ -81,7 +81,7 @@ public class ConfigServerRestExecutorImpl implements ConfigServerRestExecutor { // Make a local copy of the list as we want to manipulate it in case of ping problems. 
List<URI> allServers = zoneRegistry.getConfigServerVipUri(zoneId) // TODO: Use config server VIP for all zones that have one - .filter(zone -> zoneId.region().value().startsWith("aws-") || zoneId.region().value().startsWith("cd-aws-")) + .filter(zone -> zoneId.region().value().startsWith("aws-") || zoneId.region().value().contains("-aws-")) .map(Collections::singletonList) .orElseGet(() -> new ArrayList<>(zoneRegistry.getConfigServerUris(zoneId))); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java index a0d807ac333..664a310c966 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java @@ -553,10 +553,6 @@ public class ApplicationApiHandler extends LoggingRequestHandler { .ifPresent(endpoints -> endpoints.forEach(endpoint -> serviceUrlArray.addString(endpoint.toString()))); response.setString("nodes", withPath("/zone/v2/" + deploymentId.zoneId().environment() + "/" + deploymentId.zoneId().region() + "/nodes/v2/node/?&recursive=true&application=" + deploymentId.applicationId().tenant() + "." + deploymentId.applicationId().application() + "." 
+ deploymentId.applicationId().instance(), request.getUri()).toString()); - - controller.zoneRegistry().getLogServerUri(deploymentId) - .ifPresent(elkUrl -> response.setString("elkUrl", elkUrl.toString())); - response.setString("yamasUrl", monitoringSystemUri(deploymentId).toString()); response.setString("version", deployment.version().toFullString()); response.setString("revision", deployment.applicationVersion().id()); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/role/PathGroup.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/role/PathGroup.java index 653c1d40684..ea54c23702d 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/role/PathGroup.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/role/PathGroup.java @@ -46,7 +46,7 @@ public enum PathGroup { "/application/v4/tenant/{tenant}/application/", "/application/v4/tenant/{tenant}/application/{application}", "/application/v4/tenant/{tenant}/application/{application}/deploying/{*}", - "/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/job/{job}/{*}", + "/application/v4/tenant/{tenant}/application/{application}/instance/{*}", "/application/v4/tenant/{tenant}/application/{application}/environment/dev/{*}", "/application/v4/tenant/{tenant}/application/{application}/environment/perf/{*}", "/application/v4/tenant/{tenant}/application/{application}/environment/prod/region/{region}/instance/{instance}/global-rotation/override"), @@ -62,7 +62,12 @@ public enum PathGroup { /** Read-only paths providing information related to deployments */ deploymentStatus("/badge/v1/{*}", "/deployment/v1/{*}", - "/zone/v1/{*}"); + "/zone/v1/{*}"), + + /** Paths used by some dashboard */ + dashboard("/", + "/d/{*}", + "/statuspage/v1/{*}"); final Set<String> pathSpecs; diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersion.java 
b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersion.java index df3899b9b23..2671f30255e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersion.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/OsVersion.java @@ -5,6 +5,7 @@ import com.yahoo.component.Version; import com.yahoo.config.provision.CloudName; import org.jetbrains.annotations.NotNull; +import java.util.Comparator; import java.util.Objects; /** @@ -14,6 +15,9 @@ import java.util.Objects; */ public class OsVersion implements Comparable<OsVersion> { + private static final Comparator<OsVersion> comparator = Comparator.comparing(OsVersion::cloud) + .thenComparing(OsVersion::version); + private final Version version; private final CloudName cloud; @@ -52,12 +56,8 @@ public class OsVersion implements Comparable<OsVersion> { } @Override - public int compareTo(@NotNull OsVersion o) { - int cloudCmp = cloud.compareTo(o.cloud()); - if (cloudCmp == 0) { // Same cloud, sort by version - return version.compareTo(o.version()); - } - return cloudCmp; + public int compareTo(@NotNull OsVersion that) { + return comparator.compare(this, that); } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneRegistryMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneRegistryMock.java index 02dadc300b3..37261b0fdc4 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneRegistryMock.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ZoneRegistryMock.java @@ -160,25 +160,6 @@ public class ZoneRegistryMock extends AbstractComponent implements ZoneRegistry } @Override - public Optional<URI> getLogServerUri(DeploymentId deploymentId) { - if ( ! 
hasZone(deploymentId.zoneId())) - return Optional.empty(); - - String kibanaQuery = "/#/discover?_g=()&_a=(columns:!(_source)," + - "index:'logstash-*',interval:auto," + - "query:(query_string:(analyze_wildcard:!t,query:'" + - "HV-tenant:%22" + deploymentId.applicationId().tenant().value() + "%22%20" + - "AND%20HV-application:%22" + deploymentId.applicationId().application().value() + "%22%20" + - "AND%20HV-region:%22" + deploymentId.zoneId().region().value() + "%22%20" + - "AND%20HV-instance:%22" + deploymentId.applicationId().instance().value() + "%22%20" + - "AND%20HV-environment:%22" + deploymentId.zoneId().environment().value() + "%22'))," + - "sort:!('@timestamp',desc))"; - - URI kibanaPath = URI.create(kibanaQuery); - return Optional.of(URI.create(String.format("http://log.%s.test", deploymentId.zoneId().value())).resolve(kibanaPath)); - } - - @Override public Optional<Duration> getDeploymentTimeToLive(ZoneId zoneId) { return Optional.ofNullable(deploymentTimeToLive.get(zoneId)); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment.json index 00177bf1afc..ac1797986fc 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment.json @@ -7,7 +7,6 @@ "http://alias-endpoint.vespa.yahooapis.com:4080" ], "nodes": "http://localhost:8080/zone/v2/prod/us-central-1/nodes/v2/node/%3F&recursive=true&application=tenant1.application1.default", - "elkUrl": 
"http://log.prod.us-central-1.test/#/discover?_g=()&_a=(columns:!(_source),index:'logstash-*',interval:auto,query:(query_string:(analyze_wildcard:!t,query:'HV-tenant:%22tenant1%22%20AND%20HV-application:%22application1%22%20AND%20HV-region:%22us-central-1%22%20AND%20HV-instance:%22default%22%20AND%20HV-environment:%22prod%22')),sort:!('@timestamp',desc))", "yamasUrl": "http://monitoring-system.test/?environment=prod&region=us-central-1&application=tenant1.application1", "version": "(ignore)", "revision": "(ignore)", diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/prod-us-central-1.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/prod-us-central-1.json index c7693c3d0d4..a3380d823f3 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/prod-us-central-1.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/prod-us-central-1.json @@ -13,7 +13,6 @@ "http://alias-endpoint.vespa.yahooapis.com:4080" ], "nodes": "http://localhost:8080/zone/v2/prod/us-central-1/nodes/v2/node/%3F&recursive=true&application=tenant1.application1.default", - "elkUrl": "http://log.prod.us-central-1.test/#/discover?_g=()&_a=(columns:!(_source),index:'logstash-*',interval:auto,query:(query_string:(analyze_wildcard:!t,query:'HV-tenant:%22tenant1%22%20AND%20HV-application:%22application1%22%20AND%20HV-region:%22us-central-1%22%20AND%20HV-instance:%22default%22%20AND%20HV-environment:%22prod%22')),sort:!('@timestamp',desc))", "yamasUrl": "http://monitoring-system.test/?environment=prod&region=us-central-1&application=tenant1.application1", "version": "(ignore)", "revision": "(ignore)", diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/user/UserApiTest.java 
b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/user/UserApiTest.java index d7fd38b5f41..bb488d0af22 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/user/UserApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/user/UserApiTest.java @@ -16,7 +16,7 @@ public class UserApiTest extends ControllerContainerTest { ContainerControllerTester tester = new ContainerControllerTester(container, responseFiles); tester.assertResponse(authenticatedRequest("http://localhost:8080/user/v1/"), - "{\"error-code\":\"NOT_FOUND\",\"message\":\"No 'GET' handler at '/user/v1/'\"}", 404); + "{\n \"code\" : 403,\n \"message\" : \"Access denied\"\n" + "}", 403); } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/role/PathGroupTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/role/PathGroupTest.java index b7c751638c8..d110ff4c2fe 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/role/PathGroupTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/role/PathGroupTest.java @@ -39,8 +39,11 @@ public class PathGroupTest { String[] parts2 = path2.split("/"); int end = Math.min(parts1.length, parts2.length); - if (end < parts1.length && ! parts2[end - 1].equals("{*}") && ! parts1[end].equals("{*}")) continue; - if (end < parts2.length && ! parts1[end - 1].equals("{*}") && ! parts2[end].equals("{*}")) continue; + // If one path has more parts than the other ... + // and the other doesn't end with a wildcard matcher ... + // and the longest one isn't just one part longer, which is a wildcard ... + if (end < parts1.length && (end == 0 || ! parts2[end - 1].equals("{*}")) && ! parts1[end].equals("{*}")) continue; + if (end < parts2.length && (end == 0 || ! parts1[end - 1].equals("{*}")) && ! 
parts2[end].equals("{*}")) continue; int i; for (i = 0; i < end; i++) diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/role/RoleMembershipTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/role/RoleMembershipTest.java index bc810fdb5c5..d4e673a02ae 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/role/RoleMembershipTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/role/RoleMembershipTest.java @@ -82,6 +82,9 @@ public class RoleMembershipTest { assertTrue(roles.allows(Action.create, "/application/v4/tenant/t1/application/a1/jobreport")); assertTrue(roles.allows(Action.update, "/application/v4/tenant/t1/application/a1")); assertTrue("Global read access", roles.allows(Action.read, "/controller/v1/foo")); + assertTrue("Dashboard read access", roles.allows(Action.read, "/")); + assertTrue("Dashboard read access", roles.allows(Action.read, "/d/nodes")); + assertTrue("Dashboard read access", roles.allows(Action.read, "/statuspage/v1/incidents")); } } diff --git a/defaults/abi-spec.json b/defaults/abi-spec.json index 95dc2e40353..ee76627a61c 100644 --- a/defaults/abi-spec.json +++ b/defaults/abi-spec.json @@ -8,6 +8,7 @@ "methods": [ "public java.lang.String vespaUser()", "public java.lang.String vespaHostname()", + "public java.lang.String temporaryApplicationStorage()", "public java.lang.String vespaHome()", "public java.lang.String underVespaHome(java.lang.String)", "public int vespaWebServicePort()", diff --git a/defaults/src/main/java/com/yahoo/vespa/defaults/Defaults.java b/defaults/src/main/java/com/yahoo/vespa/defaults/Defaults.java index 0fce5d654fb..f1b7e38986f 100644 --- a/defaults/src/main/java/com/yahoo/vespa/defaults/Defaults.java +++ b/defaults/src/main/java/com/yahoo/vespa/defaults/Defaults.java @@ -1,12 +1,8 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.defaults; -import java.io.BufferedReader; -import java.io.InputStreamReader; -import java.net.InetAddress; -import java.nio.charset.StandardCharsets; -import java.util.logging.Logger; import java.util.Optional; +import java.util.logging.Logger; @@ -25,6 +21,7 @@ public class Defaults { private final String vespaHome; private final String vespaUser; private final String vespaHost; + private final String temporaryApplicationStorage; private final int vespaWebServicePort; private final int vespaPortBase; private final int vespaPortConfigServerRpc; @@ -35,6 +32,7 @@ public class Defaults { vespaHome = findVespaHome("/opt/vespa"); vespaUser = findVespaUser("vespa"); vespaHost = findVespaHostname("localhost"); + temporaryApplicationStorage = underVespaHome("var/vespa/application"); vespaWebServicePort = findWebServicePort(8080); vespaPortBase = findVespaPortBase(19000); vespaPortConfigServerRpc = findConfigServerPort(vespaPortBase + 70); @@ -116,6 +114,15 @@ public class Defaults { public String vespaHostname() { return vespaHost; } /** + * Returns the path where a Vespa application can store arbitrary files. This should only be used for temporary + * files as there are no availability guarantees for files stored here. The application must be able to recreate + * required files on its own (e.g. by downloading them from a remote source) if missing. + * + * @return the temporary storage path + */ + public String temporaryApplicationStorage() { return temporaryApplicationStorage; } + + /** * Returns the path to the root under which Vespa should read and write files. * Will not end with a "/". 
* @return the vespa home directory diff --git a/defaults/src/test/java/com/yahoo/vespa/defaults/DefaultsTestCase.java b/defaults/src/test/java/com/yahoo/vespa/defaults/DefaultsTestCase.java index 07d3c39fc9c..88f4ad6f2fd 100644 --- a/defaults/src/test/java/com/yahoo/vespa/defaults/DefaultsTestCase.java +++ b/defaults/src/test/java/com/yahoo/vespa/defaults/DefaultsTestCase.java @@ -1,8 +1,10 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.defaults; +import org.junit.Ignore; import org.junit.Test; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; /** * @author arnej27959 @@ -25,14 +27,20 @@ public class DefaultsTestCase { @Test public void testPortsArePositive() { Defaults d = Defaults.getDefaults(); - assertEquals(true, d.vespaPortBase() > 0); - assertEquals(true, d.vespaWebServicePort() > 0); - assertEquals(true, d.vespaConfigServerRpcPort() > 0); - assertEquals(true, d.vespaConfigServerHttpPort() > 0); - assertEquals(true, d.vespaConfigProxyRpcPort() > 0); + assertTrue(d.vespaPortBase() > 0); + assertTrue(d.vespaWebServicePort() > 0); + assertTrue(d.vespaConfigServerRpcPort() > 0); + assertTrue(d.vespaConfigServerHttpPort() > 0); + assertTrue(d.vespaConfigProxyRpcPort() > 0); } @Test + public void testTemporaryApplicationStorage() { + assertEquals("/opt/vespa/var/vespa/application", Defaults.getDefaults().temporaryApplicationStorage()); + } + + @Test + @Ignore // This is run manually for human inspection. 
Contains no assertions public void dumpAllVars() { Defaults d = Defaults.getDefaults(); System.out.println("vespa user = '" + d.vespaUser() + "'"); diff --git a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java index 801b9b03bb4..e57f61ce5f4 100644 --- a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java +++ b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java @@ -276,17 +276,15 @@ public class DockerImpl implements Docker { private Stream<Container> asContainer(String container) { return inspectContainerCmd(container) - .map(response -> - new Container( - response.getConfig().getHostName(), - DockerImage.fromString(response.getConfig().getImage()), - containerResourcesFromHostConfig(response.getHostConfig()), - new ContainerName(decode(response.getName())), - Container.State.valueOf(response.getState().getStatus().toUpperCase()), - response.getState().getPid() - )) - .map(Stream::of) - .orElse(Stream.empty()); + .map(response -> new Container( + response.getConfig().getHostName(), + DockerImage.fromString(response.getConfig().getImage()), + containerResourcesFromHostConfig(response.getHostConfig()), + new ContainerName(decode(response.getName())), + Container.State.valueOf(response.getState().getStatus().toUpperCase()), + response.getState().getPid() + )) + .stream(); } private static ContainerResources containerResourcesFromHostConfig(HostConfig hostConfig) { diff --git a/document/src/vespa/document/select/operator.cpp b/document/src/vespa/document/select/operator.cpp index 1b97a375a5f..36113844d88 100644 --- a/document/src/vespa/document/select/operator.cpp +++ b/document/src/vespa/document/select/operator.cpp @@ -1,7 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "operator.h" -#include <vespa/vespalib/util/regexp.h> +#include <regex> #include <vespa/vespalib/stllike/asciistream.h> #include <vespa/vespalib/stllike/hash_map.hpp> #include <cassert> @@ -127,8 +127,12 @@ RegexOperator::match(const vespalib::string& val, vespalib::stringref expr) cons { // Should we catch this in parsing? if (expr.size() == 0) return ResultList(Result::True); - vespalib::Regexp expression(expr); - return ResultList(Result::get(expression.match(val))); + try { + std::basic_regex<char> expression(expr.data(), expr.size()); + return ResultList(Result::get(std::regex_search(val.c_str(), val.c_str() + val.size(), expression))); + } catch (std::regex_error &) { + return ResultList(Result::False); + } } const RegexOperator RegexOperator::REGEX("=~"); diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index bb6b346abd4..5fa8e39ac81 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -64,11 +64,6 @@ public class Flags { "Whether to use a dedicated node for the logserver.", "Takes effect at redeployment", APPLICATION_ID); - public static final UnboundBooleanFlag USE_DOCKER_91 = defineFeatureFlag( - "use-docker-91", false, - "Whether to upgrade to Docker version 1.13.1-91.git07f3374", "Takes effect after restart of host admin", - HOSTNAME); - public static final UnboundDoubleFlag CONTAINER_CPU_CAP = defineDoubleFlag( "container-cpu-cap", 0, "Hard limit on how many CPUs a container may use. 
This value is multiplied by CPU allocated to node, so " + diff --git a/logd/CMakeLists.txt b/logd/CMakeLists.txt index 85aebac365b..9a5fdf32841 100644 --- a/logd/CMakeLists.txt +++ b/logd/CMakeLists.txt @@ -15,6 +15,7 @@ vespa_define_module( TESTS src/tests/legacy_forwarder + src/tests/proto_converter src/tests/rotate ) diff --git a/logd/src/logd/CMakeLists.txt b/logd/src/logd/CMakeLists.txt index a3ff813ad96..baf52f1d5d8 100644 --- a/logd/src/logd/CMakeLists.txt +++ b/logd/src/logd/CMakeLists.txt @@ -15,11 +15,15 @@ vespa_add_library(logd STATIC conn.cpp legacy_forwarder.cpp metrics.cpp + proto_converter.cpp state_reporter.cpp watcher.cpp ${logd_PROTOBUF_SRCS} DEPENDS ) + vespa_generate_config(logd ../main/resources/configdefinitions/logd.def) install_config_definition(../main/resources/configdefinitions/logd.def cloud.config.log.logd.def) + +vespa_add_target_package_dependency(logd Protobuf) diff --git a/logd/src/logd/legacy_forwarder.cpp b/logd/src/logd/legacy_forwarder.cpp index b512bab7fb6..b8b93a03530 100644 --- a/logd/src/logd/legacy_forwarder.cpp +++ b/logd/src/logd/legacy_forwarder.cpp @@ -3,6 +3,8 @@ #include "exceptions.h" #include "legacy_forwarder.h" #include "metrics.h" +#include <vespa/log/log_message.h> +#include <vespa/log/exceptions.h> #include <vespa/vespalib/component/vtag.h> #include <vespa/vespalib/locale/c.h> #include <unistd.h> @@ -11,6 +13,10 @@ LOG_SETUP(""); using LogLevel = ns_log::Logger::LogLevel; +using ns_log::BadLogLineException; +using ns_log::LogMessage; +using ns_log::Logger; +using LogLevel = Logger::LogLevel; namespace logdemon { @@ -18,7 +24,6 @@ LegacyForwarder::LegacyForwarder(Metrics &metrics) : _logserverfd(-1), _metrics(metrics), _forwardMap(), - _levelparser(), _badLines(0) {} LegacyForwarder::~LegacyForwarder() = default; @@ -70,120 +75,30 @@ LegacyForwarder::forwardLine(const char *line, const char *eol) bool LegacyForwarder::parseline(const char *linestart, const char *lineend) { - int llength = lineend - linestart; - 
- const char *fieldstart = linestart; - // time - const char *tab = strchr(fieldstart, '\t'); - if (tab == nullptr || tab == fieldstart) { - LOG(spam, "bad logline no 1. tab: %.*s", llength, linestart); - ++_badLines; - return false; - } - char *eod; - double logtime = vespalib::locale::c::strtod(fieldstart, &eod); - if (eod != tab) { - int fflen = tab - linestart; - LOG(spam, "bad logline first field not strtod parsable: %.*s", fflen, linestart); - ++_badLines; - return false; - } - time_t now = time(nullptr); - if (logtime - 864000 > now) { - int fflen = tab - linestart; - LOG(warning, "bad logline, time %.*s > 10 days in the future", fflen, linestart); - ++_badLines; - return false; - } - if (logtime + 8640000 < now) { - int fflen = tab - linestart; - LOG(warning, "bad logline, time %.*s > 100 days in the past", fflen, linestart); + LogMessage message; + try { + message.parse_log_line(std::string_view(linestart, lineend - linestart)); + } catch (BadLogLineException &e) { + LOG(spam, "bad logline: %s", e.what()); ++_badLines; return false; } - // hostname - fieldstart = tab + 1; - tab = strchr(fieldstart, '\t'); - if (tab == nullptr) { - LOG(spam, "bad logline no 2. tab: %.*s", llength, linestart); - ++_badLines; - return false; - } - - // pid - fieldstart = tab + 1; - tab = strchr(fieldstart, '\t'); - if (tab == nullptr || tab == fieldstart) { - LOG(spam, "bad logline no 3. tab: %.*s", llength, linestart); - return false; - } - - // service - fieldstart = tab + 1; - tab = strchr(fieldstart, '\t'); - if (tab == nullptr) { - LOG(spam, "bad logline no 4. tab: %.*s", llength, linestart); - ++_badLines; - return false; - } - if (tab == fieldstart) { - LOG(spam, "empty service in logline: %.*s", llength, linestart); - } - std::string service(fieldstart, tab-fieldstart); - - // component - fieldstart = tab + 1; - tab = strchr(fieldstart, '\t'); - if (tab == nullptr || tab == fieldstart) { - LOG(spam, "bad logline no 5. 
tab: %.*s", llength, linestart); - ++_badLines; - return false; - } - std::string component(fieldstart, tab-fieldstart); - - // level - fieldstart = tab + 1; - tab = strchr(fieldstart, '\t'); - if (tab == nullptr || tab == fieldstart) { - LOG(spam, "bad logline no 6. tab: %.*s", llength, linestart); - ++_badLines; - return false; - } - std::string level(fieldstart, tab-fieldstart); - LogLevel l = _levelparser.parseLevel(level.c_str()); - - // rest is freeform message, must be on this line: - if (tab > lineend) { - LOG(spam, "bad logline last tab after end: %.*s", llength, linestart); - ++_badLines; - return false; + std::string logLevelName; + if (message.level() >= LogLevel::NUM_LOGLEVELS) { + logLevelName = "unknown"; + } else { + logLevelName = Logger::logLevelNames[message.level()]; } - - _metrics.countLine(level, service); + _metrics.countLine(logLevelName, message.service()); // Check overrides - ForwardMap::iterator found = _forwardMap.find(l); + ForwardMap::iterator found = _forwardMap.find(message.level()); if (found != _forwardMap.end()) { return found->second; } return false; // Unknown log level } -LogLevel -LevelParser::parseLevel(const char *level) -{ - using ns_log::Logger; - - LogLevel l = Logger::parseLevel(level); - if (l >= 0 && l <= Logger::NUM_LOGLEVELS) { - return l; - } - if (_seenLevelMap.find(level) == _seenLevelMap.end()) { - LOG(warning, "unknown level '%s'", level); - _seenLevelMap.insert(level); - } - return Logger::fatal; -} } // namespace diff --git a/logd/src/logd/legacy_forwarder.h b/logd/src/logd/legacy_forwarder.h index da8dbcc82ab..81a93ce1d50 100644 --- a/logd/src/logd/legacy_forwarder.h +++ b/logd/src/logd/legacy_forwarder.h @@ -8,21 +8,11 @@ namespace logdemon { -using SeenMap = std::unordered_set<std::string>; // Mapping saying if a level should be forwarded or not using ForwardMap = std::map<ns_log::Logger::LogLevel, bool>; struct Metrics; -class LevelParser -{ -private: - SeenMap _seenLevelMap; -public: - 
ns_log::Logger::LogLevel parseLevel(const char *level); - LevelParser() : _seenLevelMap() {} -}; - /** * Class used to forward log lines to the logserver via a one-way text protocol. */ @@ -31,7 +21,6 @@ private: int _logserverfd; Metrics &_metrics; ForwardMap _forwardMap; - LevelParser _levelparser; int _badLines; const char *copystr(const char *b, const char *e) { int len = e - b; diff --git a/logd/src/logd/log_protocol_proto.h b/logd/src/logd/log_protocol_proto.h new file mode 100644 index 00000000000..a8d5e4aa208 --- /dev/null +++ b/logd/src/logd/log_protocol_proto.h @@ -0,0 +1,11 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsuggest-override" + +#include "log_protocol.pb.h" + +#pragma GCC diagnostic pop + diff --git a/logd/src/logd/proto_converter.cpp b/logd/src/logd/proto_converter.cpp new file mode 100644 index 00000000000..b3facd4ef4a --- /dev/null +++ b/logd/src/logd/proto_converter.cpp @@ -0,0 +1,65 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "proto_converter.h" + +using ns_log::LogMessage; +using ns_log::Logger; + +namespace logdemon { + +void +ProtoConverter::log_messages_to_proto(const std::vector<LogMessage>& messages, ProtoLogRequest& proto) +{ + for (const auto& message : messages) { + auto* proto_message = proto.add_log_messages(); + log_message_to_proto(message, *proto_message); + } +} + +namespace { + +using ProtoLogLevel = ::logserver::protocol::protobuf::LogMessage_Level; + +ProtoLogLevel +convert_level(const Logger::LogLevel& level) +{ + switch (level) { + case Logger::fatal: + return ProtoLogLevel::LogMessage_Level_FATAL; + case Logger::error: + return ProtoLogLevel::LogMessage_Level_ERROR; + case Logger::warning: + return ProtoLogLevel::LogMessage_Level_WARNING; + case Logger::config: + return ProtoLogLevel::LogMessage_Level_CONFIG; + case Logger::info: + return ProtoLogLevel::LogMessage_Level_INFO; + case Logger::event: + return ProtoLogLevel::LogMessage_Level_EVENT; + case Logger::debug: + return ProtoLogLevel::LogMessage_Level_DEBUG; + case Logger::spam: + return ProtoLogLevel::LogMessage_Level_SPAM; + case Logger::NUM_LOGLEVELS: + return ProtoLogLevel::LogMessage_Level_UNKNOWN; + default: + return ProtoLogLevel::LogMessage_Level_UNKNOWN; + } +} + +} + +void +ProtoConverter::log_message_to_proto(const LogMessage& message, ProtoLogMessage& proto) +{ + proto.set_time_nanos(message.time_nanos()); + proto.set_hostname(message.hostname()); + proto.set_process_id(message.process_id()); + proto.set_thread_id(message.thread_id()); + proto.set_service(message.service()); + proto.set_component(message.component()); + proto.set_level(convert_level(message.level())); + proto.set_payload(message.payload()); +} + +} diff --git a/logd/src/logd/proto_converter.h b/logd/src/logd/proto_converter.h new file mode 100644 index 00000000000..688648b99de --- /dev/null +++ b/logd/src/logd/proto_converter.h @@ -0,0 +1,20 @@ +// Copyright 2019 Oath Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "log_protocol_proto.h" +#include <vespa/log/log_message.h> +#include <vector> + +namespace logdemon { + +/** + * Contains functions to convert log messages to protobuf objects. + */ +struct ProtoConverter { + using ProtoLogRequest = logserver::protocol::protobuf::LogRequest; + using ProtoLogMessage = logserver::protocol::protobuf::LogMessage; + + static void log_messages_to_proto(const std::vector<ns_log::LogMessage>& messages, ProtoLogRequest& proto); + static void log_message_to_proto(const ns_log::LogMessage& message, ProtoLogMessage& proto); +}; + +} diff --git a/logd/src/tests/proto_converter/CMakeLists.txt b/logd/src/tests/proto_converter/CMakeLists.txt new file mode 100644 index 00000000000..5ca048ecd4e --- /dev/null +++ b/logd/src/tests/proto_converter/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(logd_proto_converter_test_app TEST + SOURCES + proto_converter_test.cpp + DEPENDS + logd + gtest +) +vespa_add_test(NAME logd_proto_converter_test_app COMMAND logd_proto_converter_test_app) diff --git a/logd/src/tests/proto_converter/proto_converter_test.cpp b/logd/src/tests/proto_converter/proto_converter_test.cpp new file mode 100644 index 00000000000..aa0b00e34d6 --- /dev/null +++ b/logd/src/tests/proto_converter/proto_converter_test.cpp @@ -0,0 +1,88 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <logd/proto_converter.h> +#include <vespa/vespalib/gtest/gtest.h> + +using ns_log::Logger; +using ns_log::LogMessage; + +using Converter = logdemon::ProtoConverter; +using ProtoLogLevel = logserver::protocol::protobuf::LogMessage_Level; + +struct LogMessageTest : public ::testing::Test { + LogMessage message; + Converter::ProtoLogMessage proto; + void convert() { + Converter::log_message_to_proto(message, proto); + } + void expect_log_level_converted(ProtoLogLevel proto_level, Logger::LogLevel message_level) { + message = LogMessage(1, "", 1, 1, "", "", message_level, ""); + convert(); + EXPECT_EQ(proto_level, proto.level()); + } +}; + +void +expect_proto_log_message_equal(int64_t exp_time_nanos, + const std::string& exp_hostname, + int32_t exp_process_id, + int32_t exp_thread_id, + const std::string& exp_service, + const std::string& exp_component, + ProtoLogLevel exp_level, + const std::string& exp_payload, + const Converter::ProtoLogMessage& proto) +{ + EXPECT_EQ(exp_time_nanos, proto.time_nanos()); + EXPECT_EQ(exp_hostname, proto.hostname()); + EXPECT_EQ(exp_process_id, proto.process_id()); + EXPECT_EQ(exp_thread_id, proto.thread_id()); + EXPECT_EQ(exp_service, proto.service()); + EXPECT_EQ(exp_component, proto.component()); + EXPECT_EQ(exp_level, proto.level()); + EXPECT_EQ(exp_payload, proto.payload()); +} + +TEST_F(LogMessageTest, log_message_is_converted) +{ + message = LogMessage(12345, "foo_host", 3, 5, "foo_service", "foo_component", Logger::info, "foo_payload"); + convert(); + expect_proto_log_message_equal(12345, "foo_host", 3, 5, "foo_service", "foo_component", + ProtoLogLevel::LogMessage_Level_INFO, "foo_payload", proto); +} + +TEST_F(LogMessageTest, log_levels_are_converted) +{ + expect_log_level_converted(ProtoLogLevel::LogMessage_Level_FATAL, Logger::fatal); + expect_log_level_converted(ProtoLogLevel::LogMessage_Level_ERROR, Logger::error); + expect_log_level_converted(ProtoLogLevel::LogMessage_Level_WARNING, Logger::warning); + 
expect_log_level_converted(ProtoLogLevel::LogMessage_Level_CONFIG, Logger::config); + expect_log_level_converted(ProtoLogLevel::LogMessage_Level_INFO, Logger::info); + expect_log_level_converted(ProtoLogLevel::LogMessage_Level_EVENT, Logger::event); + expect_log_level_converted(ProtoLogLevel::LogMessage_Level_DEBUG, Logger::debug); + expect_log_level_converted(ProtoLogLevel::LogMessage_Level_SPAM, Logger::spam); + expect_log_level_converted(ProtoLogLevel::LogMessage_Level_UNKNOWN, Logger::NUM_LOGLEVELS); +} + +struct LogRequestTest : public ::testing::Test { + std::vector<LogMessage> messages; + Converter::ProtoLogRequest proto; + void convert() { + Converter::log_messages_to_proto(messages, proto); + } +}; + +TEST_F(LogRequestTest, log_messages_are_converted_to_request) +{ + messages.emplace_back(12345, "foo_host", 3, 5, "foo_service", "foo_component", Logger::info, "foo_payload"); + messages.emplace_back(54321, "bar_host", 7, 9, "bar_service", "bar_component", Logger::event, "bar_payload"); + convert(); + EXPECT_EQ(2, proto.log_messages_size()); + expect_proto_log_message_equal(12345, "foo_host", 3, 5, "foo_service", "foo_component", + ProtoLogLevel::LogMessage_Level_INFO, "foo_payload", proto.log_messages(0)); + expect_proto_log_message_equal(54321, "bar_host", 7, 9, "bar_service", "bar_component", + ProtoLogLevel::LogMessage_Level_EVENT, "bar_payload", proto.log_messages(1)); +} + +GTEST_MAIN_RUN_ALL_TESTS() + diff --git a/metrics/src/main/java/com/yahoo/metrics/Metric.java b/metrics/src/main/java/com/yahoo/metrics/Metric.java index f1f389a4dfe..ad7ffc971f6 100644 --- a/metrics/src/main/java/com/yahoo/metrics/Metric.java +++ b/metrics/src/main/java/com/yahoo/metrics/Metric.java @@ -65,7 +65,7 @@ public abstract class Metric { } public List<String> getPathVector() { - List<String> result = new ArrayList<String>(); + List<String> result = new ArrayList<>(); result.add(getName()); MetricSet owner = this.owner; while (owner != null) { diff --git 
a/metrics/src/main/java/com/yahoo/metrics/MetricSet.java b/metrics/src/main/java/com/yahoo/metrics/MetricSet.java index 5305002782d..a9fea1c65a4 100644 --- a/metrics/src/main/java/com/yahoo/metrics/MetricSet.java +++ b/metrics/src/main/java/com/yahoo/metrics/MetricSet.java @@ -6,11 +6,11 @@ import com.yahoo.text.XMLWriter; import java.util.*; import java.util.logging.Logger; -public abstract class MetricSet extends Metric -{ +public abstract class MetricSet extends Metric { + private static Logger log = Logger.getLogger(MetricSet.class.getName()); - List<Metric> metricOrder = new ArrayList<Metric>(); // Keep added order for reporting + List<Metric> metricOrder = new ArrayList<>(); // Keep added order for reporting boolean registrationAltered; // Set to true if metrics have been // registered/unregistered since last time // it was reset diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java index 52d55dad087..e34e97dc2aa 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java @@ -150,7 +150,7 @@ public class DockerOperationsImpl implements DockerOperations { "ff02::1\tip6-allnodes\n" + "ff02::2\tip6-allrouters\n" + ipV6Local.getHostAddress() + '\t' + hostname + '\n'); - ipV4Local.ifPresent(ipv4 -> etcHosts.append(ipv4.getHostAddress() + '\t' + hostname + '\n')); + ipV4Local.ifPresent(ipv4 -> etcHosts.append(ipv4.getHostAddress()).append('\t').append(hostname).append('\n')); containerData.addFile(Paths.get("/etc/hosts"), etcHosts.toString()); } @@ -199,16 +199,17 @@ public class DockerOperationsImpl implements DockerOperations { @Override public ProcessResult executeCommandInNetworkNamespace(NodeAgentContext context, String... 
command) { - final int containerPid = docker.getContainer(context.containerName()) + int containerPid = docker.getContainer(context.containerName()) .filter(container -> container.state.isRunning()) .orElseThrow(() -> new RuntimeException( "Found no running container named " + context.containerName().asString())) .pid; - final String[] wrappedCommand = Stream.concat( - Stream.of("nsenter", String.format("--net=/proc/%d/ns/net", containerPid), "--"), - Stream.of(command)) - .toArray(String[]::new); + String[] wrappedCommand = Stream.concat(Stream.of("nsenter", + String.format("--net=/proc/%d/ns/net", containerPid), + "--"), + Stream.of(command)) + .toArray(String[]::new); try { Pair<Integer, String> result = processExecuter.exec(wrappedCommand); @@ -267,7 +268,7 @@ public class DockerOperationsImpl implements DockerOperations { } private static void addMounts(NodeAgentContext context, Docker.CreateContainerCommand command) { - final Path varLibSia = Paths.get("/var/lib/sia"); + Path varLibSia = Paths.get("/var/lib/sia"); // Paths unique to each container List<Path> paths = new ArrayList<>(Arrays.asList( diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java index 60fdb242cfd..0f150ad9065 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java @@ -225,8 +225,6 @@ public class StorageMaintainer { /** Deletes old log files for vespa, nginx, logstash, etc. 
*/ public void removeOldFilesFromNode(NodeAgentContext context) { Path[] logPaths = { - context.pathInNodeUnderVespaHome("logs/elasticsearch2"), - context.pathInNodeUnderVespaHome("logs/logstash2"), context.pathInNodeUnderVespaHome("logs/daemontools_y"), context.pathInNodeUnderVespaHome("logs/nginx"), context.pathInNodeUnderVespaHome("logs/vespa") diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java index e9eacddb060..5b5d13ca346 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java @@ -154,9 +154,9 @@ public class NodeAdminImpl implements NodeAdmin { public void stopNodeAgentServices(List<String> hostnames) { // Each container may spend 1-1:30 minutes stopping hostnames.parallelStream() - .filter(nodeAgentWithSchedulerByHostname::containsKey) - .map(nodeAgentWithSchedulerByHostname::get) - .forEach(NodeAgentWithScheduler::stopForHostSuspension); + .filter(nodeAgentWithSchedulerByHostname::containsKey) + .map(nodeAgentWithSchedulerByHostname::get) + .forEach(NodeAgentWithScheduler::stopForHostSuspension); } @Override @@ -171,8 +171,8 @@ public class NodeAdminImpl implements NodeAdmin { } // Set-difference. Returns minuend minus subtrahend. 
- private static <T> Set<T> diff(final Set<T> minuend, final Set<T> subtrahend) { - final HashSet<T> result = new HashSet<>(minuend); + private static <T> Set<T> diff(Set<T> minuend, Set<T> subtrahend) { + var result = new HashSet<>(minuend); result.removeAll(subtrahend); return result; } @@ -127,9 +127,10 @@ <module>vespaclient-java</module> <module>vespa-athenz</module> <module>vespa-documentgen-plugin</module> - <module>vespa_feed_perf</module> <module>vespa-hadoop</module> <module>vespa-http-client</module> + <module>vespa-testrunner-components</module> + <module>vespa_feed_perf</module> <module>vespa_jersey2</module> <module>vespajlib</module> <module>vespalog</module> diff --git a/searchcore/src/tests/proton/docsummary/docsummary.cpp b/searchcore/src/tests/proton/docsummary/docsummary.cpp index fb8674b5255..a3acbfdbfe0 100644 --- a/searchcore/src/tests/proton/docsummary/docsummary.cpp +++ b/searchcore/src/tests/proton/docsummary/docsummary.cpp @@ -34,6 +34,7 @@ #include <vespa/vespalib/encoding/base64.h> #include <vespa/config-bucketspaces.h> #include <vespa/vespalib/testkit/testapp.h> +#include <regex> #include <vespa/log/log.h> LOG_SETUP("docsummary_test"); @@ -655,7 +656,8 @@ Test::requireThatSummariesTimeout() vespalib::SimpleBuffer buf; vespalib::Slime summary = getSlime(*rep, 0, false); JsonFormat::encode(summary, buf, false); - EXPECT_TRUE(vespalib::Regexp("Timed out with -[0-9]+us left.").match(buf.get().make_stringref())); + auto bufstring = buf.get().make_stringref(); + EXPECT_TRUE(std::regex_search(bufstring.data(), bufstring.data() + bufstring.size(), std::basic_regex<char>("Timed out with -[0-9]+us left."))); } void diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/Reference.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/Reference.java index cd5f42ac05c..829a796eee0 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/Reference.java +++ 
b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/Reference.java @@ -21,11 +21,11 @@ import java.util.stream.Collectors; */ public class Reference extends TypeContext.Name { + private final int hashCode; + private final Arguments arguments; - /** - * The output, or null if none - */ + /** The output, or null if none */ private final String output; public Reference(String name, Arguments arguments, String output) { @@ -34,6 +34,7 @@ public class Reference extends TypeContext.Name { Objects.requireNonNull(arguments, "arguments cannot be null"); this.arguments = arguments; this.output = output; + this.hashCode = Objects.hash(name(), arguments, output); } public Arguments arguments() { return arguments; } @@ -115,7 +116,8 @@ public class Reference extends TypeContext.Name { @Override public boolean equals(Object o) { if (o == this) return true; - if (!(o instanceof Reference)) return false; + if (o.hashCode() != this.hashCode()) return false; // because this has a fast hashCode + if ( ! (o instanceof Reference)) return false; Reference other = (Reference) o; if (!Objects.equals(other.name(), this.name())) return false; if (!Objects.equals(other.arguments, this.arguments)) return false; @@ -125,7 +127,7 @@ public class Reference extends TypeContext.Name { @Override public int hashCode() { - return Objects.hash(name(), arguments, output); + return hashCode; } @Override diff --git a/simplemetrics/src/main/java/com/yahoo/metrics/simple/Bucket.java b/simplemetrics/src/main/java/com/yahoo/metrics/simple/Bucket.java index 120d27d37ff..5e7b60411c9 100644 --- a/simplemetrics/src/main/java/com/yahoo/metrics/simple/Bucket.java +++ b/simplemetrics/src/main/java/com/yahoo/metrics/simple/Bucket.java @@ -21,7 +21,7 @@ import edu.umd.cs.findbugs.annotations.NonNull; /** * An aggregation of data which is only written to from a single thread. 
* - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Steinar Knutsen */ public class Bucket { diff --git a/simplemetrics/src/main/java/com/yahoo/metrics/simple/DimensionCache.java b/simplemetrics/src/main/java/com/yahoo/metrics/simple/DimensionCache.java index 0318368a31c..8893a88d94c 100644 --- a/simplemetrics/src/main/java/com/yahoo/metrics/simple/DimensionCache.java +++ b/simplemetrics/src/main/java/com/yahoo/metrics/simple/DimensionCache.java @@ -13,7 +13,7 @@ import java.util.Set; * The persistence layer for metrics. Both CPU and memory hungry, but * it runs in its own little world. * - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Steinar Knutsen */ class DimensionCache { @@ -74,10 +74,8 @@ class DimensionCache { private static final long MAX_AGE_MILLIS = 4 * 3600 * 1000; - private void padMetric(String metric, - Bucket toPresent, - int currentDataPoints) { - final LinkedHashMap<Point, TimeStampedMetric> cachedPoints = getCachedMetric(metric); + private void padMetric(String metric, Bucket toPresent, int currentDataPoints) { + LinkedHashMap<Point, TimeStampedMetric> cachedPoints = getCachedMetric(metric); int toAdd = pointsToKeep - currentDataPoints; @SuppressWarnings({"unchecked","rawtypes"}) Entry<Point, TimeStampedMetric>[] cachedEntries = cachedPoints.entrySet().toArray(new Entry[0]); @@ -87,8 +85,8 @@ class DimensionCache { if (leastOld.getValue().millis + MAX_AGE_MILLIS < nowMillis) { continue; } - final Identifier id = new Identifier(metric, leastOld.getKey()); - if (!toPresent.hasIdentifier(id)) { + Identifier id = new Identifier(metric, leastOld.getKey()); + if ( ! 
toPresent.hasIdentifier(id)) { toPresent.put(id, leastOld.getValue().metric.pruneData()); --toAdd; } @@ -99,7 +97,7 @@ class DimensionCache { private LinkedHashMap<Point, TimeStampedMetric> getCachedMetric(String metricName) { LinkedHashMap<Point, TimeStampedMetric> points = persistentData.get(metricName); if (points == null) { - points = new LinkedHashMap<Point, TimeStampedMetric>(16, 0.75f, false) { + points = new LinkedHashMap<>(16, 0.75f, false) { protected @Override boolean removeEldestEntry(Map.Entry<Point, TimeStampedMetric> eldest) { return size() > pointsToKeep; } diff --git a/simplemetrics/src/main/java/com/yahoo/metrics/simple/Measurement.java b/simplemetrics/src/main/java/com/yahoo/metrics/simple/Measurement.java index dee98024dda..cc7a4b0f717 100644 --- a/simplemetrics/src/main/java/com/yahoo/metrics/simple/Measurement.java +++ b/simplemetrics/src/main/java/com/yahoo/metrics/simple/Measurement.java @@ -5,7 +5,7 @@ package com.yahoo.metrics.simple; * Wrapper class for the actually measured value. Candidate for removal, but I * wanted a type instead of some opaque instance of Number. * - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Steinar Knutsen */ public class Measurement { private final Number magnitude; diff --git a/simplemetrics/src/main/java/com/yahoo/metrics/simple/MetricAggregator.java b/simplemetrics/src/main/java/com/yahoo/metrics/simple/MetricAggregator.java index 42af115bae9..7d142a8e4d8 100644 --- a/simplemetrics/src/main/java/com/yahoo/metrics/simple/MetricAggregator.java +++ b/simplemetrics/src/main/java/com/yahoo/metrics/simple/MetricAggregator.java @@ -12,7 +12,7 @@ import com.yahoo.metrics.ManagerConfig; * snapshots for external consumption. Using the correct executor gives the * necessary guarantuees for this being invoked from only a single thread. 
* - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Steinar Knutsen */ class MetricAggregator implements Runnable { @@ -44,7 +44,7 @@ class MetricAggregator implements Runnable { } private void createSnapshot(Bucket toDelete) { - final Bucket toPresent = new Bucket(); + Bucket toPresent = new Bucket(); for (Bucket b : buffer) { if (b == null) { continue; @@ -57,8 +57,8 @@ class MetricAggregator implements Runnable { private Bucket updateBuffer() { List<Bucket> buckets = metricsCollection.fetch(); - final long toMillis = System.currentTimeMillis(); - final int bucketIndex = generation++ % buffer.length; + long toMillis = System.currentTimeMillis(); + int bucketIndex = generation++ % buffer.length; Bucket bucketToDelete = buffer[bucketIndex]; Bucket latest = new Bucket(fromMillis, toMillis); for (Bucket b : buckets) { diff --git a/simplemetrics/src/main/java/com/yahoo/metrics/simple/MetricReceiver.java b/simplemetrics/src/main/java/com/yahoo/metrics/simple/MetricReceiver.java index e6e41ace04a..a2b82978a26 100644 --- a/simplemetrics/src/main/java/com/yahoo/metrics/simple/MetricReceiver.java +++ b/simplemetrics/src/main/java/com/yahoo/metrics/simple/MetricReceiver.java @@ -15,7 +15,7 @@ import com.yahoo.concurrent.ThreadLocalDirectory; * in constructors for declaring instances of {@link Counter} and {@link Gauge} * for the actual measurement of metrics. 
* - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Steinar Knutsen */ @Beta public class MetricReceiver { diff --git a/simplemetrics/src/main/java/com/yahoo/metrics/simple/Point.java b/simplemetrics/src/main/java/com/yahoo/metrics/simple/Point.java index 5dc54c28ba0..672d05c1874 100644 --- a/simplemetrics/src/main/java/com/yahoo/metrics/simple/Point.java +++ b/simplemetrics/src/main/java/com/yahoo/metrics/simple/Point.java @@ -129,4 +129,5 @@ public final class Point implements Context { Value[] getLocation() { return location; } + } diff --git a/simplemetrics/src/main/java/com/yahoo/metrics/simple/Sample.java b/simplemetrics/src/main/java/com/yahoo/metrics/simple/Sample.java index d55dce7bd79..837e93de09a 100644 --- a/simplemetrics/src/main/java/com/yahoo/metrics/simple/Sample.java +++ b/simplemetrics/src/main/java/com/yahoo/metrics/simple/Sample.java @@ -7,9 +7,10 @@ import com.yahoo.metrics.simple.UntypedMetric.AssumedType; * A single metric measurement and all the meta data needed to route it * correctly. * - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Steinar Knutsen */ public class Sample { + private final Identifier identifier; private final Measurement measurement; private final AssumedType metricType; diff --git a/simplemetrics/src/main/java/com/yahoo/metrics/simple/jdisc/SimpleMetricConsumer.java b/simplemetrics/src/main/java/com/yahoo/metrics/simple/jdisc/SimpleMetricConsumer.java index 66d7e0e7c2b..ee5f18e78d3 100644 --- a/simplemetrics/src/main/java/com/yahoo/metrics/simple/jdisc/SimpleMetricConsumer.java +++ b/simplemetrics/src/main/java/com/yahoo/metrics/simple/jdisc/SimpleMetricConsumer.java @@ -16,7 +16,7 @@ import com.yahoo.metrics.simple.UntypedMetric.AssumedType; /** * The single user facing part of the JDisc interfaces of simple metrics. 
* - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Steinar Knutsen */ public class SimpleMetricConsumer implements MetricConsumer { diff --git a/storage/src/tests/distributor/bucketdbupdatertest.cpp b/storage/src/tests/distributor/bucketdbupdatertest.cpp index b2d554c1e42..9795f5db5dc 100644 --- a/storage/src/tests/distributor/bucketdbupdatertest.cpp +++ b/storage/src/tests/distributor/bucketdbupdatertest.cpp @@ -26,6 +26,8 @@ using document::test::makeDocumentBucket; using document::test::makeBucketSpace; using document::BucketSpace; using document::FixedBucketSpaces; +using document::BucketId; +using document::Bucket; namespace storage::distributor { @@ -112,6 +114,14 @@ class BucketDBUpdaterTest : public CppUnit::TestFixture, CPPUNIT_TEST(adding_diverging_replica_to_existing_trusted_does_not_remove_trusted); CPPUNIT_TEST(batch_update_from_distributor_change_does_not_mark_diverging_replicas_as_trusted); CPPUNIT_TEST(global_distribution_hash_falls_back_to_legacy_format_upon_request_rejection); + CPPUNIT_TEST(non_owned_buckets_moved_to_read_only_db_on_ownership_change); + CPPUNIT_TEST(buckets_no_longer_available_are_not_moved_to_read_only_database); + CPPUNIT_TEST(non_owned_buckets_purged_when_read_only_support_is_config_disabled); + CPPUNIT_TEST(deferred_activated_state_does_not_enable_state_until_activation_received); + CPPUNIT_TEST(read_only_db_cleared_once_pending_state_is_activated); + CPPUNIT_TEST(read_only_db_is_populated_even_when_self_is_marked_down); + CPPUNIT_TEST(activate_cluster_state_request_with_mismatching_version_returns_actual_version); + CPPUNIT_TEST(activate_cluster_state_request_without_pending_transition_passes_message_through); CPPUNIT_TEST_SUITE_END(); public: @@ -123,10 +133,7 @@ protected: void testDistributorChangeWithGrouping(); void testNormalUsageInitializing(); void testFailedRequestBucketInfo(); - void testNoResponses(); void testBitChange(); - void testInconsistentChecksum(); - void 
testAddEmptyNode(); void testNodeDown(); void testStorageNodeInMaintenanceClearsBucketsForNode(); void testNodeDownCopiesGetInSync(); @@ -177,6 +184,14 @@ protected: void adding_diverging_replica_to_existing_trusted_does_not_remove_trusted(); void batch_update_from_distributor_change_does_not_mark_diverging_replicas_as_trusted(); void global_distribution_hash_falls_back_to_legacy_format_upon_request_rejection(); + void non_owned_buckets_moved_to_read_only_db_on_ownership_change(); + void buckets_no_longer_available_are_not_moved_to_read_only_database(); + void non_owned_buckets_purged_when_read_only_support_is_config_disabled(); + void deferred_activated_state_does_not_enable_state_until_activation_received(); + void read_only_db_cleared_once_pending_state_is_activated(); + void read_only_db_is_populated_even_when_self_is_marked_down(); + void activate_cluster_state_request_with_mismatching_version_returns_actual_version(); + void activate_cluster_state_request_without_pending_transition_passes_message_through(); auto &defaultDistributorBucketSpace() { return getBucketSpaceRepo().get(makeBucketSpace()); } @@ -190,13 +205,32 @@ protected: getBucketDBUpdater().getDistributorComponent().getIndex(), clusterStateBundle, "ui")); - auto &repo = getBucketSpaceRepo(); - for (auto &elem : repo) { - elem.second->setClusterState(clusterStateBundle.getDerivedClusterState(elem.first)); + for (auto* repo : {&mutable_repo(), &read_only_repo()}) { + for (auto& space : *repo) { + space.second->setClusterState(clusterStateBundle.getDerivedClusterState(space.first)); + } } return clusterInfo; } + DistributorBucketSpaceRepo& mutable_repo() noexcept { return getBucketSpaceRepo(); } + // Note: not calling this "immutable_repo" since it may actually be modified by the pending + // cluster state component (just not by operations), so it would not have the expected semantics. 
+ DistributorBucketSpaceRepo& read_only_repo() noexcept { return getReadOnlyBucketSpaceRepo(); } + + BucketDatabase& mutable_default_db() noexcept { + return mutable_repo().get(FixedBucketSpaces::default_space()).getBucketDatabase(); + } + BucketDatabase& mutable_global_db() noexcept { + return mutable_repo().get(FixedBucketSpaces::global_space()).getBucketDatabase(); + } + BucketDatabase& read_only_default_db() noexcept { + return read_only_repo().get(FixedBucketSpaces::default_space()).getBucketDatabase(); + } + BucketDatabase& read_only_global_db() noexcept { + return read_only_repo().get(FixedBucketSpaces::global_space()).getBucketDatabase(); + } + static std::string getNodeList(std::vector<uint16_t> nodes, size_t count); std::string getNodeList(std::vector<uint16_t> nodes); @@ -210,11 +244,17 @@ protected: return messagesPerBucketSpace * _bucketSpaces.size(); } + void trigger_completed_but_not_yet_activated_transition( + vespalib::stringref initial_state, uint32_t initial_buckets, uint32_t initial_expected_msgs, + vespalib::stringref pending_state, uint32_t pending_buckets, uint32_t pending_expected_msgs); + public: using OutdatedNodesMap = dbtransition::OutdatedNodesMap; void setUp() override { createLinks(); _bucketSpaces = getBucketSpaces(); + // Disable deferred activation by default (at least for now) to avoid breaking the entire world. 
+ getConfig().setAllowStaleReadsDuringClusterStateTransitions(false); }; void tearDown() override { @@ -228,7 +268,7 @@ public: uint32_t bucketCount, uint32_t invalidBucketCount = 0) { - RequestBucketInfoReply* sreply = new RequestBucketInfoReply(cmd); + auto sreply = std::make_shared<RequestBucketInfoReply>(cmd); sreply->setAddress(storageAddress(storageIndex)); api::RequestBucketInfoReply::EntryVector &vec = sreply->getBucketInfo(); @@ -261,7 +301,7 @@ public: } } - return std::shared_ptr<api::RequestBucketInfoReply>(sreply); + return sreply; } void fakeBucketReply(const lib::ClusterState &state, @@ -371,8 +411,7 @@ public: void setSystemState(const lib::ClusterState& state) { const size_t sizeBeforeState = _sender.commands.size(); getBucketDBUpdater().onSetSystemState( - std::shared_ptr<api::SetSystemStateCommand>( - new api::SetSystemStateCommand(state))); + std::make_shared<api::SetSystemStateCommand>(state)); // A lot of test logic has the assumption that all messages sent as a // result of cluster state changes will be in increasing index order // (for simplicity, not because this is required for correctness). 
@@ -381,6 +420,26 @@ public: sortSentMessagesByIndex(_sender, sizeBeforeState); } + void set_cluster_state_bundle(const lib::ClusterStateBundle& state) { + const size_t sizeBeforeState = _sender.commands.size(); + getBucketDBUpdater().onSetSystemState( + std::make_shared<api::SetSystemStateCommand>(state)); + sortSentMessagesByIndex(_sender, sizeBeforeState); + } + + bool activate_cluster_state_version(uint32_t version) { + return getBucketDBUpdater().onActivateClusterStateVersion( + std::make_shared<api::ActivateClusterStateVersionCommand>(version)); + } + + void assert_has_activate_cluster_state_reply_with_actual_version(uint32_t version) { + CPPUNIT_ASSERT_EQUAL(size_t(1), _sender.replies.size()); + auto* response = dynamic_cast<api::ActivateClusterStateVersionReply*>(_sender.replies.back().get()); + CPPUNIT_ASSERT(response != nullptr); + CPPUNIT_ASSERT_EQUAL(version, response->actualVersion()); + _sender.clear(); + } + void completeBucketInfoGathering(const lib::ClusterState& state, size_t expectedMsgs, uint32_t bucketCount = 1, @@ -586,8 +645,9 @@ public: OutdatedNodesMap outdatedNodesMap; state = PendingClusterState::createForClusterStateChange( - clock, clusterInfo, sender, owner.getBucketSpaceRepo(), cmd, outdatedNodesMap, - api::Timestamp(1)); + clock, clusterInfo, sender, + owner.getBucketSpaceRepo(), owner.getReadOnlyBucketSpaceRepo(), + cmd, outdatedNodesMap, api::Timestamp(1)); } PendingClusterStateFixture( @@ -598,23 +658,22 @@ public: owner.createClusterInfo(oldClusterState)); state = PendingClusterState::createForDistributionChange( - clock, clusterInfo, sender, owner.getBucketSpaceRepo(), api::Timestamp(1)); + clock, clusterInfo, sender, owner.getBucketSpaceRepo(), + owner.getReadOnlyBucketSpaceRepo(), api::Timestamp(1)); } }; - auto createPendingStateFixtureForStateChange( + std::unique_ptr<PendingClusterStateFixture> createPendingStateFixtureForStateChange( const std::string& oldClusterState, const std::string& newClusterState) { - return 
std::make_unique<PendingClusterStateFixture>( - *this, oldClusterState, newClusterState); + return std::make_unique<PendingClusterStateFixture>(*this, oldClusterState, newClusterState); } - auto createPendingStateFixtureForDistributionChange( + std::unique_ptr<PendingClusterStateFixture> createPendingStateFixtureForDistributionChange( const std::string& oldClusterState) { - return std::make_unique<PendingClusterStateFixture>( - *this, oldClusterState); + return std::make_unique<PendingClusterStateFixture>(*this, oldClusterState); } }; @@ -622,8 +681,8 @@ CPPUNIT_TEST_SUITE_REGISTRATION(BucketDBUpdaterTest); BucketDBUpdaterTest::BucketDBUpdaterTest() : CppUnit::TestFixture(), - DistributorTestUtil(), - _bucketSpaces() + DistributorTestUtil(), + _bucketSpaces() { } @@ -1533,7 +1592,8 @@ BucketDBUpdaterTest::getSentNodesDistributionChanged( ClusterInformation::CSP clusterInfo(createClusterInfo(oldClusterState)); std::unique_ptr<PendingClusterState> state( PendingClusterState::createForDistributionChange( - clock, clusterInfo, sender, getBucketSpaceRepo(), api::Timestamp(1))); + clock, clusterInfo, sender, getBucketSpaceRepo(), + getReadOnlyBucketSpaceRepo(), api::Timestamp(1))); sortSentMessagesByIndex(sender); @@ -1698,8 +1758,8 @@ BucketDBUpdaterTest::testPendingClusterStateReceive() OutdatedNodesMap outdatedNodesMap; std::unique_ptr<PendingClusterState> state( PendingClusterState::createForClusterStateChange( - clock, clusterInfo, sender, getBucketSpaceRepo(), cmd, outdatedNodesMap, - api::Timestamp(1))); + clock, clusterInfo, sender, getBucketSpaceRepo(), getReadOnlyBucketSpaceRepo(), + cmd, outdatedNodesMap, api::Timestamp(1))); CPPUNIT_ASSERT_EQUAL(messageCount(3), sender.commands.size()); @@ -1863,8 +1923,8 @@ BucketDBUpdaterTest::mergeBucketLists( ClusterInformation::CSP clusterInfo(createClusterInfo("cluster:d")); std::unique_ptr<PendingClusterState> state( PendingClusterState::createForClusterStateChange( - clock, clusterInfo, sender, getBucketSpaceRepo(), 
cmd, outdatedNodesMap, - beforeTime)); + clock, clusterInfo, sender, getBucketSpaceRepo(), getReadOnlyBucketSpaceRepo(), + cmd, outdatedNodesMap, beforeTime)); parseInputData(existingData, beforeTime, *state, includeBucketInfo); state->mergeIntoBucketDatabases(); @@ -1882,8 +1942,8 @@ BucketDBUpdaterTest::mergeBucketLists( ClusterInformation::CSP clusterInfo(createClusterInfo(oldState.toString())); std::unique_ptr<PendingClusterState> state( PendingClusterState::createForClusterStateChange( - clock, clusterInfo, sender, getBucketSpaceRepo(), cmd, outdatedNodesMap, - afterTime)); + clock, clusterInfo, sender, getBucketSpaceRepo(), getReadOnlyBucketSpaceRepo(), + cmd, outdatedNodesMap, afterTime)); parseInputData(newData, afterTime, *state, includeBucketInfo); state->mergeIntoBucketDatabases(); @@ -2599,4 +2659,192 @@ void BucketDBUpdaterTest::global_distribution_hash_falls_back_to_legacy_format_u CPPUNIT_ASSERT_EQUAL(current_hash, new_current_req.getDistributionHash()); } +namespace { + +template <typename Func> +void for_each_bucket(const BucketDatabase& db, const document::BucketSpace& space, Func&& f) { + BucketId last(0); + auto e = db.getNext(last); + while (e.valid()) { + f(space, e); + e = db.getNext(e.getBucketId()); + } +} + +template <typename Func> +void for_each_bucket(const DistributorBucketSpaceRepo& repo, Func&& f) { + for (const auto& space : repo) { + for_each_bucket(space.second->getBucketDatabase(), space.first, f); + } +} + +} + +using ConfigBuilder = vespa::config::content::core::StorDistributormanagerConfigBuilder; + +void BucketDBUpdaterTest::non_owned_buckets_moved_to_read_only_db_on_ownership_change() { + getConfig().setAllowStaleReadsDuringClusterStateTransitions(true); + + lib::ClusterState initial_state("distributor:1 storage:4"); // All buckets owned by us by definition + set_cluster_state_bundle(lib::ClusterStateBundle(initial_state, {}, false)); // Skip activation step for simplicity + + CPPUNIT_ASSERT_EQUAL(messageCount(4), 
_sender.commands.size()); + constexpr uint32_t n_buckets = 10; + completeBucketInfoGathering(initial_state, messageCount(4), n_buckets); + _sender.clear(); + + CPPUNIT_ASSERT_EQUAL(size_t(n_buckets), mutable_default_db().size()); + CPPUNIT_ASSERT_EQUAL(size_t(n_buckets), mutable_global_db().size()); + CPPUNIT_ASSERT_EQUAL(size_t(0), read_only_default_db().size()); + CPPUNIT_ASSERT_EQUAL(size_t(0), read_only_global_db().size()); + + lib::ClusterState pending_state("distributor:2 storage:4"); + + std::unordered_set<Bucket, Bucket::hash> buckets_not_owned_in_pending_state; + for_each_bucket(mutable_repo(), [&](const auto& space, const auto& entry) { + if (!getBucketDBUpdater().getDistributorComponent() + .ownsBucketInState(pending_state, makeDocumentBucket(entry.getBucketId()))) { + buckets_not_owned_in_pending_state.insert(Bucket(space, entry.getBucketId())); + } + }); + CPPUNIT_ASSERT(!buckets_not_owned_in_pending_state.empty()); + + set_cluster_state_bundle(lib::ClusterStateBundle(pending_state, {}, true)); // Now requires activation + + const auto buckets_not_owned_per_space = (buckets_not_owned_in_pending_state.size() / 2); // 2 spaces + const auto expected_mutable_buckets = n_buckets - buckets_not_owned_per_space; + CPPUNIT_ASSERT_EQUAL(expected_mutable_buckets, mutable_default_db().size()); + CPPUNIT_ASSERT_EQUAL(expected_mutable_buckets, mutable_global_db().size()); + CPPUNIT_ASSERT_EQUAL(buckets_not_owned_per_space, read_only_default_db().size()); + CPPUNIT_ASSERT_EQUAL(buckets_not_owned_per_space, read_only_global_db().size()); + + for_each_bucket(read_only_repo(), [&](const auto& space, const auto& entry) { + CPPUNIT_ASSERT(buckets_not_owned_in_pending_state.find(Bucket(space, entry.getBucketId())) + != buckets_not_owned_in_pending_state.end()); + }); +} + +void BucketDBUpdaterTest::buckets_no_longer_available_are_not_moved_to_read_only_database() { + constexpr uint32_t n_buckets = 10; + // No ownership change, just node down. 
Test redundancy is 2, so removing 2 nodes will + // cause some buckets to be entirely unavailable. + trigger_completed_but_not_yet_activated_transition("version:1 distributor:1 storage:4", n_buckets, 4, + "version:2 distributor:1 storage:4 .0.s:d .1.s:m", n_buckets, 0); + + CPPUNIT_ASSERT_EQUAL(size_t(0), read_only_default_db().size()); + CPPUNIT_ASSERT_EQUAL(size_t(0), read_only_global_db().size()); +} + +void BucketDBUpdaterTest::non_owned_buckets_purged_when_read_only_support_is_config_disabled() { + getConfig().setAllowStaleReadsDuringClusterStateTransitions(false); + + lib::ClusterState initial_state("distributor:1 storage:4"); // All buckets owned by us by definition + set_cluster_state_bundle(lib::ClusterStateBundle(initial_state, {}, false)); // Skip activation step for simplicity + + CPPUNIT_ASSERT_EQUAL(messageCount(4), _sender.commands.size()); + constexpr uint32_t n_buckets = 10; + completeBucketInfoGathering(initial_state, messageCount(4), n_buckets); + _sender.clear(); + + // Nothing in read-only DB after first bulk load of buckets. + CPPUNIT_ASSERT_EQUAL(size_t(0), read_only_default_db().size()); + CPPUNIT_ASSERT_EQUAL(size_t(0), read_only_global_db().size()); + + lib::ClusterState pending_state("distributor:2 storage:4"); + setSystemState(pending_state); + // No buckets should be moved into read only db after ownership changes. 
+ CPPUNIT_ASSERT_EQUAL(size_t(0), read_only_default_db().size()); + CPPUNIT_ASSERT_EQUAL(size_t(0), read_only_global_db().size()); +} + +void BucketDBUpdaterTest::trigger_completed_but_not_yet_activated_transition( + vespalib::stringref initial_state_str, + uint32_t initial_buckets, + uint32_t initial_expected_msgs, + vespalib::stringref pending_state_str, + uint32_t pending_buckets, + uint32_t pending_expected_msgs) +{ + getConfig().setAllowStaleReadsDuringClusterStateTransitions(true); + lib::ClusterState initial_state(initial_state_str); + setSystemState(initial_state); + CPPUNIT_ASSERT_EQUAL(messageCount(initial_expected_msgs), _sender.commands.size()); + completeBucketInfoGathering(initial_state, messageCount(initial_expected_msgs), initial_buckets); + _sender.clear(); + + lib::ClusterState pending_state(pending_state_str); // Ownership change + set_cluster_state_bundle(lib::ClusterStateBundle(pending_state, {}, true)); + CPPUNIT_ASSERT_EQUAL(messageCount(pending_expected_msgs), _sender.commands.size()); + completeBucketInfoGathering(pending_state, messageCount(pending_expected_msgs), pending_buckets); + _sender.clear(); +} + +void BucketDBUpdaterTest::deferred_activated_state_does_not_enable_state_until_activation_received() { + constexpr uint32_t n_buckets = 10; + trigger_completed_but_not_yet_activated_transition("version:1 distributor:2 storage:4", 0, 4, + "version:2 distributor:1 storage:4", n_buckets, 4); + + // Version should not be switched over yet + CPPUNIT_ASSERT_EQUAL(uint32_t(1), getDistributor().getClusterStateBundle().getVersion()); + + CPPUNIT_ASSERT_EQUAL(uint64_t(0), mutable_default_db().size()); + CPPUNIT_ASSERT_EQUAL(uint64_t(0), mutable_global_db().size()); + + CPPUNIT_ASSERT(!activate_cluster_state_version(2)); + + CPPUNIT_ASSERT_EQUAL(uint32_t(2), getDistributor().getClusterStateBundle().getVersion()); + CPPUNIT_ASSERT_EQUAL(uint64_t(n_buckets), mutable_default_db().size()); + CPPUNIT_ASSERT_EQUAL(uint64_t(n_buckets), 
mutable_global_db().size()); +} + +void BucketDBUpdaterTest::read_only_db_cleared_once_pending_state_is_activated() { + constexpr uint32_t n_buckets = 10; + trigger_completed_but_not_yet_activated_transition("version:1 distributor:1 storage:4", n_buckets, 4, + "version:2 distributor:2 storage:4", n_buckets, 0); + CPPUNIT_ASSERT(!activate_cluster_state_version(2)); + + CPPUNIT_ASSERT_EQUAL(uint64_t(0), read_only_default_db().size()); + CPPUNIT_ASSERT_EQUAL(uint64_t(0), read_only_global_db().size()); +} + +void BucketDBUpdaterTest::read_only_db_is_populated_even_when_self_is_marked_down() { + constexpr uint32_t n_buckets = 10; + trigger_completed_but_not_yet_activated_transition("version:1 distributor:1 storage:4", n_buckets, 4, + "version:2 distributor:1 .0.s:d storage:4", n_buckets, 0); + + // State not yet activated, so read-only DBs have got all the buckets we used to have. + CPPUNIT_ASSERT_EQUAL(uint64_t(0), mutable_default_db().size()); + CPPUNIT_ASSERT_EQUAL(uint64_t(0), mutable_global_db().size()); + CPPUNIT_ASSERT_EQUAL(uint64_t(n_buckets), read_only_default_db().size()); + CPPUNIT_ASSERT_EQUAL(uint64_t(n_buckets), read_only_global_db().size()); +} + +void BucketDBUpdaterTest::activate_cluster_state_request_with_mismatching_version_returns_actual_version() { + constexpr uint32_t n_buckets = 10; + trigger_completed_but_not_yet_activated_transition("version:4 distributor:1 storage:4", n_buckets, 4, + "version:5 distributor:2 storage:4", n_buckets, 0); + + CPPUNIT_ASSERT(activate_cluster_state_version(4)); // Too old version + assert_has_activate_cluster_state_reply_with_actual_version(5); + + CPPUNIT_ASSERT(activate_cluster_state_version(6)); // More recent version than what has been observed + assert_has_activate_cluster_state_reply_with_actual_version(5); +} + +void BucketDBUpdaterTest::activate_cluster_state_request_without_pending_transition_passes_message_through() { + constexpr uint32_t n_buckets = 10; + 
trigger_completed_but_not_yet_activated_transition("version:1 distributor:2 storage:4", 0, 4, + "version:2 distributor:1 storage:4", n_buckets, 4); + // Activate version 2; no pending cluster state after this. + CPPUNIT_ASSERT(!activate_cluster_state_version(2)); + + // No pending cluster state for version 3, just passed through to be implicitly bounced by state manager. + // Note: state manager is not modelled in this test, so we just check that the message handler returns + // false (meaning "didn't take message ownership") and there's no auto-generated reply. + CPPUNIT_ASSERT(!activate_cluster_state_version(3)); + CPPUNIT_ASSERT_EQUAL(size_t(0), _sender.replies.size()); +} + +// TODO rename distributor config to imply two phase functionality explicitly? + } diff --git a/storage/src/tests/distributor/distributortestutil.cpp b/storage/src/tests/distributor/distributortestutil.cpp index d3496d0c9f6..3f7f2eac63a 100644 --- a/storage/src/tests/distributor/distributortestutil.cpp +++ b/storage/src/tests/distributor/distributortestutil.cpp @@ -388,6 +388,16 @@ DistributorTestUtil::getBucketSpaceRepo() const { return _distributor->getBucketSpaceRepo(); } +DistributorBucketSpaceRepo & +DistributorTestUtil::getReadOnlyBucketSpaceRepo() { + return _distributor->getReadOnlyBucketSpaceRepo(); +} + +const DistributorBucketSpaceRepo & +DistributorTestUtil::getReadOnlyBucketSpaceRepo() const { + return _distributor->getReadOnlyBucketSpaceRepo(); +} + const lib::Distribution& DistributorTestUtil::getDistribution() const { return getBucketSpaceRepo().get(makeBucketSpace()).getDistribution(); diff --git a/storage/src/tests/distributor/distributortestutil.h b/storage/src/tests/distributor/distributortestutil.h index 10cc5eeaca1..420111437d2 100644 --- a/storage/src/tests/distributor/distributortestutil.h +++ b/storage/src/tests/distributor/distributortestutil.h @@ -132,6 +132,8 @@ public: const BucketDatabase& getBucketDatabase(document::BucketSpace space) const; 
DistributorBucketSpaceRepo &getBucketSpaceRepo(); const DistributorBucketSpaceRepo &getBucketSpaceRepo() const; + DistributorBucketSpaceRepo& getReadOnlyBucketSpaceRepo(); + const DistributorBucketSpaceRepo& getReadOnlyBucketSpaceRepo() const; const lib::Distribution& getDistribution() const; // "End to end" distribution change trigger, which will invoke the bucket diff --git a/storage/src/tests/distributor/externaloperationhandlertest.cpp b/storage/src/tests/distributor/externaloperationhandlertest.cpp index ddf88f50c36..40fe885dcb1 100644 --- a/storage/src/tests/distributor/externaloperationhandlertest.cpp +++ b/storage/src/tests/distributor/externaloperationhandlertest.cpp @@ -4,6 +4,7 @@ #include <vespa/storage/distributor/externaloperationhandler.h> #include <vespa/storage/distributor/distributor.h> #include <vespa/storage/distributor/distributormetricsset.h> +#include <vespa/storage/distributor/operations/external/getoperation.h> #include <vespa/storageapi/message/persistence.h> #include <vespa/document/repo/documenttyperepo.h> #include <vespa/document/update/documentupdate.h> @@ -20,8 +21,11 @@ class ExternalOperationHandlerTest : public CppUnit::TestFixture, CPPUNIT_TEST_SUITE(ExternalOperationHandlerTest); CPPUNIT_TEST(testBucketSplitMask); - CPPUNIT_TEST(testOperationRejectedOnWrongDistribution); - CPPUNIT_TEST(testOperationRejectedOnPendingWrongDistribution); + CPPUNIT_TEST(mutating_operation_wdr_bounced_on_wrong_current_distribution); + CPPUNIT_TEST(mutating_operation_busy_bounced_on_wrong_pending_distribution); + CPPUNIT_TEST(mutating_operation_busy_bounced_if_no_cluster_state_received_yet); + CPPUNIT_TEST(read_only_operation_wdr_bounced_on_wrong_current_distribution); + CPPUNIT_TEST(read_only_operation_busy_bounced_if_no_cluster_state_received_yet); CPPUNIT_TEST(reject_put_if_not_past_safe_time_point); CPPUNIT_TEST(reject_remove_if_not_past_safe_time_point); CPPUNIT_TEST(reject_update_if_not_past_safe_time_point); @@ -37,6 +41,9 @@ class 
ExternalOperationHandlerTest : public CppUnit::TestFixture, CPPUNIT_TEST(concurrent_get_and_mutation_do_not_conflict); CPPUNIT_TEST(sequencing_works_across_mutation_types); CPPUNIT_TEST(sequencing_can_be_explicitly_config_disabled); + CPPUNIT_TEST(gets_are_started_with_mutable_db_outside_transition_period); + CPPUNIT_TEST(gets_are_started_with_read_only_db_during_transition_period); + CPPUNIT_TEST(gets_are_busy_bounced_during_transition_period_if_stale_reads_disabled); CPPUNIT_TEST_SUITE_END(); document::BucketId findNonOwnedUserBucketInState(vespalib::stringref state); @@ -49,10 +56,13 @@ class ExternalOperationHandlerTest : public CppUnit::TestFixture, std::shared_ptr<api::UpdateCommand> makeUpdateCommand(const vespalib::string& doc_type, const vespalib::string& id) const; std::shared_ptr<api::UpdateCommand> makeUpdateCommand() const; + std::shared_ptr<api::UpdateCommand> makeUpdateCommandForUser(uint64_t id) const; std::shared_ptr<api::PutCommand> makePutCommand(const vespalib::string& doc_type, const vespalib::string& id) const; std::shared_ptr<api::RemoveCommand> makeRemoveCommand(const vespalib::string& id) const; + void verify_busy_bounced_due_to_no_active_state(std::shared_ptr<api::StorageCommand> cmd); + Operation::SP start_operation_verify_not_rejected(std::shared_ptr<api::StorageCommand> cmd); void start_operation_verify_rejected(std::shared_ptr<api::StorageCommand> cmd); @@ -80,10 +90,16 @@ class ExternalOperationHandlerTest : public CppUnit::TestFixture, const vespalib::string _dummy_id{"id:foo:testdoctype1::bar"}; + // Returns an arbitrary bucket not owned in the pending state + document::BucketId set_up_pending_cluster_state_transition(bool read_only_enabled); + protected: void testBucketSplitMask(); - void testOperationRejectedOnWrongDistribution(); - void testOperationRejectedOnPendingWrongDistribution(); + void mutating_operation_wdr_bounced_on_wrong_current_distribution(); + void mutating_operation_busy_bounced_on_wrong_pending_distribution(); + 
void mutating_operation_busy_bounced_if_no_cluster_state_received_yet(); + void read_only_operation_wdr_bounced_on_wrong_current_distribution(); + void read_only_operation_busy_bounced_if_no_cluster_state_received_yet(); void reject_put_if_not_past_safe_time_point(); void reject_remove_if_not_past_safe_time_point(); void reject_update_if_not_past_safe_time_point(); @@ -99,6 +115,9 @@ protected: void concurrent_get_and_mutation_do_not_conflict(); void sequencing_works_across_mutation_types(); void sequencing_can_be_explicitly_config_disabled(); + void gets_are_started_with_mutable_db_outside_transition_period(); + void gets_are_started_with_read_only_db_during_transition_period(); + void gets_are_busy_bounced_during_transition_period_if_stale_reads_disabled(); void assert_rejection_due_to_unsafe_time( std::shared_ptr<api::StorageCommand> cmd); @@ -220,6 +239,11 @@ ExternalOperationHandlerTest::makeUpdateCommand() const { return makeUpdateCommand("testdoctype1", "id:foo:testdoctype1::baz"); } +std::shared_ptr<api::UpdateCommand> +ExternalOperationHandlerTest::makeUpdateCommandForUser(uint64_t id) const { + return makeUpdateCommand("testdoctype1", vespalib::make_string("id::testdoctype1:n=%" PRIu64 ":bar", id)); +} + std::shared_ptr<api::PutCommand> ExternalOperationHandlerTest::makePutCommand( const vespalib::string& doc_type, const vespalib::string& id) const { @@ -233,10 +257,30 @@ std::shared_ptr<api::RemoveCommand> ExternalOperationHandlerTest::makeRemoveComm } void -ExternalOperationHandlerTest::testOperationRejectedOnWrongDistribution() +ExternalOperationHandlerTest::mutating_operation_wdr_bounced_on_wrong_current_distribution() { createLinks(); - std::string state("distributor:2 storage:2"); + std::string state("version:1 distributor:2 storage:2"); + setupDistributor(1, 2, state); + + document::BucketId bucket(findNonOwnedUserBucketInState(state)); + auto cmd = makeUpdateCommandForUser(bucket.withoutCountBits()); + + Operation::SP genOp; + 
CPPUNIT_ASSERT(getExternalOperationHandler().handleMessage(cmd, genOp)); + CPPUNIT_ASSERT(!genOp.get()); + CPPUNIT_ASSERT_EQUAL(size_t(1), _sender.replies.size()); + CPPUNIT_ASSERT_EQUAL( + std::string("ReturnCode(WRONG_DISTRIBUTION, " + "version:1 distributor:2 storage:2)"), + _sender.replies[0]->getResult().toString()); +} + +void +ExternalOperationHandlerTest::read_only_operation_wdr_bounced_on_wrong_current_distribution() +{ + createLinks(); + std::string state("version:1 distributor:2 storage:2"); setupDistributor(1, 2, state); document::BucketId bucket(findNonOwnedUserBucketInState(state)); @@ -248,43 +292,65 @@ ExternalOperationHandlerTest::testOperationRejectedOnWrongDistribution() CPPUNIT_ASSERT_EQUAL(size_t(1), _sender.replies.size()); CPPUNIT_ASSERT_EQUAL( std::string("ReturnCode(WRONG_DISTRIBUTION, " - "distributor:2 storage:2)"), + "version:1 distributor:2 storage:2)"), _sender.replies[0]->getResult().toString()); } void -ExternalOperationHandlerTest::testOperationRejectedOnPendingWrongDistribution() +ExternalOperationHandlerTest::mutating_operation_busy_bounced_on_wrong_pending_distribution() { createLinks(); - std::string current("distributor:2 storage:2"); - std::string pending("distributor:3 storage:3"); + std::string current("version:10 distributor:2 storage:2"); + std::string pending("version:11 distributor:3 storage:3"); setupDistributor(1, 3, current); document::BucketId b(findOwned1stNotOwned2ndInStates(current, pending)); // Trigger pending cluster state - auto stateCmd = std::make_shared<api::SetSystemStateCommand>( - lib::ClusterState(pending)); + auto stateCmd = std::make_shared<api::SetSystemStateCommand>(lib::ClusterState(pending)); getBucketDBUpdater().onSetSystemState(stateCmd); - auto cmd = makeGetCommandForUser(b.withoutCountBits()); + auto cmd = makeUpdateCommandForUser(b.withoutCountBits()); Operation::SP genOp; CPPUNIT_ASSERT(getExternalOperationHandler().handleMessage(cmd, genOp)); CPPUNIT_ASSERT(!genOp.get()); 
CPPUNIT_ASSERT_EQUAL(size_t(1), _sender.replies.size()); - // Fail back with _pending_ cluster state so client can start trying - // correct distributor immediately. If that distributor has not yet - // completed processing its pending cluster state, it'll return the - // old (current) cluster state, causing the client to bounce between - // the two until the pending states have been resolved. This is pretty - // much inevitable with the current design. CPPUNIT_ASSERT_EQUAL( - std::string("ReturnCode(WRONG_DISTRIBUTION, " - "distributor:3 storage:3)"), + std::string("ReturnCode(BUSY, Currently pending cluster state transition from version 10 to 11)"), _sender.replies[0]->getResult().toString()); } +void +ExternalOperationHandlerTest::verify_busy_bounced_due_to_no_active_state(std::shared_ptr<api::StorageCommand> cmd) +{ + createLinks(); + std::string state{}; // No version --> not yet received + setupDistributor(1, 2, state); + + Operation::SP genOp; + CPPUNIT_ASSERT(getExternalOperationHandler().handleMessage(cmd, genOp)); + CPPUNIT_ASSERT(!genOp.get()); + CPPUNIT_ASSERT_EQUAL(size_t(1), _sender.replies.size()); + CPPUNIT_ASSERT_EQUAL( + std::string("ReturnCode(BUSY, No cluster state activated yet)"), + _sender.replies[0]->getResult().toString()); +} + +// TODO NOT_READY is a more appropriate return code for this case, but must ensure it's +// handled gracefully and silently through the stack. BUSY is a safe bet until then. 
+void +ExternalOperationHandlerTest::mutating_operation_busy_bounced_if_no_cluster_state_received_yet() +{ + verify_busy_bounced_due_to_no_active_state(makeUpdateCommandForUser(12345)); +} + +void +ExternalOperationHandlerTest::read_only_operation_busy_bounced_if_no_cluster_state_received_yet() +{ + verify_busy_bounced_due_to_no_active_state(makeGetCommandForUser(12345)); +} + using TimePoint = ExternalOperationHandler::TimePoint; using namespace std::literals::chrono_literals; @@ -292,7 +358,7 @@ void ExternalOperationHandlerTest::assert_rejection_due_to_unsafe_time( std::shared_ptr<api::StorageCommand> cmd) { createLinks(); - setupDistributor(1, 2, "distributor:1 storage:1"); + setupDistributor(1, 2, "version:1 distributor:1 storage:1"); getClock().setAbsoluteTimeInSeconds(9); getExternalOperationHandler().rejectFeedBeforeTimeReached(TimePoint(10s)); @@ -327,7 +393,7 @@ void ExternalOperationHandlerTest::reject_update_if_not_past_safe_time_point() { void ExternalOperationHandlerTest::get_not_rejected_by_unsafe_time_point() { createLinks(); - setupDistributor(1, 2, "distributor:1 storage:1"); + setupDistributor(1, 2, "version:1 distributor:1 storage:1"); getClock().setAbsoluteTimeInSeconds(9); getExternalOperationHandler().rejectFeedBeforeTimeReached(TimePoint(10s)); @@ -342,7 +408,7 @@ void ExternalOperationHandlerTest::get_not_rejected_by_unsafe_time_point() { void ExternalOperationHandlerTest::mutation_not_rejected_when_safe_point_reached() { createLinks(); - setupDistributor(1, 2, "distributor:1 storage:1"); + setupDistributor(1, 2, "version:1 distributor:1 storage:1"); getClock().setAbsoluteTimeInSeconds(10); getExternalOperationHandler().rejectFeedBeforeTimeReached(TimePoint(10s)); @@ -360,7 +426,7 @@ void ExternalOperationHandlerTest::mutation_not_rejected_when_safe_point_reached void ExternalOperationHandlerTest::set_up_distributor_for_sequencing_test() { createLinks(); - setupDistributor(1, 2, "distributor:1 storage:1"); + setupDistributor(1, 2, 
"version:1 distributor:1 storage:1"); } Operation::SP ExternalOperationHandlerTest::start_operation_verify_not_rejected( @@ -486,6 +552,52 @@ void ExternalOperationHandlerTest::sequencing_can_be_explicitly_config_disabled( start_operation_verify_not_rejected(makeRemoveCommand(_dummy_id)); } +void ExternalOperationHandlerTest::gets_are_started_with_mutable_db_outside_transition_period() { + createLinks(); + std::string current = "version:1 distributor:1 storage:3"; + setupDistributor(1, 3, current); + getConfig().setAllowStaleReadsDuringClusterStateTransitions(true); + + document::BucketId b(16, 1234); // Only 1 distributor (us), so doesn't matter + + auto op = start_operation_verify_not_rejected(makeGetCommandForUser(b.withoutCountBits())); + auto& get_op = dynamic_cast<GetOperation&>(*op); + const auto* expected_space = &getBucketSpaceRepo().get(document::FixedBucketSpaces::default_space()); + CPPUNIT_ASSERT_EQUAL(expected_space, &get_op.bucketSpace()); +} + +document::BucketId ExternalOperationHandlerTest::set_up_pending_cluster_state_transition(bool read_only_enabled) { + createLinks(); + std::string current = "version:123 distributor:2 storage:2"; + std::string pending = "version:321 distributor:3 storage:3"; + setupDistributor(1, 3, current); + getConfig().setAllowStaleReadsDuringClusterStateTransitions(read_only_enabled); + + // Trigger pending cluster state + auto stateCmd = std::make_shared<api::SetSystemStateCommand>(lib::ClusterState(pending)); + getBucketDBUpdater().onSetSystemState(stateCmd); + return findOwned1stNotOwned2ndInStates(current, pending); +} + +void ExternalOperationHandlerTest::gets_are_started_with_read_only_db_during_transition_period() { + auto non_owned_bucket = set_up_pending_cluster_state_transition(true); + + auto op = start_operation_verify_not_rejected(makeGetCommandForUser(non_owned_bucket.withoutCountBits())); + auto& get_op = dynamic_cast<GetOperation&>(*op); + const auto* expected_space = 
&getReadOnlyBucketSpaceRepo().get(document::FixedBucketSpaces::default_space()); + CPPUNIT_ASSERT_EQUAL(expected_space, &get_op.bucketSpace()); +} + +void ExternalOperationHandlerTest::gets_are_busy_bounced_during_transition_period_if_stale_reads_disabled() { + auto non_owned_bucket = set_up_pending_cluster_state_transition(false); + + start_operation_verify_rejected(makeGetCommandForUser(non_owned_bucket.withoutCountBits())); + CPPUNIT_ASSERT_EQUAL( + std::string("ReturnCode(BUSY, Currently pending cluster state transition from version 123 to 321)"), + _sender.replies[0]->getResult().toString()); + +} + // TODO support sequencing of RemoveLocation? It's a mutating operation, but supporting it with // the current approach is not trivial. A RemoveLocation operation covers the _entire_ bucket // sub tree under a given location, while the sequencer works on individual GIDs. Mapping the diff --git a/storage/src/tests/storageserver/bouncertest.cpp b/storage/src/tests/storageserver/bouncertest.cpp index 27c13a3707e..371c24accbc 100644 --- a/storage/src/tests/storageserver/bouncertest.cpp +++ b/storage/src/tests/storageserver/bouncertest.cpp @@ -43,6 +43,7 @@ struct BouncerTest : public CppUnit::TestFixture { void outOfBoundsConfigValuesThrowException(); void abort_request_when_derived_bucket_space_node_state_is_marked_down(); void client_operations_are_allowed_through_on_cluster_state_down_distributor(); + void cluster_state_activation_commands_are_not_bounced(); CPPUNIT_TEST_SUITE(BouncerTest); CPPUNIT_TEST(testFutureTimestamp); @@ -57,6 +58,7 @@ struct BouncerTest : public CppUnit::TestFixture { CPPUNIT_TEST(outOfBoundsConfigValuesThrowException); CPPUNIT_TEST(abort_request_when_derived_bucket_space_node_state_is_marked_down); CPPUNIT_TEST(client_operations_are_allowed_through_on_cluster_state_down_distributor); + CPPUNIT_TEST(cluster_state_activation_commands_are_not_bounced); CPPUNIT_TEST_SUITE_END(); using Priority = api::StorageMessage::Priority; @@ -368,5 +370,17 
@@ void BouncerTest::client_operations_are_allowed_through_on_cluster_state_down_di CPPUNIT_ASSERT_EQUAL(uint64_t(0), _manager->metrics().unavailable_node_aborts.getValue()); } +void BouncerTest::cluster_state_activation_commands_are_not_bounced() { + tearDown(); + setUpAsNode(lib::NodeType::DISTRIBUTOR); + + auto state = makeClusterStateBundle("version:10 distributor:3 .2.s:d storage:3", {}); // Our index (2) is down + _node->getNodeStateUpdater().setClusterStateBundle(state); + + auto activate_cmd = std::make_shared<api::ActivateClusterStateVersionCommand>(11); + _upper->sendDown(activate_cmd); + assertMessageNotBounced(); +} + } // storage diff --git a/storage/src/tests/storageserver/fnet_listener_test.cpp b/storage/src/tests/storageserver/fnet_listener_test.cpp index 84051041d25..d40b230d725 100644 --- a/storage/src/tests/storageserver/fnet_listener_test.cpp +++ b/storage/src/tests/storageserver/fnet_listener_test.cpp @@ -27,6 +27,9 @@ public: CPPUNIT_TEST(set_distribution_rpc_is_immediately_failed_if_listener_is_closed); CPPUNIT_TEST(overly_large_uncompressed_bundle_size_parameter_returns_rpc_error); CPPUNIT_TEST(mismatching_uncompressed_bundle_size_parameter_returns_rpc_error); + CPPUNIT_TEST(true_deferred_activation_flag_can_be_roundtrip_encoded); + CPPUNIT_TEST(false_deferred_activation_flag_can_be_roundtrip_encoded); + CPPUNIT_TEST(activate_cluster_state_version_rpc_enqueues_command_with_version); CPPUNIT_TEST_SUITE_END(); void baseline_set_distribution_states_rpc_enqueues_command_with_state_bundle(); @@ -35,6 +38,9 @@ public: void set_distribution_rpc_is_immediately_failed_if_listener_is_closed(); void overly_large_uncompressed_bundle_size_parameter_returns_rpc_error(); void mismatching_uncompressed_bundle_size_parameter_returns_rpc_error(); + void true_deferred_activation_flag_can_be_roundtrip_encoded(); + void false_deferred_activation_flag_can_be_roundtrip_encoded(); + void activate_cluster_state_version_rpc_enqueues_command_with_version(); }; 
CPPUNIT_TEST_SUITE_REGISTRATION(FNetListenerTest); @@ -54,24 +60,25 @@ struct DummyReturnHandler : FRT_IReturnHandler { FNET_Connection* GetConnection() override { return nullptr; } }; -struct Fixture { +struct FixtureBase { // TODO factor out Slobrok code to avoid need to set up live ports for unrelated tests mbus::Slobrok slobrok; vdstestlib::DirConfig config; MockOperationEnqueuer enqueuer; std::unique_ptr<FNetListener> fnet_listener; - SlimeClusterStateBundleCodec codec; DummyReturnHandler return_handler; bool request_is_detached{false}; FRT_RPCRequest* bound_request{nullptr}; - Fixture() : config(getStandardConfig(true)) { + FixtureBase() + : config(getStandardConfig(true)) + { config.getConfig("stor-server").set("node_index", "1"); addSlobrokConfig(config, slobrok); fnet_listener = std::make_unique<FNetListener>(enqueuer, config.getConfigId(), 0); } - ~Fixture() { + virtual ~FixtureBase() { // Must destroy any associated message contexts that may have refs to FRT_Request // instance _before_ we destroy the request itself. 
enqueuer._enqueued.clear(); @@ -79,6 +86,12 @@ struct Fixture { bound_request->SubRef(); } } +}; + +struct SetStateFixture : FixtureBase { + SlimeClusterStateBundleCodec codec; + + SetStateFixture() : FixtureBase() {} void bind_request_params(EncodedClusterStateBundle& encoded_bundle, uint32_t uncompressed_length) { bound_request = new FRT_RPCRequest(); // Naked new isn't pretty, but FRT_RPCRequest has internal refcounting @@ -123,6 +136,10 @@ struct Fixture { lib::ClusterStateBundle dummy_baseline_bundle() const { return lib::ClusterStateBundle(lib::ClusterState("version:123 distributor:3 storage:3")); } + + lib::ClusterStateBundle dummy_baseline_bundle_with_deferred_activation(bool deferred) const { + return lib::ClusterStateBundle(lib::ClusterState("version:123 distributor:3 storage:3"), {}, deferred); + } }; std::shared_ptr<const lib::ClusterState> state_of(vespalib::stringref state) { @@ -138,17 +155,17 @@ vespalib::string make_compressable_state_string() { ss.str().data(), ss.str().data()); } -} +} // anon namespace void FNetListenerTest::baseline_set_distribution_states_rpc_enqueues_command_with_state_bundle() { - Fixture f; + SetStateFixture f; auto baseline = f.dummy_baseline_bundle(); f.assert_request_received_and_propagated(baseline); } void FNetListenerTest::set_distribution_states_rpc_with_derived_enqueues_command_with_state_bundle() { - Fixture f; + SetStateFixture f; lib::ClusterStateBundle spaces_bundle( lib::ClusterState("version:123 distributor:3 storage:3"), {{FixedBucketSpaces::default_space(), state_of("version:123 distributor:3 storage:3 .0.s:d")}, @@ -158,7 +175,7 @@ void FNetListenerTest::set_distribution_states_rpc_with_derived_enqueues_command } void FNetListenerTest::compressed_bundle_is_transparently_uncompressed() { - Fixture f; + SetStateFixture f; auto state_str = make_compressable_state_string(); lib::ClusterStateBundle compressable_bundle{lib::ClusterState(state_str)}; @@ -171,24 +188,73 @@ void 
FNetListenerTest::compressed_bundle_is_transparently_uncompressed() { } void FNetListenerTest::set_distribution_rpc_is_immediately_failed_if_listener_is_closed() { - Fixture f; + SetStateFixture f; f.create_request(f.dummy_baseline_bundle()); f.fnet_listener->close(); f.assert_request_returns_error_response(RPCRequestWrapper::ERR_NODE_SHUTTING_DOWN); } void FNetListenerTest::overly_large_uncompressed_bundle_size_parameter_returns_rpc_error() { - Fixture f; + SetStateFixture f; auto encoded_bundle = f.codec.encode(f.dummy_baseline_bundle()); f.bind_request_params(encoded_bundle, FNetListener::StateBundleMaxUncompressedSize + 1); f.assert_request_returns_error_response(RPCRequestWrapper::ERR_BAD_REQUEST); } void FNetListenerTest::mismatching_uncompressed_bundle_size_parameter_returns_rpc_error() { - Fixture f; + SetStateFixture f; auto encoded_bundle = f.codec.encode(f.dummy_baseline_bundle()); f.bind_request_params(encoded_bundle, encoded_bundle._buffer->getDataLen() + 100); f.assert_request_returns_error_response(RPCRequestWrapper::ERR_BAD_REQUEST); } +void FNetListenerTest::true_deferred_activation_flag_can_be_roundtrip_encoded() { + SetStateFixture f; + f.assert_request_received_and_propagated(f.dummy_baseline_bundle_with_deferred_activation(true)); + +} + +void FNetListenerTest::false_deferred_activation_flag_can_be_roundtrip_encoded() { + SetStateFixture f; + f.assert_request_received_and_propagated(f.dummy_baseline_bundle_with_deferred_activation(false)); +} + +struct ActivateStateFixture : FixtureBase { + ActivateStateFixture() : FixtureBase() {} + + void bind_request_params(uint32_t activate_version) { + bound_request = new FRT_RPCRequest(); // Naked new isn't pretty, but FRT_RPCRequest has internal refcounting + auto* params = bound_request->GetParams(); + params->AddInt32(activate_version); + + bound_request->SetDetachedPT(&request_is_detached); + bound_request->SetReturnHandler(&return_handler); + } + + void create_request(uint32_t activate_version) { + 
// Only 1 request allowed per fixture due to lifetime handling snags + assert(bound_request == nullptr); + bind_request_params(activate_version); + } + + void assert_enqueued_operation_has_activate_version(uint32_t version) { + CPPUNIT_ASSERT(bound_request != nullptr); + CPPUNIT_ASSERT(request_is_detached); + CPPUNIT_ASSERT_EQUAL(size_t(1), enqueuer._enqueued.size()); + auto& state_request = dynamic_cast<const api::ActivateClusterStateVersionCommand&>(*enqueuer._enqueued[0]); + CPPUNIT_ASSERT_EQUAL(version, state_request.version()); + } + + void assert_request_received_and_propagated(uint32_t activate_version) { + create_request(activate_version); + fnet_listener->RPC_activateClusterStateVersion(bound_request); + assert_enqueued_operation_has_activate_version(activate_version); + } +}; + +void FNetListenerTest::activate_cluster_state_version_rpc_enqueues_command_with_version() { + ActivateStateFixture f; + f.assert_request_received_and_propagated(1234567); +} + } diff --git a/storage/src/tests/storageserver/statemanagertest.cpp b/storage/src/tests/storageserver/statemanagertest.cpp index 19f414482db..cdf990fa28f 100644 --- a/storage/src/tests/storageserver/statemanagertest.cpp +++ b/storage/src/tests/storageserver/statemanagertest.cpp @@ -37,6 +37,7 @@ struct StateManagerTest : public CppUnit::TestFixture { void can_explicitly_send_get_node_state_reply(); void explicit_node_state_replying_without_pending_request_immediately_replies_on_next_request(); void immediate_node_state_replying_is_tracked_per_controller(); + void activation_command_is_bounced_with_current_cluster_state_version(); CPPUNIT_TEST_SUITE(StateManagerTest); CPPUNIT_TEST(testSystemState); @@ -45,8 +46,10 @@ struct StateManagerTest : public CppUnit::TestFixture { CPPUNIT_TEST(can_explicitly_send_get_node_state_reply); CPPUNIT_TEST(explicit_node_state_replying_without_pending_request_immediately_replies_on_next_request); CPPUNIT_TEST(immediate_node_state_replying_is_tracked_per_controller); + 
CPPUNIT_TEST(activation_command_is_bounced_with_current_cluster_state_version); CPPUNIT_TEST_SUITE_END(); + void force_current_cluster_state_version(uint32_t version); void mark_reported_node_state_up(); void send_down_get_node_state_request(uint16_t controller_index); void assert_ok_get_node_state_reply_sent_and_clear(); @@ -101,6 +104,12 @@ StateManagerTest::tearDown() { _metricManager.reset(); } +void StateManagerTest::force_current_cluster_state_version(uint32_t version) { + ClusterState state(*_manager->getClusterStateBundle()->getBaselineClusterState()); + state.setVersion(version); + _manager->setClusterStateBundle(lib::ClusterStateBundle(state)); +} + #define GET_ONLY_OK_REPLY(varname) \ { \ CPPUNIT_ASSERT_EQUAL(size_t(1), _upper->getNumReplies()); \ @@ -236,9 +245,7 @@ StateManagerTest::testReportedNodeState() } void StateManagerTest::current_cluster_state_version_is_included_in_host_info_json() { - ClusterState state(*_manager->getClusterStateBundle()->getBaselineClusterState()); - state.setVersion(123); - _manager->setClusterStateBundle(lib::ClusterStateBundle(state)); + force_current_cluster_state_version(123); std::string nodeInfoString(_manager->getNodeInfo()); vespalib::Memory goldenMemory(nodeInfoString); @@ -343,4 +350,21 @@ void StateManagerTest::immediate_node_state_replying_is_tracked_per_controller() CPPUNIT_ASSERT_EQUAL(size_t(0), _upper->getNumReplies()); } +void StateManagerTest::activation_command_is_bounced_with_current_cluster_state_version() { + force_current_cluster_state_version(12345); + + auto cmd = std::make_shared<api::ActivateClusterStateVersionCommand>(12340); + cmd->setTimeout(10000000); + cmd->setSourceIndex(0); + _upper->sendDown(cmd); + + CPPUNIT_ASSERT_EQUAL(size_t(1), _upper->getNumReplies()); + std::shared_ptr<api::StorageReply> reply; + GET_ONLY_OK_REPLY(reply); // Implicitly clears messages from _upper + CPPUNIT_ASSERT_EQUAL(api::MessageType::ACTIVATE_CLUSTER_STATE_VERSION_REPLY, reply->getType()); + auto& activate_reply 
= dynamic_cast<api::ActivateClusterStateVersionReply&>(*reply); + CPPUNIT_ASSERT_EQUAL(uint32_t(12340), activate_reply.activateVersion()); + CPPUNIT_ASSERT_EQUAL(uint32_t(12345), activate_reply.actualVersion()); +} + } // storage diff --git a/storage/src/vespa/storage/config/distributorconfiguration.cpp b/storage/src/vespa/storage/config/distributorconfiguration.cpp index 44cf56fdff8..294ce56f536 100644 --- a/storage/src/vespa/storage/config/distributorconfiguration.cpp +++ b/storage/src/vespa/storage/config/distributorconfiguration.cpp @@ -34,6 +34,7 @@ DistributorConfiguration::DistributorConfiguration(StorageComponent& component) _enableHostInfoReporting(true), _disableBucketActivation(false), _sequenceMutatingOperations(true), + _allowStaleReadsDuringClusterStateTransitions(false), _minimumReplicaCountingMode(ReplicaCountingMode::TRUSTED) { } @@ -144,6 +145,7 @@ DistributorConfiguration::configure(const vespa::config::content::core::StorDist _enableHostInfoReporting = config.enableHostInfoReporting; _disableBucketActivation = config.disableBucketActivation; _sequenceMutatingOperations = config.sequenceMutatingOperations; + _allowStaleReadsDuringClusterStateTransitions = config.allowStaleReadsDuringClusterStateTransitions; _minimumReplicaCountingMode = config.minimumReplicaCountingMode; diff --git a/storage/src/vespa/storage/config/distributorconfiguration.h b/storage/src/vespa/storage/config/distributorconfiguration.h index 5dfc4f66cb8..8c84fef47b5 100644 --- a/storage/src/vespa/storage/config/distributorconfiguration.h +++ b/storage/src/vespa/storage/config/distributorconfiguration.h @@ -235,6 +235,13 @@ public: void setSequenceMutatingOperations(bool sequenceMutations) noexcept { _sequenceMutatingOperations = sequenceMutations; } + + bool allowStaleReadsDuringClusterStateTransitions() const noexcept { + return _allowStaleReadsDuringClusterStateTransitions; + } + void setAllowStaleReadsDuringClusterStateTransitions(bool allow) noexcept { + 
_allowStaleReadsDuringClusterStateTransitions = allow; + } private: DistributorConfiguration(const DistributorConfiguration& other); @@ -274,6 +281,7 @@ private: bool _enableHostInfoReporting; bool _disableBucketActivation; bool _sequenceMutatingOperations; + bool _allowStaleReadsDuringClusterStateTransitions; DistrConfig::MinimumReplicaCountingMode _minimumReplicaCountingMode; diff --git a/storage/src/vespa/storage/config/stor-distributormanager.def b/storage/src/vespa/storage/config/stor-distributormanager.def index 89aad427ca9..d4f69073cc6 100644 --- a/storage/src/vespa/storage/config/stor-distributormanager.def +++ b/storage/src/vespa/storage/config/stor-distributormanager.def @@ -184,3 +184,10 @@ sequence_mutating_operations bool default=true ## towards a node if it has indicated that its merge queues are full or it is ## suffering from resource exhaustion. inhibit_merge_sending_on_busy_node_duration_sec int default=10 + +## If set, enables potentially stale reads during cluster state transitions where +## buckets change ownership. This also implicitly enables support for two-phase +## cluster state transitions on the distributor. +## For this option to take effect, the cluster controller must also have two-phase +## states enabled. 
+allow_stale_reads_during_cluster_state_transitions bool default=false diff --git a/storage/src/vespa/storage/distributor/bucketdbupdater.cpp b/storage/src/vespa/storage/distributor/bucketdbupdater.cpp index a223001af79..e9595b4a960 100644 --- a/storage/src/vespa/storage/distributor/bucketdbupdater.cpp +++ b/storage/src/vespa/storage/distributor/bucketdbupdater.cpp @@ -20,11 +20,12 @@ using document::BucketSpace; namespace storage::distributor { BucketDBUpdater::BucketDBUpdater(Distributor& owner, - DistributorBucketSpaceRepo &bucketSpaceRepo, + DistributorBucketSpaceRepo& bucketSpaceRepo, + DistributorBucketSpaceRepo& readOnlyBucketSpaceRepo, DistributorMessageSender& sender, DistributorComponentRegister& compReg) : framework::StatusReporter("bucketdb", "Bucket DB Updater"), - _distributorComponent(owner, bucketSpaceRepo, compReg, "Bucket DB Updater"), + _distributorComponent(owner, bucketSpaceRepo, readOnlyBucketSpaceRepo, compReg, "Bucket DB Updater"), _sender(sender), _transitionTimer(_distributorComponent.getClock()) { @@ -53,6 +54,13 @@ BucketDBUpdater::print(std::ostream& out, bool verbose, const std::string& inden } bool +BucketDBUpdater::shouldDeferStateEnabling() const noexcept +{ + return _distributorComponent.getDistributor().getConfig() + .allowStaleReadsDuringClusterStateTransitions(); +} + +bool BucketDBUpdater::hasPendingClusterState() const { return static_cast<bool>(_pendingClusterState); @@ -113,25 +121,35 @@ void BucketDBUpdater::removeSuperfluousBuckets( const lib::ClusterStateBundle& newState) { + const bool move_to_read_only_db = shouldDeferStateEnabling(); for (auto &elem : _distributorComponent.getBucketSpaceRepo()) { const auto &newDistribution(elem.second->getDistribution()); const auto &oldClusterState(elem.second->getClusterState()); auto &bucketDb(elem.second->getBucketDatabase()); + auto& readOnlyDb(_distributorComponent.getReadOnlyBucketSpaceRepo().get(elem.first).getBucketDatabase()); // Remove all buckets not belonging to this 
distributor, or // being on storage nodes that are no longer up. NodeRemover proc( oldClusterState, *newState.getDerivedClusterState(elem.first), - _distributorComponent.getBucketIdFactory(), _distributorComponent.getIndex(), newDistribution, _distributorComponent.getDistributor().getStorageNodeUpStates()); bucketDb.forEach(proc); - for (const auto & entry :proc.getBucketsToRemove()) { - bucketDb.remove(entry); + for (const auto & bucket : proc.getBucketsToRemove()) { + bucketDb.remove(bucket); + } + // TODO vec of Entry instead to avoid lookup and remove? Uses more transient memory... + for (const auto& bucket : proc.getNonOwnedBuckets()) { + if (move_to_read_only_db) { + auto db_entry = bucketDb.get(bucket); + readOnlyDb.update(db_entry); // TODO Entry move support + } + bucketDb.remove(bucket); } + } } @@ -154,6 +172,14 @@ BucketDBUpdater::completeTransitionTimer() } void +BucketDBUpdater::clearReadOnlyBucketRepoDatabases() +{ + for (auto& space : _distributorComponent.getReadOnlyBucketSpaceRepo()) { + space.second->getBucketDatabase().clear(); + } +} + +void BucketDBUpdater::storageDistributionChanged() { ensureTransitionTimerStarted(); @@ -169,6 +195,7 @@ BucketDBUpdater::storageDistributionChanged() std::move(clusterInfo), _sender, _distributorComponent.getBucketSpaceRepo(), + _distributorComponent.getReadOnlyBucketSpaceRepo(), _distributorComponent.getUniqueTimestamp()); _outdatedNodesMap = _pendingClusterState->getOutdatedNodesMap(); } @@ -176,14 +203,22 @@ BucketDBUpdater::storageDistributionChanged() void BucketDBUpdater::replyToPreviousPendingClusterStateIfAny() { - if (_pendingClusterState.get() && - _pendingClusterState->getCommand().get()) - { + if (_pendingClusterState.get() && _pendingClusterState->hasCommand()) { _distributorComponent.sendUp( std::make_shared<api::SetSystemStateReply>(*_pendingClusterState->getCommand())); } } +void +BucketDBUpdater::replyToActivationWithActualVersion( + const api::ActivateClusterStateVersionCommand& cmd, + 
uint32_t actualVersion) +{ + auto reply = std::make_shared<api::ActivateClusterStateVersionReply>(cmd); + reply->setActualVersion(actualVersion); + _distributorComponent.sendUp(reply); // TODO let API accept rvalues +} + bool BucketDBUpdater::onSetSystemState( const std::shared_ptr<api::SetSystemStateCommand>& cmd) @@ -214,6 +249,7 @@ BucketDBUpdater::onSetSystemState( std::move(clusterInfo), _sender, _distributorComponent.getBucketSpaceRepo(), + _distributorComponent.getReadOnlyBucketSpaceRepo(), cmd, _outdatedNodesMap, _distributorComponent.getUniqueTimestamp()); @@ -225,6 +261,39 @@ BucketDBUpdater::onSetSystemState( return true; } +bool +BucketDBUpdater::onActivateClusterStateVersion(const std::shared_ptr<api::ActivateClusterStateVersionCommand>& cmd) +{ + if (hasPendingClusterState() && _pendingClusterState->isVersionedTransition()) { + const auto pending_version = _pendingClusterState->clusterStateVersion(); + if (pending_version == cmd->version()) { + if (isPendingClusterStateCompleted()) { + assert(_pendingClusterState->isDeferred()); + activatePendingClusterState(); + } else { + LOG(error, "Received cluster state activation for pending version %u " + "without pending state being complete yet. This is not expected, " + "as no activation should be sent before all distributors have " + "reported that state processing is complete.", pending_version); + replyToActivationWithActualVersion(*cmd, 0); // Invalid version, will cause re-send (hopefully when completed). + return true; + } + } else { + replyToActivationWithActualVersion(*cmd, pending_version); + return true; + } + } else if (shouldDeferStateEnabling()) { + // Likely just a resend, but log warn for now to get a feel of how common it is. + LOG(warning, "Received cluster state activation command for version %u, which " + "has no corresponding pending state. 
Likely resent operation.", cmd->version()); + } else { + LOG(debug, "Received cluster state activation command for version %u, but distributor " + "config does not have deferred activation enabled. Treating as no-op.", cmd->version()); + } + // Fall through to next link in call chain that cares about this message. + return false; +} + BucketDBUpdater::MergeReplyGuard::~MergeReplyGuard() { if (_reply) { @@ -485,14 +554,45 @@ BucketDBUpdater::isPendingClusterStateCompleted() const void BucketDBUpdater::processCompletedPendingClusterState() { + if (_pendingClusterState->isDeferred()) { + LOG(debug, "Deferring completion of pending cluster state version %u until explicitly activated", + _pendingClusterState->clusterStateVersion()); + assert(_pendingClusterState->hasCommand()); // Deferred transitions should only ever be created by state commands. + // Sending down SetSystemState command will reach the state manager and a reply + // will be auto-sent back to the cluster controller in charge. Once this happens, + // it will send an explicit activation command once all distributors have reported + // that their pending cluster states have completed. + // A booting distributor will treat itself as "system Up" before the state has actually + // taken effect via activation. External operation handler will keep operations from + // actually being scheduled until state has been activated. The external operation handler + // needs to be explicitly aware of the case where no state has yet to be activated. + _distributorComponent.getDistributor().getMessageSender().sendDown( + _pendingClusterState->getCommand()); + _pendingClusterState->clearCommand(); + return; + } + // Distribution config change or non-deferred cluster state. Immediately activate + // the pending state without being told to do so explicitly. 
+ activatePendingClusterState(); +} + +void +BucketDBUpdater::activatePendingClusterState() +{ _pendingClusterState->mergeIntoBucketDatabases(); - if (_pendingClusterState->getCommand().get()) { + if (_pendingClusterState->isVersionedTransition()) { + LOG(debug, "Activating pending cluster state version %u", _pendingClusterState->clusterStateVersion()); enableCurrentClusterStateBundleInDistributor(); - _distributorComponent.getDistributor().getMessageSender().sendDown( - _pendingClusterState->getCommand()); + if (_pendingClusterState->hasCommand()) { + _distributorComponent.getDistributor().getMessageSender().sendDown( + _pendingClusterState->getCommand()); + } addCurrentStateToClusterStateHistory(); } else { + LOG(debug, "Activating pending distribution config"); + // TODO distribution changes cannot currently be deferred as they are not + // initiated by the cluster controller! _distributorComponent.getDistributor().notifyDistributionChangeEnabled(); } @@ -500,13 +600,14 @@ BucketDBUpdater::processCompletedPendingClusterState() _outdatedNodesMap.clear(); sendAllQueuedBucketRechecks(); completeTransitionTimer(); + clearReadOnlyBucketRepoDatabases(); } void BucketDBUpdater::enableCurrentClusterStateBundleInDistributor() { const lib::ClusterStateBundle& state( - _pendingClusterState->getCommand()->getClusterStateBundle()); + _pendingClusterState->getNewClusterStateBundle()); LOG(debug, "BucketDBUpdater finished processing state %s", @@ -688,7 +789,7 @@ BucketDBUpdater::NodeRemover::process(BucketDatabase::Entry& e) return true; } if (!distributorOwnsBucket(bucketId)) { - _removedBuckets.push_back(bucketId); + _nonOwnedBuckets.push_back(bucketId); return true; } diff --git a/storage/src/vespa/storage/distributor/bucketdbupdater.h b/storage/src/vespa/storage/distributor/bucketdbupdater.h index ea67e7ea72a..393e1e2524e 100644 --- a/storage/src/vespa/storage/distributor/bucketdbupdater.h +++ b/storage/src/vespa/storage/distributor/bucketdbupdater.h @@ -33,7 +33,8 @@ 
public: using OutdatedNodes = dbtransition::OutdatedNodes; using OutdatedNodesMap = dbtransition::OutdatedNodesMap; BucketDBUpdater(Distributor& owner, - DistributorBucketSpaceRepo &bucketSpaceRepo, + DistributorBucketSpaceRepo& bucketSpaceRepo, + DistributorBucketSpaceRepo& readOnlyBucketSpaceRepo, DistributorMessageSender& sender, DistributorComponentRegister& compReg); ~BucketDBUpdater(); @@ -43,6 +44,7 @@ public: void recheckBucketInfo(uint32_t nodeIdx, const document::Bucket& bucket); bool onSetSystemState(const std::shared_ptr<api::SetSystemStateCommand>& cmd) override; + bool onActivateClusterStateVersion(const std::shared_ptr<api::ActivateClusterStateVersionCommand>& cmd) override; bool onRequestBucketInfoReply(const std::shared_ptr<api::RequestBucketInfoReply> & repl) override; bool onMergeBucketReply(const std::shared_ptr<api::MergeBucketReply>& reply) override; bool onNotifyBucketChange(const std::shared_ptr<api::NotifyBucketChangeCommand>&) override; @@ -124,6 +126,7 @@ private: } }; + bool shouldDeferStateEnabling() const noexcept; bool hasPendingClusterState() const; bool pendingClusterStateAccepted(const std::shared_ptr<api::RequestBucketInfoReply>& repl); bool processSingleBucketInfoReply(const std::shared_ptr<api::RequestBucketInfoReply>& repl); @@ -131,6 +134,7 @@ private: const BucketRequest& req); bool isPendingClusterStateCompleted() const; void processCompletedPendingClusterState(); + void activatePendingClusterState(); void mergeBucketInfoWithDatabase(const std::shared_ptr<api::RequestBucketInfoReply>& repl, const BucketRequest& req); void convertBucketInfoToBucketList(const std::shared_ptr<api::RequestBucketInfoReply>& repl, @@ -141,6 +145,7 @@ private: BucketListMerger::BucketList& existing) const; void ensureTransitionTimerStarted(); void completeTransitionTimer(); + void clearReadOnlyBucketRepoDatabases(); /** * Adds all buckets contained in the bucket database * that are either contained @@ -161,6 +166,9 @@ private: void 
removeSuperfluousBuckets(const lib::ClusterStateBundle& newState); void replyToPreviousPendingClusterStateIfAny(); + void replyToActivationWithActualVersion( + const api::ActivateClusterStateVersionCommand& cmd, + uint32_t actualVersion); void enableCurrentClusterStateBundleInDistributor(); void addCurrentStateToClusterStateHistory(); @@ -191,30 +199,35 @@ private: public: NodeRemover(const lib::ClusterState& oldState, const lib::ClusterState& s, - [[maybe_unused]] const document::BucketIdFactory& factory, uint16_t localIndex, const lib::Distribution& distribution, const char* upStates) : _oldState(oldState), _state(s), + _nonOwnedBuckets(), + _removedBuckets(), _localIndex(localIndex), _distribution(distribution), _upStates(upStates) {} - ~NodeRemover(); + ~NodeRemover() override; bool process(BucketDatabase::Entry& e) override; void logRemove(const document::BucketId& bucketId, const char* msg) const; bool distributorOwnsBucket(const document::BucketId&) const; - const std::vector<document::BucketId>& getBucketsToRemove() const { + const std::vector<document::BucketId>& getBucketsToRemove() const noexcept { return _removedBuckets; } + const std::vector<document::BucketId>& getNonOwnedBuckets() const noexcept { + return _nonOwnedBuckets; + } private: void setCopiesInEntry(BucketDatabase::Entry& e, const std::vector<BucketCopy>& copies) const; void removeEmptyBucket(const document::BucketId& bucketId); const lib::ClusterState _oldState; const lib::ClusterState _state; + std::vector<document::BucketId> _nonOwnedBuckets; std::vector<document::BucketId> _removedBuckets; uint16_t _localIndex; diff --git a/storage/src/vespa/storage/distributor/distributor.cpp b/storage/src/vespa/storage/distributor/distributor.cpp index 1664dd0d9a1..c92dfbdc14e 100644 --- a/storage/src/vespa/storage/distributor/distributor.cpp +++ b/storage/src/vespa/storage/distributor/distributor.cpp @@ -67,15 +67,16 @@ Distributor::Distributor(DistributorComponentRegister& compReg, _compReg(compReg), 
_component(compReg, "distributor"), _bucketSpaceRepo(std::make_unique<DistributorBucketSpaceRepo>()), + _readOnlyBucketSpaceRepo(std::make_unique<DistributorBucketSpaceRepo>()), _metrics(new DistributorMetricSet(_component.getLoadTypes()->getMetricLoadTypes())), _operationOwner(*this, _component.getClock()), _maintenanceOperationOwner(*this, _component.getClock()), _pendingMessageTracker(compReg), - _bucketDBUpdater(*this, *_bucketSpaceRepo, *this, compReg), + _bucketDBUpdater(*this, *_bucketSpaceRepo, *_readOnlyBucketSpaceRepo, *this, compReg), _distributorStatusDelegate(compReg, *this, *this), _bucketDBStatusDelegate(compReg, *this, _bucketDBUpdater), - _idealStateManager(*this, *_bucketSpaceRepo, compReg, manageActiveBucketCopies), - _externalOperationHandler(*this, *_bucketSpaceRepo, _idealStateManager, compReg), + _idealStateManager(*this, *_bucketSpaceRepo, *_readOnlyBucketSpaceRepo, compReg, manageActiveBucketCopies), + _externalOperationHandler(*this, *_bucketSpaceRepo, *_readOnlyBucketSpaceRepo, _idealStateManager, compReg), _threadPool(threadPool), _initializingIsUp(true), _doneInitializeHandler(doneInitHandler), @@ -575,16 +576,20 @@ void Distributor::propagateDefaultDistribution( std::shared_ptr<const lib::Distribution> distribution) { - _bucketSpaceRepo->get(document::FixedBucketSpaces::default_space()).setDistribution(distribution); auto global_distr = GlobalBucketSpaceDistributionConverter::convert_to_global(*distribution); - _bucketSpaceRepo->get(document::FixedBucketSpaces::global_space()).setDistribution(std::move(global_distr)); + for (auto* repo : {_bucketSpaceRepo.get(), _readOnlyBucketSpaceRepo.get()}) { + repo->get(document::FixedBucketSpaces::default_space()).setDistribution(distribution); + repo->get(document::FixedBucketSpaces::global_space()).setDistribution(global_distr); + } } void Distributor::propagateClusterStates() { - for (auto &iter : *_bucketSpaceRepo) { - 
iter.second->setClusterState(_clusterStateBundle.getDerivedClusterState(iter.first)); + for (auto* repo : {_bucketSpaceRepo.get(), _readOnlyBucketSpaceRepo.get()}) { + for (auto& iter : *repo) { + iter.second->setClusterState(_clusterStateBundle.getDerivedClusterState(iter.first)); + } } } diff --git a/storage/src/vespa/storage/distributor/distributor.h b/storage/src/vespa/storage/distributor/distributor.h index fb8a9fb4299..cd24b91eba2 100644 --- a/storage/src/vespa/storage/distributor/distributor.h +++ b/storage/src/vespa/storage/distributor/distributor.h @@ -158,6 +158,13 @@ public: DistributorBucketSpaceRepo &getBucketSpaceRepo() noexcept { return *_bucketSpaceRepo; } const DistributorBucketSpaceRepo &getBucketSpaceRepo() const noexcept { return *_bucketSpaceRepo; } + DistributorBucketSpaceRepo& getReadOnlyBucketSpaceRepo() noexcept { + return *_readOnlyBucketSpaceRepo; + } + const DistributorBucketSpaceRepo& getReadyOnlyBucketSpaceRepo() const noexcept { + return *_readOnlyBucketSpaceRepo; + } + private: friend class Distributor_Test; friend class BucketDBUpdaterTest; @@ -244,6 +251,10 @@ private: DistributorComponentRegister& _compReg; storage::DistributorComponent _component; std::unique_ptr<DistributorBucketSpaceRepo> _bucketSpaceRepo; + // Read-only bucket space repo with DBs that only contain buckets transiently + // during cluster state transitions. Bucket set does not overlap that of _bucketSpaceRepo + // and the DBs are empty during non-transition phases. 
+ std::unique_ptr<DistributorBucketSpaceRepo> _readOnlyBucketSpaceRepo; std::shared_ptr<DistributorMetricSet> _metrics; OperationOwner _operationOwner; diff --git a/storage/src/vespa/storage/distributor/distributorcomponent.cpp b/storage/src/vespa/storage/distributor/distributorcomponent.cpp index d3d07350d35..9bd215b9644 100644 --- a/storage/src/vespa/storage/distributor/distributorcomponent.cpp +++ b/storage/src/vespa/storage/distributor/distributorcomponent.cpp @@ -15,16 +15,18 @@ namespace storage::distributor { DistributorComponent::DistributorComponent( DistributorInterface& distributor, - DistributorBucketSpaceRepo &bucketSpaceRepo, + DistributorBucketSpaceRepo& bucketSpaceRepo, + DistributorBucketSpaceRepo& readOnlyBucketSpaceRepo, DistributorComponentRegister& compReg, const std::string& name) : storage::DistributorComponent(compReg, name), _distributor(distributor), - _bucketSpaceRepo(bucketSpaceRepo) + _bucketSpaceRepo(bucketSpaceRepo), + _readOnlyBucketSpaceRepo(readOnlyBucketSpaceRepo) { } -DistributorComponent::~DistributorComponent() {} +DistributorComponent::~DistributorComponent() = default; void DistributorComponent::sendDown(const api::StorageMessage::SP& msg) diff --git a/storage/src/vespa/storage/distributor/distributorcomponent.h b/storage/src/vespa/storage/distributor/distributorcomponent.h index 561904cee8d..f2aea89d47c 100644 --- a/storage/src/vespa/storage/distributor/distributorcomponent.h +++ b/storage/src/vespa/storage/distributor/distributorcomponent.h @@ -29,11 +29,12 @@ class DistributorComponent : public storage::DistributorComponent { public: DistributorComponent(DistributorInterface& distributor, - DistributorBucketSpaceRepo &bucketSpaceRepo, - DistributorComponentRegister& compReg, - const std::string& name); + DistributorBucketSpaceRepo& bucketSpaceRepo, + DistributorBucketSpaceRepo& readOnlyBucketSpaceRepo, + DistributorComponentRegister& compReg, + const std::string& name); - ~DistributorComponent(); + ~DistributorComponent() 
override; /** * Returns the ownership status of a bucket as decided with the given @@ -153,6 +154,9 @@ public: DistributorBucketSpaceRepo &getBucketSpaceRepo() { return _bucketSpaceRepo; } const DistributorBucketSpaceRepo &getBucketSpaceRepo() const { return _bucketSpaceRepo; } + DistributorBucketSpaceRepo& getReadOnlyBucketSpaceRepo() { return _readOnlyBucketSpaceRepo; } + const DistributorBucketSpaceRepo& getReadOnlyBucketSpaceRepo() const { return _readOnlyBucketSpaceRepo; } + /** * Finds a bucket that has the same direct parent as the given bucket * (i.e. split one bit less), but different bit in the most used bit. @@ -179,7 +183,8 @@ private: protected: - DistributorBucketSpaceRepo &_bucketSpaceRepo; + DistributorBucketSpaceRepo& _bucketSpaceRepo; + DistributorBucketSpaceRepo& _readOnlyBucketSpaceRepo; vespalib::Lock _sync; }; diff --git a/storage/src/vespa/storage/distributor/distributormetricsset.cpp b/storage/src/vespa/storage/distributor/distributormetricsset.cpp index 927dc06182d..83923a1f00e 100644 --- a/storage/src/vespa/storage/distributor/distributormetricsset.cpp +++ b/storage/src/vespa/storage/distributor/distributormetricsset.cpp @@ -17,7 +17,7 @@ DistributorMetricSet::DistributorMetricSet(const metrics::LoadTypeSet& lt) removelocations(lt, PersistenceOperationMetricSet("removelocations"), this), gets(lt, PersistenceOperationMetricSet("gets"), this), stats(lt, PersistenceOperationMetricSet("stats"), this), - multioperations(lt, PersistenceOperationMetricSet("multioperations"), this), + getbucketlists(lt, PersistenceOperationMetricSet("getbucketlists"), this), visits(lt, VisitorMetricSet(), this), stateTransitionTime("state_transition_time", {}, "Time it takes to complete a cluster state transition. 
If a " diff --git a/storage/src/vespa/storage/distributor/distributormetricsset.h b/storage/src/vespa/storage/distributor/distributormetricsset.h index 5a64027f500..dfe976a89ab 100644 --- a/storage/src/vespa/storage/distributor/distributormetricsset.h +++ b/storage/src/vespa/storage/distributor/distributormetricsset.h @@ -20,7 +20,7 @@ public: metrics::LoadMetric<PersistenceOperationMetricSet> removelocations; metrics::LoadMetric<PersistenceOperationMetricSet> gets; metrics::LoadMetric<PersistenceOperationMetricSet> stats; - metrics::LoadMetric<PersistenceOperationMetricSet> multioperations; + metrics::LoadMetric<PersistenceOperationMetricSet> getbucketlists; metrics::LoadMetric<VisitorMetricSet> visits; metrics::DoubleAverageMetric stateTransitionTime; metrics::DoubleAverageMetric recoveryModeTime; diff --git a/storage/src/vespa/storage/distributor/externaloperationhandler.cpp b/storage/src/vespa/storage/distributor/externaloperationhandler.cpp index b22592af327..1b88f02cac6 100644 --- a/storage/src/vespa/storage/distributor/externaloperationhandler.cpp +++ b/storage/src/vespa/storage/distributor/externaloperationhandler.cpp @@ -20,14 +20,18 @@ #include "distributor_bucket_space.h" #include <vespa/log/log.h> +#include <vespa/document/bucket/fixed_bucket_spaces.h> + LOG_SETUP(".distributor.manager"); namespace storage::distributor { -ExternalOperationHandler::ExternalOperationHandler(Distributor& owner, DistributorBucketSpaceRepo& bucketSpaceRepo, +ExternalOperationHandler::ExternalOperationHandler(Distributor& owner, + DistributorBucketSpaceRepo& bucketSpaceRepo, + DistributorBucketSpaceRepo& readOnlyBucketSpaceRepo, const MaintenanceOperationGenerator& gen, DistributorComponentRegister& compReg) - : DistributorComponent(owner, bucketSpaceRepo, compReg, "External operation handler"), + : DistributorComponent(owner, bucketSpaceRepo, readOnlyBucketSpaceRepo, compReg, "External operation handler"), _operationGenerator(gen), _rejectFeedBeforeTimeReached() // At epoch 
{ } @@ -68,19 +72,69 @@ ExternalOperationHandler::checkSafeTimeReached(api::StorageCommand& cmd) return true; } +void ExternalOperationHandler::bounce_with_result(api::StorageCommand& cmd, const api::ReturnCode& result) { + api::StorageReply::UP reply(cmd.makeReply()); + reply->setResult(result); + sendUp(std::shared_ptr<api::StorageMessage>(reply.release())); +} + +void ExternalOperationHandler::bounce_with_wrong_distribution(api::StorageCommand& cmd) { + // Distributor ownership is equal across bucket spaces, so always send back default space state. + // This also helps client avoid getting confused by possibly observing different actual + // (derived) state strings for global/non-global document types for the same state version. + // Similarly, if we've yet to activate any version at all we send back BUSY instead + // of a suspiciously empty WrongDistributionReply. + // TODO consider NOT_READY instead of BUSY once we're sure this won't cause any other issues. + const auto& cluster_state = _bucketSpaceRepo.get(document::FixedBucketSpaces::default_space()).getClusterState(); + if (cluster_state.getVersion() != 0) { + auto cluster_state_str = cluster_state.toString(); + LOG(debug, "Got message with wrong distribution, sending back state '%s'", cluster_state_str.c_str()); + bounce_with_result(cmd, api::ReturnCode(api::ReturnCode::WRONG_DISTRIBUTION, cluster_state_str)); + } else { // Only valid for empty startup state + LOG(debug, "Got message with wrong distribution, but no cluster state activated yet. 
Sending back BUSY"); + bounce_with_result(cmd, api::ReturnCode(api::ReturnCode::BUSY, "No cluster state activated yet")); + } +} + +void ExternalOperationHandler::bounce_with_busy_during_state_transition( + api::StorageCommand& cmd, + const lib::ClusterState& current_state, + const lib::ClusterState& pending_state) +{ + auto status_str = vespalib::make_string("Currently pending cluster state transition" + " from version %u to %u", + current_state.getVersion(), pending_state.getVersion()); + + api::StorageReply::UP reply(cmd.makeReply()); + api::ReturnCode ret(api::ReturnCode::BUSY, status_str); + reply->setResult(ret); + sendUp(std::shared_ptr<api::StorageMessage>(reply.release())); +} + bool ExternalOperationHandler::checkTimestampMutationPreconditions(api::StorageCommand& cmd, const document::BucketId &bucketId, PersistenceOperationMetricSet& persistenceMetrics) { document::Bucket bucket(cmd.getBucket().getBucketSpace(), bucketId); - if (!checkDistribution(cmd, bucket)) { + if (!ownsBucketInCurrentState(bucket)) { LOG(debug, "Distributor manager received %s, bucket %s with wrong distribution", cmd.toString().c_str(), bucket.toString().c_str()); - + bounce_with_wrong_distribution(cmd); persistenceMetrics.failures.wrongdistributor.inc(); return false; } + + auto pending = getDistributor().checkOwnershipInPendingState(bucket); + if (!pending.isOwned()) { + // We return BUSY here instead of WrongDistributionReply to avoid clients potentially + // ping-ponging between cluster state versions during a state transition. 
+ auto& current_state = _bucketSpaceRepo.get(document::FixedBucketSpaces::default_space()).getClusterState(); + auto& pending_state = pending.getNonOwnedState(); + bounce_with_busy_during_state_transition(cmd, current_state, pending_state); + return false; + } + if (!checkSafeTimeReached(cmd)) { persistenceMetrics.failures.safe_time_not_reached.inc(); return false; @@ -111,6 +165,35 @@ bool ExternalOperationHandler::allowMutation(const SequencingHandle& handle) con return handle.valid(); } +template <typename Func> +void ExternalOperationHandler::bounce_or_invoke_read_only_op( + api::StorageCommand& cmd, + const document::Bucket& bucket, + PersistenceOperationMetricSet& metrics, + Func func) +{ + if (!ownsBucketInCurrentState(bucket)) { + LOG(debug, "Distributor manager received %s, bucket %s with wrong distribution", + cmd.toString().c_str(), bucket.toString().c_str()); + bounce_with_wrong_distribution(cmd); + metrics.failures.wrongdistributor.inc(); + return; + } + + auto pending = getDistributor().checkOwnershipInPendingState(bucket); + if (pending.isOwned()) { + func(_bucketSpaceRepo); + } else { + if (getDistributor().getConfig().allowStaleReadsDuringClusterStateTransitions()) { + func(_readOnlyBucketSpaceRepo); + } else { + auto& current_state = _bucketSpaceRepo.get(document::FixedBucketSpaces::default_space()).getClusterState(); + auto& pending_state = pending.getNonOwnedState(); + bounce_with_busy_during_state_transition(cmd, current_state, pending_state); + } + } +} + IMPL_MSG_COMMAND_H(ExternalOperationHandler, Put) { auto& metrics = getMetrics().puts[cmd->getLoadType()]; @@ -186,10 +269,8 @@ IMPL_MSG_COMMAND_H(ExternalOperationHandler, RemoveLocation) RemoveLocationOperation::getBucketId(*this, *cmd, bid); document::Bucket bucket(cmd->getBucket().getBucketSpace(), bid); - if (!checkDistribution(*cmd, bucket)) { - LOG(debug, "Distributor manager received %s with wrong distribution", cmd->toString().c_str()); - - 
getMetrics().removelocations[cmd->getLoadType()].failures.wrongdistributor.inc(); + auto& metrics = getMetrics().removelocations[cmd->getLoadType()]; + if (!checkTimestampMutationPreconditions(*cmd, bucket.getBucketId(), metrics)) { return true; } @@ -201,43 +282,38 @@ IMPL_MSG_COMMAND_H(ExternalOperationHandler, RemoveLocation) IMPL_MSG_COMMAND_H(ExternalOperationHandler, Get) { document::Bucket bucket(cmd->getBucket().getBucketSpace(), getBucketId(cmd->getDocumentId())); - if (!checkDistribution(*cmd, bucket)) { - LOG(debug, "Distributor manager received get for %s, bucket %s with wrong distribution", - cmd->getDocumentId().toString().c_str(), bucket.toString().c_str()); - - getMetrics().gets[cmd->getLoadType()].failures.wrongdistributor.inc(); - return true; - } - - _op = std::make_shared<GetOperation>(*this, _bucketSpaceRepo.get(cmd->getBucket().getBucketSpace()), - cmd, getMetrics().gets[cmd->getLoadType()]); + auto& metrics = getMetrics().gets[cmd->getLoadType()]; + bounce_or_invoke_read_only_op(*cmd, bucket, metrics, [&](auto& bucket_space_repo) { + _op = std::make_shared<GetOperation>(*this, bucket_space_repo.get(cmd->getBucket().getBucketSpace()), + cmd, metrics); + }); return true; } IMPL_MSG_COMMAND_H(ExternalOperationHandler, StatBucket) { - if (!checkDistribution(*cmd, cmd->getBucket())) { - return true; - } - auto &distributorBucketSpace(_bucketSpaceRepo.get(cmd->getBucket().getBucketSpace())); - _op = std::make_shared<StatBucketOperation>(*this, distributorBucketSpace, cmd); + auto& metrics = getMetrics().stats[cmd->getLoadType()]; + bounce_or_invoke_read_only_op(*cmd, cmd->getBucket(), metrics, [&](auto& bucket_space_repo) { + auto& bucket_space = bucket_space_repo.get(cmd->getBucket().getBucketSpace()); + _op = std::make_shared<StatBucketOperation>(*this, bucket_space, cmd); + }); return true; } IMPL_MSG_COMMAND_H(ExternalOperationHandler, GetBucketList) { - if (!checkDistribution(*cmd, cmd->getBucket())) { - return true; - } - auto 
bucketSpace(cmd->getBucket().getBucketSpace()); - auto &distributorBucketSpace(_bucketSpaceRepo.get(bucketSpace)); - auto &bucketDatabase(distributorBucketSpace.getBucketDatabase()); - _op = std::make_shared<StatBucketListOperation>(bucketDatabase, _operationGenerator, getIndex(), cmd); + auto& metrics = getMetrics().getbucketlists[cmd->getLoadType()]; + bounce_or_invoke_read_only_op(*cmd, cmd->getBucket(), metrics, [&](auto& bucket_space_repo) { + auto& bucket_space = bucket_space_repo.get(cmd->getBucket().getBucketSpace()); + auto& bucket_database = bucket_space.getBucketDatabase(); + _op = std::make_shared<StatBucketListOperation>(bucket_database, _operationGenerator, getIndex(), cmd); + }); return true; } IMPL_MSG_COMMAND_H(ExternalOperationHandler, CreateVisitor) { + // TODO same handling as Gets (VisitorOperation needs to change) const DistributorConfiguration& config(getDistributor().getConfig()); VisitorOperation::Config visitorConfig(config.getMinBucketsPerVisitor(), config.getMaxVisitorsPerNodePerClientVisitor()); auto &distributorBucketSpace(_bucketSpaceRepo.get(cmd->getBucket().getBucketSpace())); diff --git a/storage/src/vespa/storage/distributor/externaloperationhandler.h b/storage/src/vespa/storage/distributor/externaloperationhandler.h index c198fe30159..655feb5d00c 100644 --- a/storage/src/vespa/storage/distributor/externaloperationhandler.h +++ b/storage/src/vespa/storage/distributor/externaloperationhandler.h @@ -37,10 +37,11 @@ public: ExternalOperationHandler(Distributor& owner, DistributorBucketSpaceRepo& bucketSpaceRepo, + DistributorBucketSpaceRepo& readOnlyBucketSpaceRepo, const MaintenanceOperationGenerator&, DistributorComponentRegister& compReg); - ~ExternalOperationHandler(); + ~ExternalOperationHandler() override; bool handleMessage(const std::shared_ptr<api::StorageMessage>& msg, Operation::SP& operation); @@ -55,6 +56,18 @@ private: Operation::SP _op; TimePoint _rejectFeedBeforeTimeReached; + template <typename Func> + void 
bounce_or_invoke_read_only_op(api::StorageCommand& cmd, + const document::Bucket& bucket, + PersistenceOperationMetricSet& metrics, + Func f); + + void bounce_with_wrong_distribution(api::StorageCommand& cmd); + void bounce_with_busy_during_state_transition(api::StorageCommand& cmd, + const lib::ClusterState& current_state, + const lib::ClusterState& pending_state); + void bounce_with_result(api::StorageCommand& cmd, const api::ReturnCode& result); + bool checkSafeTimeReached(api::StorageCommand& cmd); api::ReturnCode makeSafeTimeRejectionResult(TimePoint unsafeTime); bool checkTimestampMutationPreconditions( diff --git a/storage/src/vespa/storage/distributor/idealstatemanager.cpp b/storage/src/vespa/storage/distributor/idealstatemanager.cpp index 77b924ad351..5a1ff31e2e7 100644 --- a/storage/src/vespa/storage/distributor/idealstatemanager.cpp +++ b/storage/src/vespa/storage/distributor/idealstatemanager.cpp @@ -26,11 +26,12 @@ namespace distributor { IdealStateManager::IdealStateManager( Distributor& owner, DistributorBucketSpaceRepo& bucketSpaceRepo, + DistributorBucketSpaceRepo& readOnlyBucketSpaceRepo, DistributorComponentRegister& compReg, bool manageActiveBucketCopies) : HtmlStatusReporter("idealstateman", "Ideal state manager"), _metrics(new IdealStateMetricSet), - _distributorComponent(owner, bucketSpaceRepo, compReg, "Ideal state manager"), + _distributorComponent(owner, bucketSpaceRepo, readOnlyBucketSpaceRepo, compReg, "Ideal state manager"), _bucketSpaceRepo(bucketSpaceRepo) { _distributorComponent.registerStatusPage(*this); diff --git a/storage/src/vespa/storage/distributor/idealstatemanager.h b/storage/src/vespa/storage/distributor/idealstatemanager.h index c8be2a40ad7..3bb6d0dd757 100644 --- a/storage/src/vespa/storage/distributor/idealstatemanager.h +++ b/storage/src/vespa/storage/distributor/idealstatemanager.h @@ -37,6 +37,7 @@ public: IdealStateManager(Distributor& owner, DistributorBucketSpaceRepo& bucketSpaceRepo, + DistributorBucketSpaceRepo& 
readOnlyBucketSpaceRepo, DistributorComponentRegister& compReg, bool manageActiveBucketCopies); diff --git a/storage/src/vespa/storage/distributor/operations/external/getoperation.h b/storage/src/vespa/storage/distributor/operations/external/getoperation.h index 198c588dfd1..3936f13077e 100644 --- a/storage/src/vespa/storage/distributor/operations/external/getoperation.h +++ b/storage/src/vespa/storage/distributor/operations/external/getoperation.h @@ -34,6 +34,9 @@ public: bool hasConsistentCopies() const; + // Exposed for unit testing. TODO feels a bit dirty :I + const DistributorBucketSpace& bucketSpace() const noexcept { return _bucketSpace; } + private: class GroupId { public: diff --git a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp index 5f74a82c28a..6cba7084037 100644 --- a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp +++ b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp @@ -27,7 +27,8 @@ PendingClusterState::PendingClusterState( const framework::Clock& clock, const ClusterInformation::CSP& clusterInfo, DistributorMessageSender& sender, - DistributorBucketSpaceRepo &bucketSpaceRepo, + DistributorBucketSpaceRepo& bucketSpaceRepo, + DistributorBucketSpaceRepo& readOnlyBucketSpaceRepo, const std::shared_ptr<api::SetSystemStateCommand>& newStateCmd, const OutdatedNodesMap &outdatedNodesMap, api::Timestamp creationTimestamp) @@ -40,6 +41,9 @@ PendingClusterState::PendingClusterState( _creationTimestamp(creationTimestamp), _sender(sender), _bucketSpaceRepo(bucketSpaceRepo), + _readOnlyBucketSpaceRepo(readOnlyBucketSpaceRepo), + _clusterStateVersion(_cmd->getClusterStateBundle().getVersion()), + _isVersionedTransition(true), _bucketOwnershipTransfer(false), _pendingTransitions() { @@ -51,7 +55,8 @@ PendingClusterState::PendingClusterState( const framework::Clock& clock, const ClusterInformation::CSP& clusterInfo, DistributorMessageSender& sender, - 
DistributorBucketSpaceRepo &bucketSpaceRepo, + DistributorBucketSpaceRepo& bucketSpaceRepo, + DistributorBucketSpaceRepo& readOnlyBucketSpaceRepo, api::Timestamp creationTimestamp) : _requestedNodes(clusterInfo->getStorageNodeCount()), _prevClusterStateBundle(clusterInfo->getClusterStateBundle()), @@ -61,6 +66,9 @@ PendingClusterState::PendingClusterState( _creationTimestamp(creationTimestamp), _sender(sender), _bucketSpaceRepo(bucketSpaceRepo), + _readOnlyBucketSpaceRepo(readOnlyBucketSpaceRepo), + _clusterStateVersion(0), + _isVersionedTransition(false), _bucketOwnershipTransfer(true), _pendingTransitions() { diff --git a/storage/src/vespa/storage/distributor/pendingclusterstate.h b/storage/src/vespa/storage/distributor/pendingclusterstate.h index b96ba8cbbd7..cedc0573381 100644 --- a/storage/src/vespa/storage/distributor/pendingclusterstate.h +++ b/storage/src/vespa/storage/distributor/pendingclusterstate.h @@ -45,15 +45,16 @@ public: const framework::Clock& clock, const ClusterInformation::CSP& clusterInfo, DistributorMessageSender& sender, - DistributorBucketSpaceRepo &bucketSpaceRepo, + DistributorBucketSpaceRepo& bucketSpaceRepo, + DistributorBucketSpaceRepo& readOnlyBucketSpaceRepo, const std::shared_ptr<api::SetSystemStateCommand>& newStateCmd, const OutdatedNodesMap &outdatedNodesMap, api::Timestamp creationTimestamp) { - return std::unique_ptr<PendingClusterState>( - new PendingClusterState(clock, clusterInfo, sender, bucketSpaceRepo, newStateCmd, - outdatedNodesMap, - creationTimestamp)); + // Naked new due to private constructor + return std::unique_ptr<PendingClusterState>(new PendingClusterState( + clock, clusterInfo, sender, bucketSpaceRepo, readOnlyBucketSpaceRepo, + newStateCmd, outdatedNodesMap, creationTimestamp)); } /** @@ -64,16 +65,19 @@ public: const framework::Clock& clock, const ClusterInformation::CSP& clusterInfo, DistributorMessageSender& sender, - DistributorBucketSpaceRepo &bucketSpaceRepo, + DistributorBucketSpaceRepo& 
bucketSpaceRepo, + DistributorBucketSpaceRepo& readOnlyBucketSpaceRepo, api::Timestamp creationTimestamp) { - return std::unique_ptr<PendingClusterState>( - new PendingClusterState(clock, clusterInfo, sender, bucketSpaceRepo, creationTimestamp)); + // Naked new due to private constructor + return std::unique_ptr<PendingClusterState>(new PendingClusterState( + clock, clusterInfo, sender, bucketSpaceRepo, + readOnlyBucketSpaceRepo, creationTimestamp)); } PendingClusterState(const PendingClusterState &) = delete; PendingClusterState & operator = (const PendingClusterState &) = delete; - ~PendingClusterState(); + ~PendingClusterState() override; /** * Adds the info from the reply to our list of information. @@ -104,10 +108,31 @@ public: return _bucketOwnershipTransfer; } + bool hasCommand() const noexcept { + return (_cmd.get() != nullptr); + } + std::shared_ptr<api::SetSystemStateCommand> getCommand() { return _cmd; } + bool isVersionedTransition() const noexcept { + return _isVersionedTransition; + } + + uint32_t clusterStateVersion() const noexcept { + return _clusterStateVersion; + } + + bool isDeferred() const noexcept { + return (isVersionedTransition() + && _newClusterStateBundle.deferredActivation()); + } + + void clearCommand() { + _cmd.reset(); + } + const lib::ClusterStateBundle& getNewClusterStateBundle() const { return _newClusterStateBundle; } @@ -141,7 +166,8 @@ private: const framework::Clock&, const ClusterInformation::CSP& clusterInfo, DistributorMessageSender& sender, - DistributorBucketSpaceRepo &bucketSpaceRepo, + DistributorBucketSpaceRepo& bucketSpaceRepo, + DistributorBucketSpaceRepo& readOnlyBucketSpaceRepo, const std::shared_ptr<api::SetSystemStateCommand>& newStateCmd, const OutdatedNodesMap &outdatedNodesMap, api::Timestamp creationTimestamp); @@ -154,7 +180,8 @@ private: const framework::Clock&, const ClusterInformation::CSP& clusterInfo, DistributorMessageSender& sender, - DistributorBucketSpaceRepo &bucketSpaceRepo, + 
DistributorBucketSpaceRepo& bucketSpaceRepo, + DistributorBucketSpaceRepo& readOnlyBucketSpaceRepo, api::Timestamp creationTimestamp); struct BucketSpaceAndNode { @@ -204,8 +231,10 @@ private: api::Timestamp _creationTimestamp; DistributorMessageSender& _sender; - DistributorBucketSpaceRepo &_bucketSpaceRepo; - + DistributorBucketSpaceRepo& _bucketSpaceRepo; + DistributorBucketSpaceRepo& _readOnlyBucketSpaceRepo; + uint32_t _clusterStateVersion; + bool _isVersionedTransition; bool _bucketOwnershipTransfer; std::unordered_map<document::BucketSpace, std::unique_ptr<PendingBucketSpaceDbTransition>, document::BucketSpace::hash> _pendingTransitions; }; diff --git a/storage/src/vespa/storage/storageserver/bouncer.cpp b/storage/src/vespa/storage/storageserver/bouncer.cpp index 0541c7322f1..fdbfd553315 100644 --- a/storage/src/vespa/storage/storageserver/bouncer.cpp +++ b/storage/src/vespa/storage/storageserver/bouncer.cpp @@ -235,6 +235,7 @@ Bouncer::onDown(const std::shared_ptr<api::StorageMessage>& msg) case api::MessageType::SETNODESTATE_ID: case api::MessageType::GETNODESTATE_ID: case api::MessageType::SETSYSTEMSTATE_ID: + case api::MessageType::ACTIVATE_CLUSTER_STATE_VERSION_ID: case api::MessageType::NOTIFYBUCKETCHANGE_ID: // state commands are always ok return false; diff --git a/storage/src/vespa/storage/storageserver/communicationmanager.cpp b/storage/src/vespa/storage/storageserver/communicationmanager.cpp index 7fb85ef0ecc..978d434847e 100644 --- a/storage/src/vespa/storage/storageserver/communicationmanager.cpp +++ b/storage/src/vespa/storage/storageserver/communicationmanager.cpp @@ -622,20 +622,25 @@ CommunicationManager::sendDirectRPCReply( { std::string requestName(request.getMethodName()); if (requestName == "getnodestate3") { - api::GetNodeStateReply& gns(static_cast<api::GetNodeStateReply&>(*reply)); + auto& gns(dynamic_cast<api::GetNodeStateReply&>(*reply)); std::ostringstream ns; serializeNodeState(gns, ns, true, true, false); 
request.addReturnString(ns.str().c_str()); request.addReturnString(gns.getNodeInfo().c_str()); LOGBP(debug, "Sending getnodestate3 reply with host info '%s'.", gns.getNodeInfo().c_str()); } else if (requestName == "getnodestate2") { - api::GetNodeStateReply& gns(static_cast<api::GetNodeStateReply&>(*reply)); + auto& gns(dynamic_cast<api::GetNodeStateReply&>(*reply)); std::ostringstream ns; serializeNodeState(gns, ns, true, true, false); request.addReturnString(ns.str().c_str()); LOGBP(debug, "Sending getnodestate2 reply with no host info."); } else if (requestName == "setsystemstate2" || requestName == "setdistributionstates") { // No data to return + } else if (requestName == "activate_cluster_state_version") { + auto& activate_reply(dynamic_cast<api::ActivateClusterStateVersionReply&>(*reply)); + request.addReturnInt(activate_reply.actualVersion()); + LOGBP(debug, "sending activate_cluster_state_version reply for version %u with actual version %u ", + activate_reply.activateVersion(), activate_reply.actualVersion()); } else { request.addReturnInt(reply->getResult().getResult()); request.addReturnString(reply->getResult().getMessage().c_str()); diff --git a/storage/src/vespa/storage/storageserver/fnetlistener.cpp b/storage/src/vespa/storage/storageserver/fnetlistener.cpp index e31bded772c..ec488b25714 100644 --- a/storage/src/vespa/storage/storageserver/fnetlistener.cpp +++ b/storage/src/vespa/storage/storageserver/fnetlistener.cpp @@ -92,6 +92,11 @@ FNetListener::initRPC() rb.ParamDesc("uncompressedSize", "Uncompressed size for payload"); rb.ParamDesc("payload", "Binary Slime format payload"); //------------------------------------------------------------------------- + rb.DefineMethod("activate_cluster_state_version", "i", "i", FRT_METHOD(FNetListener::RPC_activateClusterStateVersion), this); + rb.MethodDesc("Explicitly activates an already prepared cluster state version"); + rb.ParamDesc("activate_version", "Expected cluster state version to activate"); + 
rb.ReturnDesc("actual_version", "Cluster state version that was prepared on the node prior to receiving RPC"); + //------------------------------------------------------------------------- rb.DefineMethod("getcurrenttime", "", "lis", FRT_METHOD(FNetListener::RPC_getCurrentTime), this); rb.MethodDesc("Get current time on this node"); rb.ReturnDesc("seconds", "Current time in seconds since epoch"); @@ -203,6 +208,7 @@ void FNetListener::RPC_setDistributionStates(FRT_RPCRequest* req) { req->SetError(RPCRequestWrapper::ERR_BAD_REQUEST, e.what()); return; } + LOG(debug, "Got state bundle %s", state_bundle->toString().c_str()); // TODO add constructor taking in shared_ptr directly instead? auto cmd = std::make_shared<api::SetSystemStateCommand>(*state_bundle); @@ -211,4 +217,20 @@ void FNetListener::RPC_setDistributionStates(FRT_RPCRequest* req) { detach_and_forward_to_enqueuer(std::move(cmd), req); } +void FNetListener::RPC_activateClusterStateVersion(FRT_RPCRequest* req) { + if (_closed) { + LOG(debug, "Not handling RPC call activate_cluster_state_version() as we have closed"); + req->SetError(RPCRequestWrapper::ERR_NODE_SHUTTING_DOWN, "Node shutting down"); + return; + } + + const uint32_t activate_version = req->GetParams()->GetValue(0)._intval32; + auto cmd = std::make_shared<api::ActivateClusterStateVersionCommand>(activate_version); + cmd->setPriority(api::StorageMessage::VERYHIGH); + + LOG(debug, "Got state activation request for version %u", activate_version); + + detach_and_forward_to_enqueuer(std::move(cmd), req); +} + } diff --git a/storage/src/vespa/storage/storageserver/fnetlistener.h b/storage/src/vespa/storage/storageserver/fnetlistener.h index abcba18e0be..2097be15491 100644 --- a/storage/src/vespa/storage/storageserver/fnetlistener.h +++ b/storage/src/vespa/storage/storageserver/fnetlistener.h @@ -26,6 +26,7 @@ public: void RPC_setSystemState2(FRT_RPCRequest *req); void RPC_getCurrentTime(FRT_RPCRequest *req); void 
RPC_setDistributionStates(FRT_RPCRequest* req); + void RPC_activateClusterStateVersion(FRT_RPCRequest* req); void registerHandle(vespalib::stringref handle); void close(); diff --git a/storage/src/vespa/storage/storageserver/slime_cluster_state_bundle_codec.cpp b/storage/src/vespa/storage/storageserver/slime_cluster_state_bundle_codec.cpp index 5b7e0ab4621..1f854bc724e 100644 --- a/storage/src/vespa/storage/storageserver/slime_cluster_state_bundle_codec.cpp +++ b/storage/src/vespa/storage/storageserver/slime_cluster_state_bundle_codec.cpp @@ -53,6 +53,9 @@ EncodedClusterStateBundle SlimeClusterStateBundleCodec::encode( { vespalib::Slime slime; Cursor& root = slime.setObject(); + if (bundle.deferredActivation()) { + root.setBool("deferred-activation", bundle.deferredActivation()); + } Cursor& states = root.setObject("states"); states.setString("baseline", serialize_state(*bundle.getBaselineClusterState())); Cursor& spaces = states.setObject("spaces"); @@ -79,6 +82,7 @@ namespace { static const Memory StatesField("states"); static const Memory BaselineField("baseline"); static const Memory SpacesField("spaces"); +static const Memory DeferredActivationField("deferred-activation"); struct StateInserter : vespalib::slime::ObjectTraverser { lib::ClusterStateBundle::BucketSpaceStateMapping& _space_states; @@ -118,8 +122,11 @@ std::shared_ptr<const lib::ClusterStateBundle> SlimeClusterStateBundleCodec::dec lib::ClusterStateBundle::BucketSpaceStateMapping space_states; StateInserter inserter(space_states); spaces.traverse(inserter); + + const bool deferred_activation = root[DeferredActivationField].asBool(); // Defaults to false if not set. + // TODO add shared_ptr constructor for baseline? 
- return std::make_shared<lib::ClusterStateBundle>(baseline, std::move(space_states)); + return std::make_shared<lib::ClusterStateBundle>(baseline, std::move(space_states), deferred_activation); } } diff --git a/storage/src/vespa/storage/storageserver/statemanager.cpp b/storage/src/vespa/storage/storageserver/statemanager.cpp index 95cb5dec696..af01a880fea 100644 --- a/storage/src/vespa/storage/storageserver/statemanager.cpp +++ b/storage/src/vespa/storage/storageserver/statemanager.cpp @@ -514,6 +514,19 @@ StateManager::onSetSystemState( return true; } +bool +StateManager::onActivateClusterStateVersion( + const std::shared_ptr<api::ActivateClusterStateVersionCommand>& cmd) +{ + auto reply = std::make_shared<api::ActivateClusterStateVersionReply>(*cmd); + { + vespalib::LockGuard lock(_stateLock); + reply->setActualVersion(_systemState ? _systemState->getVersion() : 0); + } + sendUp(reply); + return true; +} + void StateManager::run(framework::ThreadHandle& thread) { diff --git a/storage/src/vespa/storage/storageserver/statemanager.h b/storage/src/vespa/storage/storageserver/statemanager.h index 0bacd41f6d9..57f0e02a136 100644 --- a/storage/src/vespa/storage/storageserver/statemanager.h +++ b/storage/src/vespa/storage/storageserver/statemanager.h @@ -137,6 +137,7 @@ private: bool onGetNodeState(const std::shared_ptr<api::GetNodeStateCommand>&) override; bool onSetSystemState(const std::shared_ptr<api::SetSystemStateCommand>&) override; + bool onActivateClusterStateVersion(const std::shared_ptr<api::ActivateClusterStateVersionCommand>&) override; /** * _stateLock MUST NOT be held while calling. 
diff --git a/storageapi/src/vespa/storageapi/message/state.cpp b/storageapi/src/vespa/storageapi/message/state.cpp index efa9a45764f..071dba16b91 100644 --- a/storageapi/src/vespa/storageapi/message/state.cpp +++ b/storageapi/src/vespa/storageapi/message/state.cpp @@ -12,6 +12,8 @@ IMPLEMENT_COMMAND(GetNodeStateCommand, GetNodeStateReply) IMPLEMENT_REPLY(GetNodeStateReply) IMPLEMENT_COMMAND(SetSystemStateCommand, SetSystemStateReply) IMPLEMENT_REPLY(SetSystemStateReply) +IMPLEMENT_COMMAND(ActivateClusterStateVersionCommand, ActivateClusterStateVersionReply) +IMPLEMENT_REPLY(ActivateClusterStateVersionReply) GetNodeStateCommand::GetNodeStateCommand(lib::NodeState::UP expectedState) : StorageCommand(MessageType::GETNODESTATE), @@ -102,5 +104,39 @@ SetSystemStateReply::print(std::ostream& out, bool verbose, } } +ActivateClusterStateVersionCommand::ActivateClusterStateVersionCommand(uint32_t version) + : StorageCommand(MessageType::ACTIVATE_CLUSTER_STATE_VERSION), + _version(version) +{ +} + +void ActivateClusterStateVersionCommand::print(std::ostream& out, bool verbose, + const std::string& indent) const +{ + out << "ActivateClusterStateVersionCommand(" << _version << ")"; + if (verbose) { + out << " : "; + StorageCommand::print(out, verbose, indent); + } +} + +ActivateClusterStateVersionReply::ActivateClusterStateVersionReply(const ActivateClusterStateVersionCommand& cmd) + : StorageReply(cmd), + _activateVersion(cmd.version()), + _actualVersion(0) // Must be set explicitly +{ +} + +void ActivateClusterStateVersionReply::print(std::ostream& out, bool verbose, + const std::string& indent) const +{ + out << "ActivateClusterStateVersionReply(activate " << _activateVersion + << ", actual " << _actualVersion << ")"; + if (verbose) { + out << " : "; + StorageReply::print(out, verbose, indent); + } +} + } // api } // storage diff --git a/storageapi/src/vespa/storageapi/message/state.h b/storageapi/src/vespa/storageapi/message/state.h index 4e5ad92b259..91c4707e7c1 100644 
--- a/storageapi/src/vespa/storageapi/message/state.h +++ b/storageapi/src/vespa/storageapi/message/state.h @@ -93,4 +93,27 @@ public: DECLARE_STORAGEREPLY(SetSystemStateReply, onSetSystemStateReply) }; +class ActivateClusterStateVersionCommand : public StorageCommand { + uint32_t _version; +public: + explicit ActivateClusterStateVersionCommand(uint32_t version); + uint32_t version() const noexcept { return _version; } + void print(std::ostream& out, bool verbose, const std::string& indent) const override; + + DECLARE_STORAGECOMMAND(ActivateClusterStateVersionCommand, onActivateClusterStateVersion); +}; + +class ActivateClusterStateVersionReply : public StorageReply { + uint32_t _activateVersion; + uint32_t _actualVersion; +public: + explicit ActivateClusterStateVersionReply(const ActivateClusterStateVersionCommand&); + uint32_t activateVersion() const noexcept { return _activateVersion; } + void setActualVersion(uint32_t version) noexcept { _actualVersion = version; } + uint32_t actualVersion() const noexcept { return _actualVersion; } + void print(std::ostream& out, bool verbose, const std::string& indent) const override; + + DECLARE_STORAGEREPLY(ActivateClusterStateVersionReply, onActivateClusterStateVersionReply); +}; + } diff --git a/storageapi/src/vespa/storageapi/messageapi/messagehandler.h b/storageapi/src/vespa/storageapi/messageapi/messagehandler.h index a9c1dfb8f26..27ee509e859 100644 --- a/storageapi/src/vespa/storageapi/messageapi/messagehandler.h +++ b/storageapi/src/vespa/storageapi/messageapi/messagehandler.h @@ -50,6 +50,8 @@ class NotifyBucketChangeCommand; class SetNodeStateCommand; class GetNodeStateCommand; class SetSystemStateCommand; +class ActivateClusterStateVersionCommand; +class ActivateClusterStateVersionReply; class GetSystemStateCommand; class GetBucketNodesCommand; class BucketsAddedCommand; @@ -276,6 +278,12 @@ public: virtual bool onSetSystemStateReply( const std::shared_ptr<api::SetSystemStateReply>&) { return false; } + virtual 
bool onActivateClusterStateVersion( + const std::shared_ptr<api::ActivateClusterStateVersionCommand>&) + { return false; } + virtual bool onActivateClusterStateVersionReply( + const std::shared_ptr<api::ActivateClusterStateVersionReply>&) + { return false; } virtual bool onGetSystemState( const std::shared_ptr<api::GetSystemStateCommand>&) { return false; } diff --git a/storageapi/src/vespa/storageapi/messageapi/storagemessage.cpp b/storageapi/src/vespa/storageapi/messageapi/storagemessage.cpp index bab475eea32..40422ce06c4 100644 --- a/storageapi/src/vespa/storageapi/messageapi/storagemessage.cpp +++ b/storageapi/src/vespa/storageapi/messageapi/storagemessage.cpp @@ -77,6 +77,8 @@ const MessageType MessageType::SETSYSTEMSTATE("Set system state", SETSYSTEMSTATE const MessageType MessageType::SETSYSTEMSTATE_REPLY("Set system state reply", SETSYSTEMSTATE_REPLY_ID, &MessageType::SETSYSTEMSTATE); const MessageType MessageType::GETSYSTEMSTATE("Get system state", GETSYSTEMSTATE_ID); const MessageType MessageType::GETSYSTEMSTATE_REPLY("get system state reply", GETSYSTEMSTATE_REPLY_ID, &MessageType::GETSYSTEMSTATE); +const MessageType MessageType::ACTIVATE_CLUSTER_STATE_VERSION("Activate cluster state version", ACTIVATE_CLUSTER_STATE_VERSION_ID); +const MessageType MessageType::ACTIVATE_CLUSTER_STATE_VERSION_REPLY("Activate cluster state version reply", ACTIVATE_CLUSTER_STATE_VERSION_REPLY_ID, &MessageType::ACTIVATE_CLUSTER_STATE_VERSION); const MessageType MessageType::GETBUCKETDIFF("GetBucketDiff", GETBUCKETDIFF_ID); const MessageType MessageType::GETBUCKETDIFF_REPLY("GetBucketDiff reply", GETBUCKETDIFF_REPLY_ID, &MessageType::GETBUCKETDIFF); const MessageType MessageType::APPLYBUCKETDIFF("ApplyBucketDiff", APPLYBUCKETDIFF_ID); diff --git a/storageapi/src/vespa/storageapi/messageapi/storagemessage.h b/storageapi/src/vespa/storageapi/messageapi/storagemessage.h index c9f6e737a47..8c2338a020c 100644 --- a/storageapi/src/vespa/storageapi/messageapi/storagemessage.h +++ 
b/storageapi/src/vespa/storageapi/messageapi/storagemessage.h @@ -149,6 +149,8 @@ public: QUERYRESULT_REPLY_ID = 89, SETBUCKETSTATE_ID = 94, SETBUCKETSTATE_REPLY_ID = 95, + ACTIVATE_CLUSTER_STATE_VERSION_ID = 96, + ACTIVATE_CLUSTER_STATE_VERSION_REPLY_ID = 97, MESSAGETYPE_MAX_ID }; @@ -195,6 +197,8 @@ public: static const MessageType SETSYSTEMSTATE_REPLY; static const MessageType GETSYSTEMSTATE; static const MessageType GETSYSTEMSTATE_REPLY; + static const MessageType ACTIVATE_CLUSTER_STATE_VERSION; + static const MessageType ACTIVATE_CLUSTER_STATE_VERSION_REPLY; static const MessageType BUCKETSADDED; static const MessageType BUCKETSADDED_REPLY; static const MessageType BUCKETSREMOVED; diff --git a/vdslib/src/vespa/vdslib/state/cluster_state_bundle.cpp b/vdslib/src/vespa/vdslib/state/cluster_state_bundle.cpp index ed561d67f6d..ff633c02fad 100644 --- a/vdslib/src/vespa/vdslib/state/cluster_state_bundle.cpp +++ b/vdslib/src/vespa/vdslib/state/cluster_state_bundle.cpp @@ -4,18 +4,30 @@ #include "cluster_state_bundle.h" #include "clusterstate.h" #include <iostream> +#include <sstream> namespace storage::lib { ClusterStateBundle::ClusterStateBundle(const ClusterState &baselineClusterState) - : _baselineClusterState(std::make_shared<const ClusterState>(baselineClusterState)) + : _baselineClusterState(std::make_shared<const ClusterState>(baselineClusterState)), + _deferredActivation(false) { } ClusterStateBundle::ClusterStateBundle(const ClusterState& baselineClusterState, BucketSpaceStateMapping derivedBucketSpaceStates) : _baselineClusterState(std::make_shared<const ClusterState>(baselineClusterState)), - _derivedBucketSpaceStates(std::move(derivedBucketSpaceStates)) + _derivedBucketSpaceStates(std::move(derivedBucketSpaceStates)), + _deferredActivation(false) +{ +} + +ClusterStateBundle::ClusterStateBundle(const ClusterState& baselineClusterState, + BucketSpaceStateMapping derivedBucketSpaceStates, + bool deferredActivation) + : 
_baselineClusterState(std::make_shared<const ClusterState>(baselineClusterState)), + _derivedBucketSpaceStates(std::move(derivedBucketSpaceStates)), + _deferredActivation(deferredActivation) { } @@ -52,6 +64,9 @@ ClusterStateBundle::operator==(const ClusterStateBundle &rhs) const if (_derivedBucketSpaceStates.size() != rhs._derivedBucketSpaceStates.size()) { return false; } + if (_deferredActivation != rhs._deferredActivation) { + return false; + } // Can't do a regular operator== comparison since we must check equality // of cluster state _values_, not their _pointers_. for (auto& lhs_ds : _derivedBucketSpaceStates) { @@ -64,6 +79,14 @@ ClusterStateBundle::operator==(const ClusterStateBundle &rhs) const return true; } +std::string +ClusterStateBundle::toString() const +{ + std::ostringstream os; + os << *this; + return os.str(); +} + std::ostream& operator<<(std::ostream& os, const ClusterStateBundle& bundle) { os << "ClusterStateBundle('" << *bundle.getBaselineClusterState(); if (!bundle.getDerivedClusterStates().empty()) { @@ -74,7 +97,11 @@ std::ostream& operator<<(std::ostream& os, const ClusterStateBundle& bundle) { os << " '" << *ds.second; } } - os << "')"; + os << '\''; + if (bundle.deferredActivation()) { + os << " (deferred activation)"; + } + os << ")"; return os; } diff --git a/vdslib/src/vespa/vdslib/state/cluster_state_bundle.h b/vdslib/src/vespa/vdslib/state/cluster_state_bundle.h index a64416762b8..d0b052766ff 100644 --- a/vdslib/src/vespa/vdslib/state/cluster_state_bundle.h +++ b/vdslib/src/vespa/vdslib/state/cluster_state_bundle.h @@ -5,6 +5,7 @@ #include <vespa/document/bucket/bucketspace.h> #include <unordered_map> #include <iosfwd> +#include <string> namespace storage::lib { @@ -24,10 +25,14 @@ public: >; std::shared_ptr<const ClusterState> _baselineClusterState; BucketSpaceStateMapping _derivedBucketSpaceStates; + bool _deferredActivation; public: explicit ClusterStateBundle(const ClusterState &baselineClusterState); ClusterStateBundle(const 
ClusterState& baselineClusterState, BucketSpaceStateMapping derivedBucketSpaceStates); + ClusterStateBundle(const ClusterState& baselineClusterState, + BucketSpaceStateMapping derivedBucketSpaceStates, + bool deferredActivation); ~ClusterStateBundle(); const std::shared_ptr<const ClusterState> &getBaselineClusterState() const; const std::shared_ptr<const ClusterState> &getDerivedClusterState(document::BucketSpace bucketSpace) const; @@ -35,6 +40,8 @@ public: return _derivedBucketSpaceStates; } uint32_t getVersion() const; + bool deferredActivation() const noexcept { return _deferredActivation; } + std::string toString() const; bool operator==(const ClusterStateBundle &rhs) const; bool operator!=(const ClusterStateBundle &rhs) const { return !operator==(rhs); } }; diff --git a/vespa-testrunner-components/CMakeLists.txt b/vespa-testrunner-components/CMakeLists.txt new file mode 100644 index 00000000000..fe2cb84b7bb --- /dev/null +++ b/vespa-testrunner-components/CMakeLists.txt @@ -0,0 +1,3 @@ +install_java_artifact(vespa-testrunner-components) +install_fat_java_artifact(vespa-testrunner-components) +install_config_definition(src/main/resources/configdefinitions/test-runner.def test-runner.def) diff --git a/vespa-testrunner-components/OWNERS b/vespa-testrunner-components/OWNERS new file mode 100644 index 00000000000..134acfc20f3 --- /dev/null +++ b/vespa-testrunner-components/OWNERS @@ -0,0 +1 @@ +jvenstad diff --git a/vespa-testrunner-components/README.md b/vespa-testrunner-components/README.md new file mode 100644 index 00000000000..034ad95ac25 --- /dev/null +++ b/vespa-testrunner-components/README.md @@ -0,0 +1,4 @@ +# Vespa-testrunner-components + +Defines handler and component used by the vespa application that is deployed by the controller to +run system/staging/production tests. 
diff --git a/vespa-testrunner-components/pom.xml b/vespa-testrunner-components/pom.xml new file mode 100644 index 00000000000..66bcd92df50 --- /dev/null +++ b/vespa-testrunner-components/pom.xml @@ -0,0 +1,91 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" + xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <modelVersion>4.0.0</modelVersion> + <groupId>com.yahoo.vespa.hosted</groupId> + <artifactId>vespa-testrunner-components</artifactId> + <packaging>container-plugin</packaging> + + <parent> + <groupId>com.yahoo.vespa</groupId> + <artifactId>parent</artifactId> + <version>7-SNAPSHOT</version> + <relativePath>../parent/pom.xml</relativePath> + </parent> + + <dependencies> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>container</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> + + <dependency> + <groupId>org.fusesource.jansi</groupId> + <artifactId>jansi</artifactId> + <version>1.11</version> + </dependency> + + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>commons-io</groupId> + <artifactId>commons-io</artifactId> + <scope>test</scope> + </dependency> + </dependencies> + + <build> + <plugins> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>build-helper-maven-plugin</artifactId> + <executions> + <execution> + <id>attach-artifacts</id> + <phase>package</phase> + <goals> + <goal>attach-artifact</goal> + </goals> + <configuration> + <artifacts> + <artifact> + <file>target/${project.artifactId}-jar-with-dependencies.jar</file> + <type>jar</type> + <classifier>deploy</classifier> + </artifact> + </artifacts> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + 
<artifactId>maven-surefire-plugin</artifactId> + <configuration> + <environmentVariables> + <VESPA_HOME>/opt/vespa</VESPA_HOME> + </environmentVariables> + </configuration> + </plugin> + <plugin> + <groupId>com.yahoo.vespa</groupId> + <artifactId>bundle-plugin</artifactId> + <version>${project.version}</version> + <extensions>true</extensions> + <configuration> + <useCommonAssemblyIds>true</useCommonAssemblyIds> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + </plugin> + </plugins> + </build> +</project> diff --git a/vespa-testrunner-components/src/main/java/com/yahoo/vespa/hosted/testrunner/PomXmlGenerator.java b/vespa-testrunner-components/src/main/java/com/yahoo/vespa/hosted/testrunner/PomXmlGenerator.java new file mode 100644 index 00000000000..7a85eabe289 --- /dev/null +++ b/vespa-testrunner-components/src/main/java/com/yahoo/vespa/hosted/testrunner/PomXmlGenerator.java @@ -0,0 +1,110 @@ +package com.yahoo.vespa.hosted.testrunner; + +import com.yahoo.vespa.defaults.Defaults; + +import java.nio.file.Path; +import java.util.List; +import java.util.stream.Collectors; + +/** + * Generates a pom.xml file that sets up build profile to test against the provided + * jar artifacts. 
+ * + * @author valerijf + */ +public class PomXmlGenerator { + private static final String PROPERTY_TEMPLATE = + " <%ARTIFACT_ID%.path>%JAR_PATH%</%ARTIFACT_ID%.path>\n"; + private static final String TEST_ARTIFACT_GROUP_ID = "com.yahoo.vespa.testrunner.test"; + private static final String DEPENDENCY_TEMPLATE = + " <dependency>\n" + + " <groupId>" + TEST_ARTIFACT_GROUP_ID + "</groupId>\n" + + " <artifactId>%ARTIFACT_ID%</artifactId>\n" + + " <scope>system</scope>\n" + + " <type>test-jar</type>\n" + + " <version>test</version>\n" + + " <systemPath>${%ARTIFACT_ID%.path}</systemPath>\n" + + " </dependency>\n"; + private static final String DEPENDENCY_TO_SCAN_TEMPLATE = + " <dependency>" + TEST_ARTIFACT_GROUP_ID + ":%ARTIFACT_ID%</dependency>\n"; + private static final String POM_XML_TEMPLATE = + "<?xml version=\"1.0\"?>\n" + + "<project xmlns=\"http://maven.apache.org/POM/4.0.0\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd\">\n" + + " <modelVersion>4.0.0</modelVersion>\n" + + " <groupId>com.yahoo.vespa</groupId>\n" + + " <artifactId>tester-application</artifactId>\n" + + " <version>1.0.0</version>\n" + + "\n" + + " <properties>\n" + + " <maven_version>4.12</maven_version>\n" + + " <surefire_version>2.22.0</surefire_version>\n" + + "%PROPERTIES%" + + " </properties>\n" + + "\n" + + " <dependencies>\n" + + " <dependency>\n" + + " <groupId>junit</groupId>\n" + + " <artifactId>junit</artifactId>\n" + + " <version>${maven_version}</version>\n" + + " <scope>test</scope>\n" + + " </dependency>\n" + + "%DEPENDENCIES%" + + " </dependencies>\n" + + "\n" + + " <build>\n" + + " <plugins>\n" + + " <plugin>\n" + + " <groupId>org.apache.maven.plugins</groupId>\n" + + " <artifactId>maven-surefire-plugin</artifactId>\n" + + " <version>${surefire_version}</version>\n" + + " <configuration>\n" + + " <dependenciesToScan>\n" + + "%DEPENDENCIES_TO_SCAN%" + + " 
</dependenciesToScan>\n" + + " <groups>%GROUPS%</groups>\n" + + " <excludedGroups>com.yahoo.vespa.tenant.systemtest.base.impl.EmptyExcludeGroup.class</excludedGroups>\n" + + " <excludes>\n" + + " <exclude>%GROUPS%</exclude>\n" + + " </excludes>\n" + + " <reportsDirectory>${env.TEST_DIR}</reportsDirectory>\n" + + " <redirectTestOutputToFile>false</redirectTestOutputToFile>\n" + + " <environmentVariables>\n" + + " <LD_LIBRARY_PATH>" + Defaults.getDefaults().underVespaHome("lib64") + "</LD_LIBRARY_PATH>\n" + + " </environmentVariables>\n" + + " </configuration>\n" + + " </plugin>\n" + + " <plugin>\n" + + " <groupId>org.apache.maven.plugins</groupId>\n" + + " <artifactId>maven-surefire-report-plugin</artifactId>\n" + + " <version>${surefire_version}</version>\n" + + " <configuration>\n" + + " <reportsDirectory>${env.TEST_DIR}</reportsDirectory>\n" + + " </configuration>\n" + + " </plugin>\n" + + " </plugins>\n" + + " </build>\n" + + "</project>\n"; + + static String generatePomXml(TestProfile testProfile, List<Path> artifacts, Path testArtifact) { + String properties = artifacts.stream() + .map(path -> PROPERTY_TEMPLATE + .replace("%ARTIFACT_ID%", path.getFileName().toString()) + .replace("%JAR_PATH%", path.toString())) + .collect(Collectors.joining()); + String dependencies = artifacts.stream() + .map(path -> DEPENDENCY_TEMPLATE + .replace("%ARTIFACT_ID%", path.getFileName().toString())) + .collect(Collectors.joining()); + String dependenciesToScan = + DEPENDENCY_TO_SCAN_TEMPLATE + .replace("%ARTIFACT_ID%", testArtifact.getFileName().toString()); + + return POM_XML_TEMPLATE + .replace("%PROPERTIES%", properties) + .replace("%DEPENDENCIES_TO_SCAN%", dependenciesToScan) + .replace("%DEPENDENCIES%", dependencies) + .replace("%GROUPS%", testProfile.group()); + } + + private PomXmlGenerator() {} +} diff --git a/vespa-testrunner-components/src/main/java/com/yahoo/vespa/hosted/testrunner/TestProfile.java 
b/vespa-testrunner-components/src/main/java/com/yahoo/vespa/hosted/testrunner/TestProfile.java new file mode 100644 index 00000000000..b7d3a06f30d --- /dev/null +++ b/vespa-testrunner-components/src/main/java/com/yahoo/vespa/hosted/testrunner/TestProfile.java @@ -0,0 +1,29 @@ +package com.yahoo.vespa.hosted.testrunner; + +/** + * @author valerijf + * @author jvenstad + */ +enum TestProfile { + + SYSTEM_TEST("com.yahoo.vespa.tenant.cd.SystemTest, com.yahoo.vespa.tenant.systemtest.base.SystemTest", true), + STAGING_TEST("com.yahoo.vespa.tenant.cd.StagingTest, com.yahoo.vespa.tenant.systemtest.base.StagingTest", true), + PRODUCTION_TEST("com.yahoo.vespa.tenant.cd.ProductionTest, com.yahoo.vespa.tenant.systemtest.base.ProductionTest", false); + + private final String group; + private final boolean failIfNoTests; + + TestProfile(String group, boolean failIfNoTests) { + this.group = group; + this.failIfNoTests = failIfNoTests; + } + + String group() { + return group; + } + + boolean failIfNoTests() { + return failIfNoTests; + } + +} diff --git a/vespa-testrunner-components/src/main/java/com/yahoo/vespa/hosted/testrunner/TestRunner.java b/vespa-testrunner-components/src/main/java/com/yahoo/vespa/hosted/testrunner/TestRunner.java new file mode 100644 index 00000000000..fb5dccc551d --- /dev/null +++ b/vespa-testrunner-components/src/main/java/com/yahoo/vespa/hosted/testrunner/TestRunner.java @@ -0,0 +1,195 @@ +package com.yahoo.vespa.hosted.testrunner; + +import com.google.inject.Inject; +import com.yahoo.vespa.defaults.Defaults; +import org.fusesource.jansi.AnsiOutputStream; +import org.fusesource.jansi.HtmlAnsiOutputStream; + +import java.io.BufferedOutputStream; +import java.io.BufferedReader; +import java.io.ByteArrayOutputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.PrintStream; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import 
java.nio.file.Paths; +import java.util.Collection; +import java.util.List; +import java.util.SortedMap; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.function.Function; +import java.util.logging.Level; +import java.util.logging.LogRecord; +import java.util.logging.Logger; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static com.yahoo.log.LogLevel.ERROR; + +/** + * @author valerijf + * @author jvenstad + */ +public class TestRunner { + + private static final Logger logger = Logger.getLogger(TestRunner.class.getName()); + private static final Level HTML = new Level("html", 1) { }; + private static final Path vespaHome = Paths.get(Defaults.getDefaults().vespaHome()); + private static final String settingsXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + + "<settings xmlns=\"http://maven.apache.org/SETTINGS/1.0.0\"\n" + + " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" + + " xsi:schemaLocation=\"http://maven.apache.org/SETTINGS/1.0.0 http://maven.apache.org/xsd/settings-1.0.0.xsd\">\n" + + " <mirrors>\n" + + " <mirror>\n" + + " <id>maven central</id>\n" + + " <mirrorOf>*</mirrorOf>\n" + // Use this for everything! + " <url>https://repo.maven.apache.org/maven2/</url>\n" + + " </mirror>\n" + + " </mirrors>\n" + + "</settings>"; + + private final Path artifactsPath; + private final Path testPath; + private final Path logFile; + private final Path configFile; + private final Path settingsFile; + private final Function<TestProfile, ProcessBuilder> testBuilder; + private final SortedMap<Long, LogRecord> log = new ConcurrentSkipListMap<>(); + + private volatile Status status = Status.NOT_STARTED; + + @Inject + public TestRunner(TestRunnerConfig config) { + this(config.artifactsPath(), + vespaHome.resolve("tmp/test"), + vespaHome.resolve("logs/vespa/maven.log"), + vespaHome.resolve("tmp/config.json"), + vespaHome.resolve("tmp/settings.xml"), + profile -> { // Anything to make this testable! 
>_< + String[] command = new String[]{ + "mvn", + "test", + + "--batch-mode", // Run in non-interactive (batch) mode (disables output color) + "--show-version", // Display version information WITHOUT stopping build + "--settings", // Need to override repository settings in ymaven config >_< + vespaHome.resolve("tmp/settings.xml").toString(), + + // Disable maven download progress indication + "-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn", + "-Dstyle.color=always", // Enable ANSI color codes again + "-DfailIfNoTests=" + profile.failIfNoTests(), + "-Dvespa.test.config=" + vespaHome.resolve("tmp/config.json"), + "-Dvespa.test.credentials.root=" + Defaults.getDefaults().vespaHome() + "/var/vespa/sia", + String.format("-DargLine=-Xms%1$dm -Xmx%1$dm", config.surefireMemoryMb()) + }; + ProcessBuilder builder = new ProcessBuilder(command); + builder.environment().merge("MAVEN_OPTS", " -Djansi.force=true", String::concat); + builder.directory(vespaHome.resolve("tmp/test").toFile()); + builder.redirectErrorStream(true); + return builder; + }); + } + + TestRunner(Path artifactsPath, Path testPath, Path logFile, Path configFile, Path settingsFile, Function<TestProfile, ProcessBuilder> testBuilder) { + this.artifactsPath = artifactsPath; + this.testPath = testPath; + this.logFile = logFile; + this.configFile = configFile; + this.settingsFile = settingsFile; + this.testBuilder = testBuilder; + } + + public synchronized void test(TestProfile testProfile, byte[] testConfig) { + if (status == Status.RUNNING) + throw new IllegalArgumentException("Tests are already running; should not receive this request now."); + + log.clear(); + status = Status.RUNNING; + + new Thread(() -> runTests(testProfile, testConfig)).start(); + } + + public Collection<LogRecord> getLog(long after) { + return log.tailMap(after + 1).values(); + } + + public synchronized Status getStatus() { + return status; + } + + private void runTests(TestProfile testProfile, 
byte[] testConfig) { + ProcessBuilder builder = testBuilder.apply(testProfile); + { + LogRecord record = new LogRecord(Level.INFO, + String.format("Starting %s. Artifacts directory: %s Config file: %s\nCommand to run: %s", + testProfile.name(), artifactsPath, configFile, String.join(" ", builder.command()))); + log.put(record.getSequenceNumber(), record); + logger.log(record); + } + + boolean success; + // The AnsiOutputStream filters out ANSI characters, leaving the file contents pure. + try (PrintStream fileStream = new PrintStream(new AnsiOutputStream(new BufferedOutputStream(new FileOutputStream(logFile.toFile())))); + ByteArrayOutputStream logBuffer = new ByteArrayOutputStream(); + PrintStream logFormatter = new PrintStream(new HtmlAnsiOutputStream(logBuffer))){ + writeTestApplicationPom(testProfile); + Files.write(configFile, testConfig); + Files.write(settingsFile, settingsXml.getBytes()); + + Process mavenProcess = builder.start(); + BufferedReader in = new BufferedReader(new InputStreamReader(mavenProcess.getInputStream())); + in.lines().forEach(line -> { + fileStream.println(line); + logFormatter.print(line); + LogRecord record = new LogRecord(HTML, logBuffer.toString()); + log.put(record.getSequenceNumber(), record); + logBuffer.reset(); + }); + success = mavenProcess.waitFor() == 0; + } + catch (Exception exception) { + LogRecord record = new LogRecord(ERROR, "Failed to execute maven command: " + String.join(" ", builder.command())); + record.setThrown(exception); + logger.log(record); + log.put(record.getSequenceNumber(), record); + try (PrintStream file = new PrintStream(new FileOutputStream(logFile.toFile(), true))) { + file.println(record.getMessage()); + exception.printStackTrace(file); + } + catch (IOException ignored) { } + status = Status.ERROR; + return; + } + status = success ? 
Status.SUCCESS : Status.FAILURE; + } + + private void writeTestApplicationPom(TestProfile testProfile) throws IOException { + List<Path> files = listFiles(artifactsPath); + Path testJar = files.stream().filter(file -> file.toString().endsWith("tests.jar")).findFirst() + .orElseThrow(() -> new IllegalStateException("No file ending with 'tests.jar' found under '" + artifactsPath + "'!")); + String pomXml = PomXmlGenerator.generatePomXml(testProfile, files, testJar); + testPath.toFile().mkdirs(); + Files.write(testPath.resolve("pom.xml"), pomXml.getBytes()); + } + + private static List<Path> listFiles(Path directory) { + try (Stream<Path> element = Files.walk(directory)) { + return element + .filter(Files::isRegularFile) + .filter(path -> path.toString().endsWith(".jar")) + .collect(Collectors.toList()); + } catch (IOException e) { + throw new UncheckedIOException("Failed to list files under " + directory, e); + } + } + + + public enum Status { + NOT_STARTED, RUNNING, FAILURE, ERROR, SUCCESS + } + +} diff --git a/vespa-testrunner-components/src/main/java/com/yahoo/vespa/hosted/testrunner/TestRunnerHandler.java b/vespa-testrunner-components/src/main/java/com/yahoo/vespa/hosted/testrunner/TestRunnerHandler.java new file mode 100644 index 00000000000..d3393ce8dbe --- /dev/null +++ b/vespa-testrunner-components/src/main/java/com/yahoo/vespa/hosted/testrunner/TestRunnerHandler.java @@ -0,0 +1,166 @@ +package com.yahoo.vespa.hosted.testrunner; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.inject.Inject; +import com.yahoo.container.jdisc.HttpRequest; +import com.yahoo.container.jdisc.HttpResponse; +import com.yahoo.container.jdisc.LoggingRequestHandler; +import com.yahoo.container.logging.AccessLog; +import com.yahoo.io.IOUtils; +import com.yahoo.log.LogLevel; +import com.yahoo.slime.Cursor; +import com.yahoo.slime.JsonFormat; +import com.yahoo.slime.Slime; +import 
com.yahoo.yolean.Exceptions; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintStream; +import java.util.Collection; +import java.util.concurrent.Executor; +import java.util.logging.Level; +import java.util.logging.LogRecord; + +import static com.yahoo.jdisc.Response.Status; + +/** + * @author valerijf + * @author jvenstad + */ +public class TestRunnerHandler extends LoggingRequestHandler { + + private static final String CONTENT_TYPE_APPLICATION_JSON = "application/json"; + + private final TestRunner testRunner; + + @Inject + public TestRunnerHandler(Executor executor, AccessLog accessLog, TestRunner testRunner) { + super(executor, accessLog); + this.testRunner = testRunner; + } + + @Override + public HttpResponse handle(HttpRequest request) { + try { + switch (request.getMethod()) { + case GET: return handleGET(request); + case POST: return handlePOST(request); + + default: return new Response(Status.METHOD_NOT_ALLOWED, "Method '" + request.getMethod() + "' is not supported"); + } + } catch (IllegalArgumentException e) { + return new Response(Status.BAD_REQUEST, Exceptions.toMessageString(e)); + } catch (Exception e) { + log.log(Level.WARNING, "Unexpected error handling '" + request.getUri() + "'", e); + return new Response(Status.INTERNAL_SERVER_ERROR, Exceptions.toMessageString(e)); + } + } + + private HttpResponse handleGET(HttpRequest request) { + String path = request.getUri().getPath(); + if (path.equals("/tester/v1/log")) { + return new SlimeJsonResponse(toSlime(testRunner.getLog(request.hasProperty("after") + ? 
Long.parseLong(request.getProperty("after")) + : -1))); + } else if (path.equals("/tester/v1/status")) { + log.info("Responding with status " + testRunner.getStatus()); + return new Response(testRunner.getStatus().name()); + } + return new Response(Status.NOT_FOUND, "Not found: " + request.getUri().getPath()); + } + + private HttpResponse handlePOST(HttpRequest request) throws IOException, InterruptedException { + final String path = request.getUri().getPath(); + if (path.startsWith("/tester/v1/run/")) { + String type = lastElement(path); + TestProfile testProfile = TestProfile.valueOf(type.toUpperCase() + "_TEST"); + byte[] config = IOUtils.readBytes(request.getData(), 1 << 16); + testRunner.test(testProfile, config); + log.info("Started tests of type " + type + " and status is " + testRunner.getStatus()); + return new Response("Successfully started " + type + " tests"); + } + return new Response(Status.NOT_FOUND, "Not found: " + request.getUri().getPath()); + } + + private static String lastElement(String path) { + if (path.endsWith("/")) + path = path.substring(0, path.length()-1); + int lastSlash = path.lastIndexOf("/"); + if (lastSlash < 0) return path; + return path.substring(lastSlash + 1, path.length()); + } + + static Slime toSlime(Collection<LogRecord> log) { + Slime root = new Slime(); + Cursor recordArray = root.setArray(); + log.forEach(record -> { + Cursor recordObject = recordArray.addObject(); + recordObject.setLong("id", record.getSequenceNumber()); + recordObject.setLong("at", record.getMillis()); + recordObject.setString("type", typeOf(record.getLevel())); + String message = record.getMessage(); + if (record.getThrown() != null) { + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + record.getThrown().printStackTrace(new PrintStream(buffer)); + message += "\n" + buffer; + } + recordObject.setString("message", message); + }); + return root; + } + + public static String typeOf(Level level) { + return level.getName().equals("html") ? 
"html" + : level.intValue() < LogLevel.INFO.intValue() ? "debug" + : level.intValue() < LogLevel.WARNING.intValue() ? "info" + : level.intValue() < LogLevel.ERROR.intValue() ? "warning" + : "error"; + } + + private class SlimeJsonResponse extends HttpResponse { + private final Slime slime; + + private SlimeJsonResponse(Slime slime) { + super(200); + this.slime = slime; + } + + @Override + public void render(OutputStream outputStream) throws IOException { + new JsonFormat(true).encode(outputStream, slime); + } + + @Override + public String getContentType() { + return CONTENT_TYPE_APPLICATION_JSON; + } + } + + private static class Response extends HttpResponse { + private static final ObjectMapper objectMapper = new ObjectMapper(); + private final String message; + + private Response(String response) { + this(200, response); + } + + private Response(int statusCode, String message) { + super(statusCode); + this.message = message; + } + + @Override + public void render(OutputStream outputStream) throws IOException { + ObjectNode objectNode = objectMapper.createObjectNode(); + objectNode.put("message", message); + objectMapper.writeValue(outputStream, objectNode); + } + + @Override + public String getContentType() { + return CONTENT_TYPE_APPLICATION_JSON; + } + } +} diff --git a/vespa-testrunner-components/src/main/resources/configdefinitions/test-runner.def b/vespa-testrunner-components/src/main/resources/configdefinitions/test-runner.def new file mode 100644 index 00000000000..a2d0eacd9be --- /dev/null +++ b/vespa-testrunner-components/src/main/resources/configdefinitions/test-runner.def @@ -0,0 +1,4 @@ +package=com.yahoo.vespa.hosted.testrunner + +artifactsPath path +surefireMemoryMb int diff --git a/vespa-testrunner-components/src/test/java/com/yahoo/vespa/hosted/testrunner/PomXmlGeneratorTest.java b/vespa-testrunner-components/src/test/java/com/yahoo/vespa/hosted/testrunner/PomXmlGeneratorTest.java new file mode 100644 index 00000000000..dce02922c63 --- /dev/null 
+++ b/vespa-testrunner-components/src/test/java/com/yahoo/vespa/hosted/testrunner/PomXmlGeneratorTest.java @@ -0,0 +1,33 @@ +package com.yahoo.vespa.hosted.testrunner; + +import org.apache.commons.io.IOUtils; +import org.junit.Test; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +/** + * @author valerijf + */ +public class PomXmlGeneratorTest { + + @Test + public void write_system_tests_pom_xml() throws IOException { + List<Path> artifacts = Arrays.asList( + Paths.get("components/my-comp.jar"), + Paths.get("main.jar")); + + String actual = PomXmlGenerator.generatePomXml(TestProfile.SYSTEM_TEST, artifacts, artifacts.get(1)); + assertFile("/pom.xml_system_tests", actual); + } + + private void assertFile(String resourceFile, String actual) throws IOException { + String expected = IOUtils.toString(this.getClass().getResourceAsStream(resourceFile)); + assertEquals(resourceFile, expected, actual); + } +}
\ No newline at end of file diff --git a/vespa-testrunner-components/src/test/java/com/yahoo/vespa/hosted/testrunner/TestRunnerHandlerTest.java b/vespa-testrunner-components/src/test/java/com/yahoo/vespa/hosted/testrunner/TestRunnerHandlerTest.java new file mode 100644 index 00000000000..a91b1308080 --- /dev/null +++ b/vespa-testrunner-components/src/test/java/com/yahoo/vespa/hosted/testrunner/TestRunnerHandlerTest.java @@ -0,0 +1,37 @@ +package com.yahoo.vespa.hosted.testrunner; + +import com.yahoo.vespa.config.SlimeUtils; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.time.Instant; +import java.util.Collections; +import java.util.logging.Level; +import java.util.logging.LogRecord; + +import static org.junit.Assert.assertEquals; + +/** + * @author jvenstad + */ +public class TestRunnerHandlerTest { + + @Test + public void logSerialization() throws IOException { + LogRecord record = new LogRecord(Level.INFO, "Hello."); + record.setSequenceNumber(1); + record.setInstant(Instant.ofEpochMilli(2)); + Exception exception = new RuntimeException(); + record.setThrown(exception); + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + exception.printStackTrace(new PrintStream(buffer)); + String trace = buffer.toString() + .replaceAll("\n", "\\\\n") + .replaceAll("\t", "\\\\t"); + assertEquals("[{\"id\":1,\"at\":2,\"type\":\"info\",\"message\":\"Hello.\\n" + trace + "\"}]", + new String(SlimeUtils.toJsonBytes(TestRunnerHandler.toSlime(Collections.singletonList(record))))); + } + +} diff --git a/vespa-testrunner-components/src/test/java/com/yahoo/vespa/hosted/testrunner/TestRunnerTest.java b/vespa-testrunner-components/src/test/java/com/yahoo/vespa/hosted/testrunner/TestRunnerTest.java new file mode 100644 index 00000000000..49c95fa4b6f --- /dev/null +++ b/vespa-testrunner-components/src/test/java/com/yahoo/vespa/hosted/testrunner/TestRunnerTest.java @@ -0,0 +1,127 @@ +package 
com.yahoo.vespa.hosted.testrunner; + +import org.fusesource.jansi.Ansi; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Iterator; +import java.util.logging.LogRecord; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +/** + * Unit tests relying on a UNIX shell >_< + * + * @author jvenstad + */ +public class TestRunnerTest { + + @Rule + public TemporaryFolder tmp = new TemporaryFolder(); + + private Path artifactsPath; + private Path testPath; + private Path logFile; + private Path configFile; + private Path settingsFile; + + @Before + public void setup() throws IOException { + artifactsPath = tmp.newFolder("artifacts").toPath(); + Files.createFile(artifactsPath.resolve("my-tests.jar")); + Files.createFile(artifactsPath.resolve("my-fat-test.jar")); + testPath = tmp.newFolder("testData").toPath(); + logFile = tmp.newFile("maven.log").toPath(); + configFile = tmp.newFile("testConfig.json").toPath(); + settingsFile = tmp.newFile("settings.xml").toPath(); + } + + @Test + public void ansiCodesAreConvertedToHtml() throws InterruptedException { + TestRunner runner = new TestRunner(artifactsPath, testPath, logFile, configFile, settingsFile, + __ -> new ProcessBuilder("echo", Ansi.ansi().fg(Ansi.Color.RED).a("Hello!").reset().toString())); + runner.test(TestProfile.SYSTEM_TEST, new byte[0]); + while (runner.getStatus() == TestRunner.Status.RUNNING) { + Thread.sleep(10); + } + Iterator<LogRecord> log = runner.getLog(-1).iterator(); + log.next(); + LogRecord record = log.next(); + assertEquals("<span style=\"color: red;\">Hello!</span>", record.getMessage()); + assertEquals(0, runner.getLog(record.getSequenceNumber()).size()); + assertEquals(TestRunner.Status.SUCCESS, 
runner.getStatus()); + } + + @Test + public void errorLeadsToError() throws InterruptedException { + TestRunner runner = new TestRunner(artifactsPath, testPath, logFile, configFile, settingsFile, + __ -> new ProcessBuilder("This is a command that doesn't exist, for sure!")); + runner.test(TestProfile.SYSTEM_TEST, new byte[0]); + while (runner.getStatus() == TestRunner.Status.RUNNING) { + Thread.sleep(10); + } + Iterator<LogRecord> log = runner.getLog(-1).iterator(); + log.next(); + LogRecord record = log.next(); + assertEquals("Failed to execute maven command: This is a command that doesn't exist, for sure!", record.getMessage()); + assertNotNull(record.getThrown()); + assertEquals(TestRunner.Status.ERROR, runner.getStatus()); + } + + @Test + public void failureLeadsToFailure() throws InterruptedException { + TestRunner runner = new TestRunner(artifactsPath, testPath, logFile, configFile, settingsFile, + __ -> new ProcessBuilder("false")); + runner.test(TestProfile.SYSTEM_TEST, new byte[0]); + while (runner.getStatus() == TestRunner.Status.RUNNING) { + Thread.sleep(10); + } + assertEquals(1, runner.getLog(-1).size()); + assertEquals(TestRunner.Status.FAILURE, runner.getStatus()); + } + + @Test + public void filesAreGenerated() throws InterruptedException, IOException { + TestRunner runner = new TestRunner(artifactsPath, testPath, logFile, configFile, settingsFile, + __ -> new ProcessBuilder("echo", "Hello!")); + runner.test(TestProfile.SYSTEM_TEST, "config".getBytes()); + while (runner.getStatus() == TestRunner.Status.RUNNING) { + Thread.sleep(10); + } + assertEquals("config", new String(Files.readAllBytes(configFile))); + assertTrue(Files.exists(testPath.resolve("pom.xml"))); + assertTrue(Files.exists(settingsFile)); + assertEquals("Hello!\n", new String(Files.readAllBytes(logFile))); + } + + @Test + public void runnerCanBeReused() throws InterruptedException, IOException { + TestRunner runner = new TestRunner(artifactsPath, testPath, logFile, configFile, 
settingsFile, + __ -> new ProcessBuilder("sleep", "0.1")); + runner.test(TestProfile.SYSTEM_TEST, "config".getBytes()); + assertEquals(TestRunner.Status.RUNNING, runner.getStatus()); + + while (runner.getStatus() == TestRunner.Status.RUNNING) { + Thread.sleep(10); + } + assertEquals(1, runner.getLog(-1).size()); + assertEquals(TestRunner.Status.SUCCESS, runner.getStatus()); + + runner.test(TestProfile.STAGING_TEST, "newConfig".getBytes()); + while (runner.getStatus() == TestRunner.Status.RUNNING) { + Thread.sleep(10); + } + + assertEquals("newConfig", new String(Files.readAllBytes(configFile))); + assertEquals(1, runner.getLog(-1).size()); + } + +} diff --git a/vespa-testrunner-components/src/test/resources/pom.xml_system_tests b/vespa-testrunner-components/src/test/resources/pom.xml_system_tests new file mode 100644 index 00000000000..22382b84316 --- /dev/null +++ b/vespa-testrunner-components/src/test/resources/pom.xml_system_tests @@ -0,0 +1,72 @@ +<?xml version="1.0"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <groupId>com.yahoo.vespa</groupId> + <artifactId>tester-application</artifactId> + <version>1.0.0</version> + + <properties> + <maven_version>4.12</maven_version> + <surefire_version>2.22.0</surefire_version> + <my-comp.jar.path>components/my-comp.jar</my-comp.jar.path> + <main.jar.path>main.jar</main.jar.path> + </properties> + + <dependencies> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>${maven_version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>com.yahoo.vespa.testrunner.test</groupId> + <artifactId>my-comp.jar</artifactId> + <scope>system</scope> + <type>test-jar</type> + <version>test</version> + <systemPath>${my-comp.jar.path}</systemPath> + </dependency> + <dependency> + 
<groupId>com.yahoo.vespa.testrunner.test</groupId> + <artifactId>main.jar</artifactId> + <scope>system</scope> + <type>test-jar</type> + <version>test</version> + <systemPath>${main.jar.path}</systemPath> + </dependency> + </dependencies> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-surefire-plugin</artifactId> + <version>${surefire_version}</version> + <configuration> + <dependenciesToScan> + <dependency>com.yahoo.vespa.testrunner.test:main.jar</dependency> + </dependenciesToScan> + <groups>com.yahoo.vespa.tenant.cd.SystemTest, com.yahoo.vespa.tenant.systemtest.base.SystemTest</groups> + <excludedGroups>com.yahoo.vespa.tenant.systemtest.base.impl.EmptyExcludeGroup.class</excludedGroups> + <excludes> + <exclude>com.yahoo.vespa.tenant.cd.SystemTest, com.yahoo.vespa.tenant.systemtest.base.SystemTest</exclude> + </excludes> + <reportsDirectory>${env.TEST_DIR}</reportsDirectory> + <redirectTestOutputToFile>false</redirectTestOutputToFile> + <environmentVariables> + <LD_LIBRARY_PATH>/opt/vespa/lib64</LD_LIBRARY_PATH> + </environmentVariables> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-surefire-report-plugin</artifactId> + <version>${surefire_version}</version> + <configuration> + <reportsDirectory>${env.TEST_DIR}</reportsDirectory> + </configuration> + </plugin> + </plugins> + </build> +</project> diff --git a/vespabase/src/rhel-prestart.sh b/vespabase/src/rhel-prestart.sh index 081d7df18a4..fe067868c04 100755 --- a/vespabase/src/rhel-prestart.sh +++ b/vespabase/src/rhel-prestart.sh @@ -113,6 +113,7 @@ fixdir ${VESPA_USER} wheel 755 var/db/vespa/logcontrol fixdir ${VESPA_USER} wheel 755 var/db/vespa/search fixdir ${VESPA_USER} wheel 755 var/jdisc_core fixdir ${VESPA_USER} wheel 755 var/vespa +fixdir ${VESPA_USER} wheel 755 var/vespa/application fixdir ${VESPA_USER} wheel 755 var/vespa/bundlecache fixdir ${VESPA_USER} wheel 755 
var/vespa/bundlecache/configserver fixdir ${VESPA_USER} wheel 755 var/vespa/cache/config/ diff --git a/vespalog/src/vespa/log/log_message.cpp b/vespalog/src/vespa/log/log_message.cpp index ec734747dbc..77f9b619e9f 100644 --- a/vespalog/src/vespa/log/log_message.cpp +++ b/vespalog/src/vespa/log/log_message.cpp @@ -90,6 +90,25 @@ LogMessage::LogMessage() { } +/* Field-wise constructor: initializes each member from the argument of the matching name (time_nanos_in -> _time_nanos, hostname_in -> _hostname, process_id_in -> _process_id, thread_id_in -> _thread_id, service_in -> _service, component_in -> _component, level_in -> _level, payload_in -> _payload). It does not call parse_log_line. */ +LogMessage::LogMessage(int64_t time_nanos_in, + const std::string& hostname_in, + int32_t process_id_in, + int32_t thread_id_in, + const std::string& service_in, + const std::string& component_in, + Logger::LogLevel level_in, + const std::string& payload_in) + : _time_nanos(time_nanos_in), + _hostname(hostname_in), + _process_id(process_id_in), + _thread_id(thread_id_in), + _service(service_in), + _component(component_in), + _level(level_in), + _payload(payload_in) +{ /* empty body: every member is set by the initializer list above */ +} + LogMessage::~LogMessage() = default; diff --git a/vespalog/src/vespa/log/log_message.h b/vespalog/src/vespa/log/log_message.h index 1ca052c9e7d..832b5f6d47d 100644 --- a/vespalog/src/vespa/log/log_message.h +++ b/vespalog/src/vespa/log/log_message.h @@ -22,6 +22,14 @@ class LogMessage { public: LogMessage(); + LogMessage(int64_t time_nanos_in, + const std::string& hostname_in, + int32_t process_id_in, + int32_t thread_id_in, + const std::string& service_in, + const std::string& component_in, + Logger::LogLevel level_in, + const std::string& payload_in); ~LogMessage(); void parse_log_line(std::string_view log_line); int64_t time_nanos() const { return _time_nanos; } |