diff options
author | Jon Bratseth <bratseth@oath.com> | 2020-07-02 14:16:27 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-07-02 14:16:27 +0200 |
commit | 0ba33ff6c954c9103521e072ce861f1a99891353 (patch) | |
tree | ed6040963a469c2c68b2797c01d80c0abb2eb008 | |
parent | 1e1b35fa0b2589994f8d01fd8676659daf94791b (diff) | |
parent | 2b099ae866e7a99b27b364f6c26157e786cc090c (diff) |
Merge pull request #13779 from vespa-engine/bratseth/healt-check-improvements
Bratseth/healt check improvements
11 files changed, 185 insertions, 110 deletions
diff --git a/config/src/main/java/com/yahoo/config/subscription/ConfigInstanceUtil.java b/config/src/main/java/com/yahoo/config/subscription/ConfigInstanceUtil.java index 9710ee607eb..82d02ec89f9 100644 --- a/config/src/main/java/com/yahoo/config/subscription/ConfigInstanceUtil.java +++ b/config/src/main/java/com/yahoo/config/subscription/ConfigInstanceUtil.java @@ -22,9 +22,9 @@ public class ConfigInstanceUtil { * Values that have not been explicitly set in the source builder, will be left unchanged * in the destination. * - * @param destination The builder to copy values into. - * @param source The builder to copy values from. Unset values are not copied. - * @param <BUILDER> The builder class. + * @param destination the builder to copy values into + * @param source the builder to copy values from. Unset values are not copied + * @param <BUILDER> the builder class */ public static<BUILDER extends ConfigBuilder> void setValues(BUILDER destination, BUILDER source) { try { @@ -56,9 +56,9 @@ public class ConfigInstanceUtil { setConfigId(i, configId); } catch (InstantiationException | InvocationTargetException | NoSuchMethodException | - NoSuchFieldException | IllegalAccessException e) { + NoSuchFieldException | IllegalAccessException e) { throw new IllegalArgumentException("Failed creating new instance of '" + type.getCanonicalName() + - "' for config id '" + configId + "': " + Exceptions.toMessageString(e), e); + "' for config id '" + configId + "'", e); } return instance; } diff --git a/config/src/main/java/com/yahoo/config/subscription/ConfigSubscriber.java b/config/src/main/java/com/yahoo/config/subscription/ConfigSubscriber.java index 814222989a1..5fcbd4f8c21 100644 --- a/config/src/main/java/com/yahoo/config/subscription/ConfigSubscriber.java +++ b/config/src/main/java/com/yahoo/config/subscription/ConfigSubscriber.java @@ -423,21 +423,35 @@ public class ConfigSubscriber implements AutoCloseable { * @return The handle of the config * @see #startConfigThread(Runnable) */ - public <T extends ConfigInstance> ConfigHandle<T> subscribe(final SingleSubscriber<T> singleSubscriber, Class<T> configClass, String configId) { - if (!subscriptionHandles.isEmpty()) - throw new IllegalStateException("Can not start single-subscription because subscriptions were previously opened on this."); - final ConfigHandle<T> handle = subscribe(configClass, configId); - if (!nextConfig()) - throw new ConfigurationRuntimeException("Initial config of " + configClass.getName() + " failed."); + public <T extends ConfigInstance> ConfigHandle<T> subscribe(SingleSubscriber<T> singleSubscriber, Class<T> configClass, String configId) { + if ( ! subscriptionHandles.isEmpty()) + throw new IllegalStateException("Can not start single-subscription because subscriptions were previously opened on this"); + + ConfigHandle<T> handle = subscribe(configClass, configId); + + if ( ! nextConfig()) + throw new ConfigurationRuntimeException("Initial config of " + configClass.getName() + " failed"); + singleSubscriber.configure(handle.getConfig()); startConfigThread(() -> { while (!isClosed()) { + boolean hasNewConfig = false; + try { - if (nextConfig()) { - if (handle.isChanged()) singleSubscriber.configure(handle.getConfig()); - } - } catch (Exception e) { - log.log(SEVERE, "Exception from config system, continuing config thread: " + Exceptions.toMessageString(e)); + hasNewConfig = nextConfig(); + } + catch (Exception e) { + log.log(SEVERE, "Exception on receiving config. Ignoring this change.", e); + } + + try { + if (hasNewConfig) + singleSubscriber.configure(handle.getConfig()); + } + catch (Exception e) { + log.warning("Exception on applying config " + configClass.getName() + + " for config id " + configId + ": Ignoring this change: " + + Exceptions.toMessageString(e)); } } } diff --git a/config/src/test/java/com/yahoo/config/subscription/ConfigInstanceUtilTest.java b/config/src/test/java/com/yahoo/config/subscription/ConfigInstanceUtilTest.java index 3bdaee09eaf..fa24988709a 100644 --- a/config/src/test/java/com/yahoo/config/subscription/ConfigInstanceUtilTest.java +++ b/config/src/test/java/com/yahoo/config/subscription/ConfigInstanceUtilTest.java @@ -14,7 +14,6 @@ import java.io.File; import java.util.Arrays; import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.CoreMatchers.containsString; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; @@ -165,7 +164,8 @@ public class ConfigInstanceUtilTest { ConfigInstanceUtil.getNewInstance(TestNodefsConfig.class, "id0", ConfigPayload.fromBuilder(payloadBuilder)); assert(false); } catch (IllegalArgumentException e) { - assertThat(e.getMessage(), containsString("Failed creating new instance of 'com.yahoo.foo.TestNodefsConfig' for config id 'id0':")); + assertEquals("Failed creating new instance of 'com.yahoo.foo.TestNodefsConfig' for config id 'id0'", + e.getMessage()); } } } diff --git a/container-core/abi-spec.json b/container-core/abi-spec.json index dac33d2d431..aa2e5ccfa5f 100644 --- a/container-core/abi-spec.json +++ b/container-core/abi-spec.json @@ -36,7 +36,7 @@ ], "methods": [ "public void <init>()", - "public void setContainerHasClusters(boolean)", + "public void setClusters(java.util.Set)", "public void setReceiveTrafficByDefault(boolean)", "public void setUp(java.lang.Object)", "public void setDown(java.lang.Object)", diff --git a/container-core/src/main/java/com/yahoo/container/handler/ClustersStatus.java b/container-core/src/main/java/com/yahoo/container/handler/ClustersStatus.java index 0ed0daa2141..e339db2f084 100644 --- a/container-core/src/main/java/com/yahoo/container/handler/ClustersStatus.java +++ b/container-core/src/main/java/com/yahoo/container/handler/ClustersStatus.java @@ -5,7 +5,9 @@ import com.google.inject.Inject; import com.yahoo.component.AbstractComponent; import java.util.HashMap; +import java.util.Iterator; import java.util.Map; +import java.util.Set; /** * A component which tracks the up/down status of any clusters which should influence @@ -37,11 +39,15 @@ public class ClustersStatus extends AbstractComponent { /** The status of clusters, when known. Note that clusters may exist for which there is no knowledge yet. */ private final Map<String, Boolean> clusterStatus = new HashMap<>(); - public void setContainerHasClusters(boolean containerHasClusters) { + /** Sets the current clusters of this container */ + public void setClusters(Set<String> clusters) { synchronized (mutex) { - this.containerHasClusters = containerHasClusters; - if ( ! containerHasClusters) - clusterStatus.clear(); // forget container clusters which was configured away + this.containerHasClusters = clusters.size() > 0; + for (Iterator<String> i = clusterStatus.keySet().iterator(); i.hasNext(); ) { + String existingCluster = i.next(); + if ( ! clusters.contains(existingCluster)) + i.remove(); // forget clusters which was configured away + } } } @@ -78,9 +84,11 @@ public class ClustersStatus extends AbstractComponent { public boolean containerShouldReceiveTraffic() { return containerShouldReceiveTraffic(Require.ONE); } + /** * Returns whether this container should receive traffic based on the state of this - * @param require Requirement for being up, ALL or ONE. + * + * @param require requirement for being up, ALL or ONE. */ public boolean containerShouldReceiveTraffic(Require require) { synchronized (mutex) { diff --git a/container-core/src/main/java/com/yahoo/container/handler/VipStatus.java b/container-core/src/main/java/com/yahoo/container/handler/VipStatus.java index 0bf86e8f440..e1b5b769906 100644 --- a/container-core/src/main/java/com/yahoo/container/handler/VipStatus.java +++ b/container-core/src/main/java/com/yahoo/container/handler/VipStatus.java @@ -6,6 +6,8 @@ import com.yahoo.container.QrSearchersConfig; import com.yahoo.container.core.VipStatusConfig; import com.yahoo.container.jdisc.state.StateMonitor; +import java.util.stream.Collectors; + /** * A component which keeps track of whether or not this container instance should receive traffic * and respond that it is in good health. @@ -59,8 +61,7 @@ public class VipStatus { this.clustersStatus = clustersStatus; this.healthState = healthState; initiallyInRotation = vipStatusConfig.initiallyInRotation(); - healthState.status(StateMonitor.Status.initializing); - clustersStatus.setContainerHasClusters(! dispatchers.searchcluster().isEmpty()); + clustersStatus.setClusters(dispatchers.searchcluster().stream().map(c -> c.name()).collect(Collectors.toSet())); updateCurrentlyInRotation(); } diff --git a/container-core/src/main/java/com/yahoo/container/jdisc/state/StateMonitor.java b/container-core/src/main/java/com/yahoo/container/jdisc/state/StateMonitor.java index 78b65622150..0018dd22dd9 100644 --- a/container-core/src/main/java/com/yahoo/container/jdisc/state/StateMonitor.java +++ b/container-core/src/main/java/com/yahoo/container/jdisc/state/StateMonitor.java @@ -47,11 +47,13 @@ public class StateMonitor extends AbstractComponent { @Inject public StateMonitor(HealthMonitorConfig config, Timer timer) { - this(config, timer, runnable -> { - Thread thread = new Thread(runnable, "StateMonitor"); - thread.setDaemon(true); - return thread; - }); + this(config, + timer, + runnable -> { + Thread thread = new Thread(runnable, "StateMonitor"); + thread.setDaemon(true); + return thread; + }); } StateMonitor(HealthMonitorConfig config, Timer timer, ThreadFactory threadFactory) { @@ -59,7 +61,8 @@ public class StateMonitor extends AbstractComponent { Status.valueOf(config.initialStatus()), timer, threadFactory); } - /* For Testing */ + + /* Public for testing only */ public StateMonitor(long snapshotIntervalMS, Status status, Timer timer, ThreadFactory threadFactory) { this.timer = timer; this.snapshotIntervalMs = snapshotIntervalMS; diff --git a/container-core/src/test/java/com/yahoo/container/handler/VipStatusTestCase.java b/container-core/src/test/java/com/yahoo/container/handler/VipStatusTestCase.java index e13debcddda..e7a9a1442f3 100644 --- a/container-core/src/test/java/com/yahoo/container/handler/VipStatusTestCase.java +++ b/container-core/src/test/java/com/yahoo/container/handler/VipStatusTestCase.java @@ -10,74 +10,24 @@ import com.yahoo.jdisc.core.SystemTimer; import org.junit.Test; /** - * Smoke test that VipStatus has the right basic logic. - * - * @author steinar + * @author bratseth */ public class VipStatusTestCase { - private static QrSearchersConfig getSearchersConfig(String[] clusters) { - var b = new QrSearchersConfig.Builder(); - if (clusters.length > 0) { - var searchClusterB = new QrSearchersConfig.Searchcluster.Builder(); - for (String cluster : clusters) { - searchClusterB.name(cluster); - } - b.searchcluster(searchClusterB); - } - return b.build(); - } - - private static VipStatus getVipStatus(String[] clusters, StateMonitor.Status startState, boolean initiallyInRotation) { - return new VipStatus(getSearchersConfig(clusters), - new VipStatusConfig.Builder().initiallyInRotation(initiallyInRotation).build(), - new ClustersStatus(), - new StateMonitor(1000, startState, new SystemTimer(), runnable -> { - Thread thread = new Thread(runnable, "StateMonitor"); - thread.setDaemon(true); - return thread; - })); - } - - private static void remove(String[] clusters, VipStatus v) { - for (String s : clusters) { - v.removeFromRotation(s); - } - } - - private static void add(String[] clusters, VipStatus v) { - for (String s : clusters) { - v.addToRotation(s); - } - } - - private static void verifyUpOrDown(String[] clusters, StateMonitor.Status status) { - VipStatus v = getVipStatus(clusters, status, true); - remove(clusters, v); - // initial state - assertFalse(v.isInRotation()); - v.addToRotation(clusters[0]); - assertFalse(v.isInRotation()); - v.addToRotation(clusters[1]); - assertFalse(v.isInRotation()); - v.addToRotation(clusters[2]); - assertTrue(v.isInRotation()); - } - @Test public void testInitializingOrDownRequireAllUp() { String[] clusters = {"cluster1", "cluster2", "cluster3"}; - verifyUpOrDown(clusters, StateMonitor.Status.initializing); - verifyUpOrDown(clusters, StateMonitor.Status.down); + verifyStatus(clusters, StateMonitor.Status.initializing); + verifyStatus(clusters, StateMonitor.Status.down); } @Test public void testUpRequireAllDown() { String[] clusters = {"cluster1", "cluster2", "cluster3"}; - VipStatus v = getVipStatus(clusters, StateMonitor.Status.initializing, true); + VipStatus v = createVipStatus(clusters, StateMonitor.Status.initializing, true, new ClustersStatus()); assertFalse(v.isInRotation()); - add(clusters, v); + addToRotation(clusters, v); assertTrue(v.isInRotation()); v.removeFromRotation(clusters[0]); @@ -102,14 +52,119 @@ public class VipStatusTestCase { @Test public void testNoClustersConfiguringInitiallyInRotationFalse() { String[] clusters = {}; - VipStatus v = getVipStatus(clusters, StateMonitor.Status.initializing, false); + VipStatus v = createVipStatus(clusters, StateMonitor.Status.initializing, false, new ClustersStatus()); assertFalse(v.isInRotation()); } @Test public void testNoClustersConfiguringInitiallyInRotationTrue() { String[] clusters = {}; - VipStatus v = getVipStatus(clusters, StateMonitor.Status.initializing, true); + VipStatus v = createVipStatus(clusters, StateMonitor.Status.initializing, true, new ClustersStatus()); + assertTrue(v.isInRotation()); + } + + @Test + public void testClusterRemovalRemovedIsDown() { + assertClusterRemoval(true, false); + } + + @Test + public void testClusterRemovalRemovedIsUp() { + assertClusterRemoval(false, false); + } + + @Test + public void testClusterRemovalAnotherIsDown() { + assertClusterRemoval(false, true); + } + + private void assertClusterRemoval(boolean removedIsDown, boolean anotherIsDown) { + ClustersStatus clustersStatus = new ClustersStatus(); + StateMonitor stateMonitor = createStateMonitor(StateMonitor.Status.initializing); + + String[] clusters = {"cluster1", "cluster2", "cluster3"}; + + VipStatus v = createVipStatus(clusters, true, clustersStatus, stateMonitor); + assertFalse(v.isInRotation()); + assertEquals(StateMonitor.Status.initializing, stateMonitor.status()); + + addToRotation(clusters, v); + assertTrue(v.isInRotation()); + assertEquals(StateMonitor.Status.up, stateMonitor.status()); + + String[] newClusters = {"cluster2", "cluster3"}; + if (removedIsDown) + v.removeFromRotation("cluster1"); + if (anotherIsDown) + v.removeFromRotation("cluster3"); + v = createVipStatus(newClusters, true, clustersStatus, stateMonitor); + assertTrue(v.isInRotation()); + assertEquals(StateMonitor.Status.up, stateMonitor.status()); + + v.removeFromRotation(newClusters[0]); + if ( ! anotherIsDown) + assertTrue(v.isInRotation()); + + v.removeFromRotation(newClusters[1]); + assertFalse(v.isInRotation()); // Both remaining clusters are out + assertEquals(StateMonitor.Status.down, stateMonitor.status()); + } + + private static QrSearchersConfig createSearchersConfig(String[] clusters) { + var b = new QrSearchersConfig.Builder(); + for (String cluster : clusters) { + var searchCluster = new QrSearchersConfig.Searchcluster.Builder(); + searchCluster.name(cluster); + b.searchcluster(searchCluster); + } + return b.build(); + } + + private static VipStatus createVipStatus(String[] clusters, + StateMonitor.Status startState, + boolean initiallyInRotation, + ClustersStatus clustersStatus) { + return createVipStatus(clusters, initiallyInRotation, clustersStatus, createStateMonitor(startState)); + } + + private static VipStatus createVipStatus(String[] clusters, + boolean initiallyInRotation, + ClustersStatus clustersStatus, + StateMonitor stateMonitor) { + return new VipStatus(createSearchersConfig(clusters), + new VipStatusConfig.Builder().initiallyInRotation(initiallyInRotation).build(), + clustersStatus, + stateMonitor); + } + + private static StateMonitor createStateMonitor(StateMonitor.Status startState) { + return new StateMonitor(1000, startState, new SystemTimer(), runnable -> { + Thread thread = new Thread(runnable, "StateMonitor"); + thread.setDaemon(true); + return thread; + }); + } + + private static void removeFromRotation(String[] clusters, VipStatus v) { + for (String s : clusters) + v.removeFromRotation(s); + } + + private static void addToRotation(String[] clusters, VipStatus v) { + for (String s : clusters) + v.addToRotation(s); + } + + private static void verifyStatus(String[] clusters, StateMonitor.Status status) { + VipStatus v = createVipStatus(clusters, status, true, new ClustersStatus()); + removeFromRotation(clusters, v); + // initial state + assertFalse(v.isInRotation()); + v.addToRotation(clusters[0]); + assertFalse(v.isInRotation()); + v.addToRotation(clusters[1]); + assertFalse(v.isInRotation()); + v.addToRotation(clusters[2]); assertTrue(v.isInRotation()); } diff --git a/container-di/src/main/java/com/yahoo/container/di/CloudSubscriberFactory.java b/container-di/src/main/java/com/yahoo/container/di/CloudSubscriberFactory.java index bd3d146cfec..1133363be8e 100644 --- a/container-di/src/main/java/com/yahoo/container/di/CloudSubscriberFactory.java +++ b/container-di/src/main/java/com/yahoo/container/di/CloudSubscriberFactory.java @@ -103,29 +103,27 @@ public class CloudSubscriberFactory implements SubscriberFactory { @Override public long waitNextGeneration() { - if (handles.isEmpty()) { + if (handles.isEmpty()) throw new IllegalStateException("No config keys registered"); - } - /* Catch and just log config exceptions due to missing config values for parameters that do - * not have a default value. These exceptions occur when the user has removed a component - * from services.xml, and the component takes a config that has parameters without a - * default value in the def-file. There is a new 'components' config underway, where the - * component is removed, so this old config generation will soon be replaced by a new one. */ + // Catch and just log config exceptions due to missing config values for parameters that do + // not have a default value. These exceptions occur when the user has removed a component + // from services.xml, and the component takes a config that has parameters without a + // default value in the def-file. There is a new 'components' config underway, where the + // component is removed, so this old config generation will soon be replaced by a new one. boolean gotNextGen = false; int numExceptions = 0; while ( ! gotNextGen) { try { - if (subscriber.nextGeneration()) { + if (subscriber.nextGeneration()) gotNextGen = true; - } - } catch (IllegalArgumentException e) { + } + catch (IllegalArgumentException e) { numExceptions++; - log.log(Level.WARNING, "Got exception from the config system (please ignore the exception if you just removed " - + "a component from your application that used the mentioned config): ", e); - if (numExceptions >= 5) { - throw new IllegalArgumentException("Failed retrieving the next config generation.", e); - } + log.log(Level.WARNING, "Got exception from the config system (ignore if you just removed a " + + "component from your application that used the mentioned config): ", e); + if (numExceptions >= 5) + throw new IllegalArgumentException("Failed retrieving the next config generation", e); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index df8a7e45917..e8639561599 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -260,7 +260,6 @@ class NodeAllocation { node = node.unretire(); } else { ++wasRetiredJustNow; - // Retire nodes which are of an unwanted flavor, retired flavor or have an overlapping parent host node = node.retire(nodeRepository.clock().instant()); } if ( ! node.allocation().get().membership().cluster().equals(cluster)) { diff --git a/vdslib/src/main/java/com/yahoo/vdslib/distribution/Distribution.java b/vdslib/src/main/java/com/yahoo/vdslib/distribution/Distribution.java index 52708ea6d8e..c9acd625373 100644 --- a/vdslib/src/main/java/com/yahoo/vdslib/distribution/Distribution.java +++ b/vdslib/src/main/java/com/yahoo/vdslib/distribution/Distribution.java @@ -97,11 +97,8 @@ public class Distribution { parent.addSubGroup(group); } } - if (root == null) { - throw new IllegalStateException("Got config that did not " - + "specify even a root group. Need a root group at" - + "\nminimum:\n" + config.toString()); - } + if (root == null) + throw new IllegalStateException("Config does not specify a root group"); root.calculateDistributionHashValues(); Distribution.this.config.setRelease(new Config(root, config.redundancy(), config.distributor_auto_ownership_transfer_on_whole_group_down())); } catch (ParseException e) { |