diff options
9 files changed, 233 insertions, 4 deletions
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 1026af7f823..4930c4a815f 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -273,6 +273,12 @@ public class Flags { "Takes effect at redeployment", ZONE_ID, APPLICATION_ID); + public static final UnboundBooleanFlag DYNAMIC_CONFIG_SERVER_PROVISIONING = defineFeatureFlag( + "dynamic-config-server-provisioning", false, + List.of("mpolden"), "2021-02-26", "2021-05-01", + "Enable dynamic provisioning of config servers", + "Takes effect immediately"); + /** WARNING: public for testing: All flags should be defined in {@link Flags}. */ public static UnboundBooleanFlag defineFeatureFlag(String flagId, boolean defaultValue, List<String> owners, String createdAt, String expiresAt, String description, diff --git a/node-repository/pom.xml b/node-repository/pom.xml index fb46735ec73..8e2a7ca0627 100644 --- a/node-repository/pom.xml +++ b/node-repository/pom.xml @@ -83,6 +83,12 @@ <version>${project.version}</version> <scope>provided</scope> </dependency> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>zookeeper-server-common</artifactId> + <version>${project.version}</version> + <scope>provided</scope> + </dependency> <!-- compile --> <dependency> diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ConfigServerReconfigurer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ConfigServerReconfigurer.java new file mode 100644 index 00000000000..97113d663d4 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ConfigServerReconfigurer.java @@ -0,0 +1,53 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.maintenance; + +import com.yahoo.config.provision.NodeType; +import com.yahoo.jdisc.Metric; +import com.yahoo.vespa.flags.BooleanFlag; +import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.zookeeper.Reconfigurer; +import com.yahoo.vespa.zookeeper.ZooKeeperServer; + +import java.time.Duration; +import java.util.List; +import java.util.stream.Collectors; + +/** + * Reconfigure members of the config server ZooKeeper cluster, according to the config servers currently active in the + * node repository. + * + * @author mpolden + */ +public class ConfigServerReconfigurer extends NodeRepositoryMaintainer { + + /** Minimum number of config servers required before attempting reconfiguration */ + private static final int MIN_ACTIVE_NODES = 3; + + private final Reconfigurer reconfigurer; + private final BooleanFlag featureFlag; + + public ConfigServerReconfigurer(NodeRepository nodeRepository, Duration interval, Metric metric, Reconfigurer reconfigurer) { + super(nodeRepository, interval, metric); + this.reconfigurer = reconfigurer; + this.featureFlag = Flags.DYNAMIC_CONFIG_SERVER_PROVISIONING.bindTo(nodeRepository.flagSource()); + } + + @Override + protected boolean maintain() { + if (!nodeRepository().zone().getCloud().dynamicProvisioning()) return true; + if (!featureFlag.value()) return true; + + NodeList configNodes = nodeRepository().nodes().list(Node.State.active) + .nodeType(NodeType.config); + if (configNodes.size() < MIN_ACTIVE_NODES) return true; + List<ZooKeeperServer> servers = configNodes.stream() + .map(node -> new ZooKeeperServer(node.allocation().get().membership().index(), node.hostname())) + .collect(Collectors.toList()); + reconfigurer.reconfigure(servers); + return true; + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index 8929d7f9939..01bbaffa1ed 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -17,6 +17,7 @@ import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; import com.yahoo.vespa.hosted.provision.provisioning.ProvisionServiceProvider; import com.yahoo.vespa.orchestrator.Orchestrator; import com.yahoo.vespa.service.monitor.ServiceMonitor; +import com.yahoo.vespa.zookeeper.Reconfigurer; import java.time.Duration; import java.util.List; @@ -38,7 +39,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent { HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor, Zone zone, Orchestrator orchestrator, Metric metric, ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource, - MetricsFetcher metricsFetcher, MetricsDb metricsDb) { + MetricsFetcher metricsFetcher, MetricsDb metricsDb, + Reconfigurer reconfigurer) { DefaultTimes defaults = new DefaultTimes(zone, deployer); PeriodicApplicationMaintainer periodicApplicationMaintainer = new PeriodicApplicationMaintainer(deployer, metric, nodeRepository, defaults.redeployMaintainerInterval, @@ -65,6 +67,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { maintainers.add(new AutoscalingMaintainer(nodeRepository, metricsDb, deployer, metric, defaults.autoscalingInterval)); maintainers.add(new ScalingSuggestionsMaintainer(nodeRepository, metricsDb, defaults.scalingSuggestionsInterval, metric)); maintainers.add(new SwitchRebalancer(nodeRepository, defaults.switchRebalancerInterval, metric, deployer)); + maintainers.add(new ConfigServerReconfigurer(nodeRepository, defaults.configServerReconfigurerInterval, metric, reconfigurer)); if (Set.of(Environment.staging, Environment.perf, Environment.prod).contains(zone.environment()) || (zone.system().isCd() && zone.environment() == Environment.dev)) // TODO: Temporarily when testing the feature maintainers.add(new DedicatedClusterControllerClusterMigrator(deployer, metric, nodeRepository, defaults.dedicatedClusterControllerMigratorInterval, flagSource, orchestrator)); @@ -117,6 +120,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private final Duration autoscalingInterval; private final Duration scalingSuggestionsInterval; private final Duration switchRebalancerInterval; + private final Duration configServerReconfigurerInterval; private final Duration dedicatedClusterControllerMigratorInterval; private final NodeFailer.ThrottlePolicy throttlePolicy; @@ -144,6 +148,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { scalingSuggestionsInterval = Duration.ofMinutes(31); spareCapacityMaintenanceInterval = Duration.ofMinutes(30); switchRebalancerInterval = Duration.ofHours(1); + configServerReconfigurerInterval = Duration.ofSeconds(90); throttlePolicy = NodeFailer.ThrottlePolicy.hosted; retiredExpiry = Duration.ofDays(4); // give up migrating data after 4 days dedicatedClusterControllerMigratorInterval = zone.environment() == Environment.staging || zone.system().isCd() ? Duration.ofMinutes(3) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java index 5e40c0bd9ff..e1850b03944 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java @@ -29,6 +29,7 @@ public class ContainerConfig { " <component id='com.yahoo.vespa.hosted.provision.testutils.MockMetricsFetcher'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockNodeRepository'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockProvisionServiceProvider'/>\n" + + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockReconfigurer'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.maintenance.NodeRepositoryMaintenance'/>\n" + " <component id='com.yahoo.vespa.flags.InMemoryFlagSource'/>\n" + " <component id='com.yahoo.config.provision.Zone'/>\n" + diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockReconfigurer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockReconfigurer.java new file mode 100644 index 00000000000..0b18c144621 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockReconfigurer.java @@ -0,0 +1,56 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.testutils; + +import com.yahoo.vespa.zookeeper.ReconfigException; +import com.yahoo.vespa.zookeeper.Reconfigurer; +import com.yahoo.vespa.zookeeper.Sleeper; +import com.yahoo.vespa.zookeeper.VespaZooKeeperAdmin; +import com.yahoo.vespa.zookeeper.ZooKeeperServer; + +import java.time.Duration; +import java.util.Comparator; +import java.util.List; +import java.util.stream.Collectors; + +/** + * @author mpolden + */ +public class MockReconfigurer extends Reconfigurer { + + private List<ZooKeeperServer> servers = List.of(); + private int reconfigurations = 0; + + public MockReconfigurer() { + super(new MockVespaZooKeperAdmin(), new Sleeper() { + @Override + public void sleep(Duration duration) { + // Ignored + } + }); + } + + @Override + public void reconfigure(List<ZooKeeperServer> wantedServers) { + servers = wantedServers.stream() + .sorted(Comparator.comparing(ZooKeeperServer::id)) + .collect(Collectors.toUnmodifiableList()); + reconfigurations++; + } + + public List<ZooKeeperServer> servers() { + return servers; + } + + public int reconfigurations() { + return reconfigurations; + } + + private static class MockVespaZooKeperAdmin implements VespaZooKeeperAdmin { + + @Override + public void reconfigure(String connectionSpec, String joiningServers, String leavingServers) throws ReconfigException { + } + + } + +} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ConfigServerReconfigurerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ConfigServerReconfigurerTest.java new file mode 100644 index 00000000000..1b7d90e426b --- /dev/null +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ConfigServerReconfigurerTest.java @@ -0,0 +1,90 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.maintenance; + +import com.yahoo.component.Version; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.Cloud; +import com.yahoo.config.provision.Environment; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.config.provision.RegionName; +import com.yahoo.config.provision.SystemName; +import com.yahoo.config.provision.Zone; +import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.flags.InMemoryFlagSource; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.node.Agent; +import com.yahoo.vespa.hosted.provision.provisioning.FatalProvisioningException; +import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; +import com.yahoo.vespa.hosted.provision.provisioning.ProvisionedHost; +import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; +import com.yahoo.vespa.hosted.provision.testutils.MockReconfigurer; +import com.yahoo.vespa.zookeeper.ZooKeeperServer; +import org.junit.Test; + +import java.time.Duration; +import java.util.List; +import java.util.Set; + +import static org.junit.Assert.assertEquals; + +/** + * @author mpolden + */ +public class ConfigServerReconfigurerTest { + + @Test + public void maintain() { + InMemoryFlagSource flagSource = new InMemoryFlagSource(); + flagSource.withBooleanFlag(Flags.DYNAMIC_CONFIG_SERVER_PROVISIONING.id(), true); + ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Cloud.builder().dynamicProvisioning(true).build(), + SystemName.defaultSystem(), + Environment.defaultEnvironment(), + RegionName.defaultName())) + .hostProvisioner(new MockHostProvisioner()) + .flagSource(flagSource) + .build(); + MockReconfigurer reconfigurer = new MockReconfigurer(); + ConfigServerReconfigurer maintainer = new ConfigServerReconfigurer(tester.nodeRepository(), Duration.ofDays(1), + new TestMetric(), reconfigurer); + + // Initially there are not enough config servers to trigger reconfiguration + tester.makeConfigServers(2, 1, "default"); + maintainer.maintain(); + assertEquals("No change: Too few servers", List.of(), reconfigurer.servers()); + + // Another is added, triggering reconfiguration + NodeList configServer = tester.makeConfigServers(1, 3, "default"); + maintainer.maintain(); + List<ZooKeeperServer> configuredServers = List.of(new ZooKeeperServer(0, "cfg1"), + new ZooKeeperServer(1, "cfg2"), + new ZooKeeperServer(2, "cfg3")); + assertEquals("Reconfigured", configuredServers, reconfigurer.servers()); + assertEquals(1, reconfigurer.reconfigurations()); + + // A config server is deallocated, no longer enough active nodes to reconfigure + tester.nodeRepository().nodes().deallocate(configServer.first().get(), Agent.system, this.getClass().getSimpleName()); + maintainer.maintain(); + assertEquals("No change: Too few active servers", configuredServers, reconfigurer.servers()); + assertEquals(1, reconfigurer.reconfigurations()); + } + + private static class MockHostProvisioner implements HostProvisioner { + + @Override + public List<ProvisionedHost> provisionHosts(List<Integer> provisionIndexes, NodeResources resources, ApplicationId applicationId, Version osVersion, HostSharing sharing) { + return List.of(); + } + + @Override + public List<Node> provision(Node host, Set<Node> children) throws FatalProvisioningException { + return List.of(); + } + + @Override + public void deprovision(Node host) { + } + + } + +} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java index 23f504a9c0f..6ebdfe984a0 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.provision.provisioning; import com.yahoo.component.Version; +import com.yahoo.component.Vtag; import com.yahoo.config.provision.ActivationContext; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ApplicationName; @@ -441,11 +442,19 @@ public class ProvisioningTester { return nodes; } - public NodeList makeConfigServers(int n, String flavor, Version configServersVersion) { + public NodeList makeConfigServers(int n, int startIndex, String flavor) { + return makeConfigServers(n, startIndex, flavor, Vtag.currentVersion); + } + + public NodeList makeConfigServers(int n, String flavor, Version version) { + return makeConfigServers(n, 1, flavor, version); + } + + public NodeList makeConfigServers(int n, int startIndex, String flavor, Version version) { List<Node> nodes = new ArrayList<>(n); MockNameResolver nameResolver = (MockNameResolver)nodeRepository().nameResolver(); - for (int i = 1; i <= n; i++) { + for (int i = startIndex; i < startIndex + n; i++) { String hostname = "cfg" + i; String ipv4 = "127.0.1." + i; @@ -461,7 +470,7 @@ public class ProvisioningTester { ConfigServerApplication application = new ConfigServerApplication(); List<HostSpec> hosts = prepare(application.getApplicationId(), - application.getClusterSpecWithVersion(configServersVersion), + application.getClusterSpecWithVersion(version), application.getCapacity()); activate(application.getApplicationId(), new HashSet<>(hosts)); return nodeRepository.nodes().list(Node.State.active).owner(application.getApplicationId()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json index bd4029ec0c0..d788b321af9 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json @@ -4,6 +4,9 @@ "name": "AutoscalingMaintainer" }, { + "name": "ConfigServerReconfigurer" + }, + { "name": "DedicatedClusterControllerClusterMigrator" }, { |