diff options
author | Jon Marius Venstad <venstad@gmail.com> | 2021-02-16 18:49:09 +0100 |
---|---|---|
committer | Jon Marius Venstad <venstad@gmail.com> | 2021-02-19 17:40:10 +0100 |
commit | 2b1828f8861320af8924954aaf223a221ce3e7c4 (patch) | |
tree | 7a1008edbc51887df98ee2f622e21fa0f016f7b0 | |
parent | 08f8d5424752c522c4b074b7d47945647dc298b0 (diff) |
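Add a maintainer which flips the switch to dedicated cluster controllers for eligible (flagged) apps; the switch is not flipped yet, since no app is considered quiescent so far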
6 files changed, 99 insertions, 0 deletions
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/Deployer.java b/config-provisioning/src/main/java/com/yahoo/config/provision/Deployer.java index 20e23edddce..24dc2022e3c 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/Deployer.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/Deployer.java @@ -71,4 +71,10 @@ public interface Deployer { /** Timeout in server, clients can use this to set correct client timeout */ default Duration serverDeployTimeout() { return Duration.ofMinutes(30); } + /** Turn on dedicated cluster controllers for the given application. */ + default void setDedicatedClusterControllerCluster(ApplicationId id) { } + + /** Get whether the given application uses dedicated cluster controllers. */ + default boolean getDedicatedClusterControllerCluster(ApplicationId id) { return false; } + } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java index 535b2eb083c..28c4b8897b1 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java @@ -1049,6 +1049,16 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye @Override public Duration serverDeployTimeout() { return Duration.ofSeconds(configserverConfig.zookeeper().barrierTimeout()); } + @Override + public void setDedicatedClusterControllerCluster(ApplicationId id) { + requireDatabase(id).setDedicatedClusterControllerCluster(id); + } + + @Override + public boolean getDedicatedClusterControllerCluster(ApplicationId id) { + return requireDatabase(id).getDedicatedClusterControllerCluster(id); + } + private static void logConfigChangeActions(ConfigChangeActions actions, DeployLogger logger) { RestartActions restartActions = actions.getRestartActions(); if ( ! 
restartActions.isEmpty()) { diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index b00711311c5..07359e706a2 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -245,6 +245,13 @@ public class Flags { "JVM max heap size for cluster controller in Mb", "Takes effect when restarting cluster controller"); + public static final UnboundBooleanFlag DEDICATED_CLUSTER_CONTROLLER_CLUSTER = defineFeatureFlag( + "dedicated-cluster-controller-cluster", false, + List.of("jonmv"), "2021-02-15", "2021-04-15", + "Makes application eligible for switching to a dedicated, shared cluster controller cluster, by a maintainer", + "Takes effect immediately", + APPLICATION_ID); + public static final UnboundListFlag<String> ALLOWED_ATHENZ_PROXY_IDENTITIES = defineListFlag( "allowed-athenz-proxy-identities", List.of(), String.class, List.of("bjorncs", "tokle"), "2021-02-10", "2021-08-01", diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DedicatedClusterControllerClusterMigrator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DedicatedClusterControllerClusterMigrator.java new file mode 100644 index 00000000000..0bfd4e027ad --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DedicatedClusterControllerClusterMigrator.java @@ -0,0 +1,70 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.maintenance; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.Deployer; +import com.yahoo.jdisc.Metric; +import com.yahoo.vespa.flags.BooleanFlag; +import com.yahoo.vespa.flags.FetchVector; +import com.yahoo.vespa.flags.FlagSource; +import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.hosted.provision.NodeRepository; + +import java.time.DayOfWeek; +import java.time.Duration; +import java.time.ZonedDateTime; +import java.util.Date; +import java.util.List; +import java.util.Set; +import java.util.logging.Level; + +import static java.time.DayOfWeek.SATURDAY; +import static java.time.DayOfWeek.SUNDAY; +import static java.util.stream.Collectors.toUnmodifiableSet; + +/** + * @author jonmv + */ +public class DedicatedClusterControllerClusterMigrator extends ApplicationMaintainer { + + private final BooleanFlag flag; + + protected DedicatedClusterControllerClusterMigrator(Deployer deployer, Metric metric, NodeRepository nodeRepository, + Duration interval, FlagSource flags) { + super(deployer, metric, nodeRepository, interval); + this.flag = Flags.DEDICATED_CLUSTER_CONTROLLER_CLUSTER.bindTo(flags); + } + + @Override + protected Set<ApplicationId> applicationsNeedingMaintenance() { + ZonedDateTime date = ZonedDateTime.ofInstant(clock().instant(), java.time.ZoneId.of("Europe/Oslo")); + if (List.of(SATURDAY, SUNDAY).contains(date.getDayOfWeek()) || date.getHour() < 8 || 12 < date.getHour()) + return Set.of(); + + return nodeRepository().applications().ids().stream() + .filter(this::isEligible) + .filter(this::hasNotSwitched) + .filter(this::isQuiescent) + .limit(1) + .peek(this::migrate) + .collect(toUnmodifiableSet()); + } + + private boolean isEligible(ApplicationId id) { + return flag.with(FetchVector.Dimension.APPLICATION_ID, id.serializedForm()).value(); + } + + private boolean hasNotSwitched(ApplicationId id) { + return ! 
deployer().getDedicatedClusterControllerCluster(id); + } + + private boolean isQuiescent(ApplicationId id) { + return false; // Check all content nodes are UP, have wanted state UP, and can be moved to MAINTENANCE. + } + + private void migrate(ApplicationId id) { + log.log(Level.INFO, "Migrating " + id + " to dedicated cluster controller cluster"); + deployer().setDedicatedClusterControllerCluster(id); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index bfd885cc32e..4d3f0fd4039 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -64,6 +64,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { maintainers.add(new AutoscalingMaintainer(nodeRepository, metricsDb, deployer, metric, defaults.autoscalingInterval)); maintainers.add(new ScalingSuggestionsMaintainer(nodeRepository, metricsDb, defaults.scalingSuggestionsInterval, metric)); maintainers.add(new SwitchRebalancer(nodeRepository, defaults.switchRebalancerInterval, metric, deployer)); + maintainers.add(new DedicatedClusterControllerClusterMigrator(deployer, metric, nodeRepository, defaults.dedicatedClusterControllerMigratorInterval, flagSource)); provisionServiceProvider.getLoadBalancerService(nodeRepository) .map(lbService -> new LoadBalancerExpirer(nodeRepository, defaults.loadBalancerExpirerInterval, lbService, metric)) @@ -113,6 +114,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private final Duration autoscalingInterval; private final Duration scalingSuggestionsInterval; private final Duration switchRebalancerInterval; + private final Duration dedicatedClusterControllerMigratorInterval; private final 
NodeFailer.ThrottlePolicy throttlePolicy; @@ -141,6 +143,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { switchRebalancerInterval = Duration.ofHours(1); throttlePolicy = NodeFailer.ThrottlePolicy.hosted; retiredExpiry = Duration.ofDays(4); // give up migrating data after 4 days + dedicatedClusterControllerMigratorInterval = Duration.ofHours(3); if (zone.environment() == Environment.prod && ! zone.system().isCd()) { inactiveExpiry = Duration.ofHours(4); // enough time for the application owner to discover and redeploy diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json index b31c597e2b0..bd4029ec0c0 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json @@ -4,6 +4,9 @@ "name": "AutoscalingMaintainer" }, { + "name": "DedicatedClusterControllerClusterMigrator" + }, + { "name": "DirtyExpirer" }, { |