diff options
author | Jon Bratseth <bratseth@gmail.com> | 2023-02-15 19:31:32 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2023-02-15 19:31:32 +0100 |
commit | 288af4201cdd7e00d6a0f0242c1295b333bd44ab (patch) | |
tree | 64ec820cde183f4f9da13a5119e20dac706a91e9 /controller-server | |
parent | 4c9206d8119d1131e248419c7e1ba669c396b89b (diff) |
Maintain deploymentInfo
Diffstat (limited to 'controller-server')
9 files changed, 140 insertions, 23 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java index d053106fcee..a4bded314d9 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java @@ -93,6 +93,7 @@ import java.util.Optional; import java.util.OptionalInt; import java.util.Set; import java.util.TreeMap; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; import java.util.function.Predicate; @@ -144,6 +145,7 @@ public class ApplicationController { private final ListFlag<String> incompatibleVersions; private final BillingController billingController; private final ListFlag<String> cloudAccountsFlag; + private final Map<DeploymentId, com.yahoo.vespa.hosted.controller.api.integration.configserver.Application> deploymentInfo = new ConcurrentHashMap<>(); ApplicationController(Controller controller, CuratorDb curator, AccessControl accessControl, Clock clock, FlagSource flagSource, BillingController billingController) { @@ -206,6 +208,15 @@ public class ApplicationController { } /** + * Returns in-memory info for the given deployment pulled from the node repo. + * Info on any existing deployment can be missing if it has not yet been fetched since this instance was started. + * This is kept up to date by DeploymentInfoMaintainer. + * Accessing this is thread safe. + */ + // TODO: Replace the wire level Application by a DeploymentInfo class in the model + public Map<DeploymentId, com.yahoo.vespa.hosted.controller.api.integration.configserver.Application> deploymentInfo() { return deploymentInfo; } + + /** * Triggers reindexing for the given document types in the given clusters, for the given application. * <p> * If no clusters are given, reindexing is triggered for the entire application; otherwise diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java index 1ef94ce527c..dc04a81fe4a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java @@ -81,7 +81,7 @@ public class BcpGroupUpdater extends ControllerMaintainer { if ( successFactor == 0 ) log.log(Level.WARNING, "Could not update traffic share on any applications", lastException); else if ( successFactor < 0.9 ) - log.log(Level.FINE, "Could not update traffic share on any applications", lastException); + log.log(Level.FINE, "Could not update traffic share on all applications", lastException); return successFactor; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeManagementAssessor.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeManagementAssessor.java index fa44b1480e3..5be20f9a994 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeManagementAssessor.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeManagementAssessor.java @@ -30,7 +30,6 @@ public class ChangeManagementAssessor { } Assessment assessmentInner(List<String> impactedHostnames, List<Node> allNodes, ZoneId zone) { - List<String> impactedParentHosts = toParentHosts(impactedHostnames, allNodes); // Group impacted application nodes by parent host Map<Node, List<Node>> prParentHost = allNodes.stream() @@ -53,10 +52,7 @@ public class ChangeManagementAssessor { .map(node -> node.hostname()) .toList(); - boolean allHostsReplacable = tenantHosts.isEmpty() || nodeRepository.isReplaceable( - zone, - tenantHosts - ); + boolean allHostsReplacable = tenantHosts.isEmpty() || nodeRepository.isReplaceable(zone, tenantHosts); // Report assessment pr cluster var clusterAssessments = prCluster.entrySet().stream().map((entry) -> { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java index b64de5d5af4..bcc8296da2a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java @@ -46,6 +46,7 @@ public class ControllerMaintenance extends AbstractComponent { maintainers.add(osUpgradeScheduler); maintainers.addAll(osUpgraders(controller, intervals.osUpgrader)); maintainers.add(new DeploymentExpirer(controller, intervals.defaultInterval)); + maintainers.add(new DeploymentInfoMaintainer(controller, intervals.deploymentInfoMaintainer)); maintainers.add(new DeploymentUpgrader(controller, intervals.defaultInterval)); maintainers.add(new DeploymentIssueReporter(controller, controller.serviceRegistry().deploymentIssues(), intervals.defaultInterval)); maintainers.add(new MetricsReporter(controller, metric, athenzClientFactory.createZmsClient())); @@ -109,6 +110,7 @@ public class ControllerMaintenance extends AbstractComponent { private final SystemName system; private final Duration defaultInterval; + private final Duration deploymentInfoMaintainer; private final Duration outstandingChangeDeployer; private final Duration versionStatusUpdater; private final Duration readyJobsTrigger; @@ -143,6 +145,7 @@ public class ControllerMaintenance extends AbstractComponent { public Intervals(SystemName system) { this.system = Objects.requireNonNull(system); this.defaultInterval = duration(system.isCd() ? 1 : 5, MINUTES); + this.deploymentInfoMaintainer = duration(system.isCd() ? 1 : 10, MINUTES); this.outstandingChangeDeployer = duration(3, MINUTES); this.versionStatusUpdater = duration(3, MINUTES); this.readyJobsTrigger = duration(1, MINUTES); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentInfoMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentInfoMaintainer.java new file mode 100644 index 00000000000..cf9db1517a0 --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentInfoMaintainer.java @@ -0,0 +1,57 @@ +package com.yahoo.vespa.hosted.controller.maintenance; + +import com.yahoo.vespa.hosted.controller.Application; +import com.yahoo.vespa.hosted.controller.Controller; +import com.yahoo.vespa.hosted.controller.Instance; +import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeRepository; +import com.yahoo.yolean.Exceptions; + +import java.time.Duration; +import java.util.stream.Stream; + +/** + * This pulls application deployment information from the node repo on all config servers, + * and stores it in memory in controller.applications().deploymentInfo(). + * + * @author bratseth + */ +public class DeploymentInfoMaintainer extends ControllerMaintainer { + + private final NodeRepository nodeRepository; + + public DeploymentInfoMaintainer(Controller controller, Duration duration) { + super(controller, duration); + this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository(); + } + + @Override + protected double maintain() { + controller().applications().asList().stream() + .flatMap(this::mapApplicationToInstances) + .flatMap(this::mapInstanceToDeployments) + .forEach(this::updateDeploymentInfo); + return 1.0; + } + + private Stream<Instance> mapApplicationToInstances(Application application) { + return application.instances().values().stream(); + } + + private Stream<DeploymentId> mapInstanceToDeployments(Instance instance) { + return instance.deployments().keySet().stream() + .filter(zoneId -> !zoneId.environment().isTest()) + .map(zoneId -> new DeploymentId(instance.id(), zoneId)); + } + + private void updateDeploymentInfo(DeploymentId id) { + try { + controller().applications().deploymentInfo().put(id, nodeRepository.getApplication(id.zoneId(), id.applicationId())); + } + catch (ConfigServerException e) { + log.info("Could not retrieve deployment info for " + id + ": " + Exceptions.toMessageString(e)); + } + } + +} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java index 44081081f73..d47b82a231f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java @@ -168,6 +168,7 @@ public class ResourceMeterMaintainer extends ControllerMaintainer { private Stream<Map.Entry<ClusterId, List<Cluster.ScalingEvent>>> mapDeploymentToClusterScalingEvent(DeploymentId deploymentId) { try { + // TODO: get Application from controller.applications().deploymentInfo() return nodeRepository.getApplication(deploymentId.zoneId(), deploymentId.applicationId()) .clusters().entrySet().stream() .map(cluster -> Map.entry(new ClusterId(deploymentId, cluster.getKey()), cluster.getValue().scalingEvents())); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java index 9211580abf1..45b3e4ef5dd 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java @@ -38,7 +38,7 @@ import java.util.stream.Stream; * * Maintains status and execution of Vespa CMRs. * - * Currently this retires all affected tenant hosts if zone capacity allows it. + * Currently, this retires all affected tenant hosts if zone capacity allows it. * * @author olaa */ diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdaterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdaterTest.java index ee58320f94a..fcd2183ab8c 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdaterTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdaterTest.java @@ -219,11 +219,19 @@ public class BcpGroupUpdaterTest { assertTrafficFraction(0.20, 0.20 + 30 / 200.0 / 2.5, context.instanceId(), us3, tester); assertTrafficFraction(0.30, 0.30 + 0.5 * 50 / 200.0 / 1.5 + 0.5 * 40 / 200.0 / 2.5, context.instanceId(), eu1, tester); - // Partial group info (missing from ap*) + // BCP group info (missing ap* regions for cluster1, and full for cluster2) setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us1, "cluster1", updater); setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us2, "cluster1", updater); setBcpMetrics(300, 0.3, 0.3, context.instanceId(), us3, "cluster1", updater); setBcpMetrics(100, 0.1, 0.1, context.instanceId(), eu1, "cluster1", updater); + + setBcpMetrics(100, 0.1, 0.1, context.instanceId(), ap1, "cluster2", updater); + setBcpMetrics(200, 0.2, 0.2, context.instanceId(), ap2, "cluster2", updater); + setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us1, "cluster2", updater); + setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us2, "cluster2", updater); + setBcpMetrics(300, 0.3, 0.3, context.instanceId(), us3, "cluster2", updater); + setBcpMetrics(100, 0.1, 0.1, context.instanceId(), eu1, "cluster2", updater); + assertEquals(1.0, updater.maintain(), 0.0000001); assertNoBcpGroupInfo(context.instanceId(), ap1, "cluster1", tester, "No info in ap"); @@ -233,21 +241,12 @@ public class BcpGroupUpdaterTest { assertBcpGroupInfo(100.0, 0.1, 0.1, context.instanceId(), us3, "cluster1", tester); assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), eu1, "cluster1", tester); - // Full BCP group info - setBcpMetrics(100, 0.1, 0.1, context.instanceId(), ap1, "cluster1", updater); - setBcpMetrics(200, 0.2, 0.2, context.instanceId(), ap2, "cluster1", updater); - setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us1, "cluster1", updater); - setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us2, "cluster1", updater); - setBcpMetrics(300, 0.3, 0.3, context.instanceId(), us3, "cluster1", updater); - setBcpMetrics(100, 0.1, 0.1, context.instanceId(), eu1, "cluster1", updater); - assertEquals(1.0, updater.maintain(), 0.0000001); - - assertBcpGroupInfo(200.0, 0.2, 0.2, context.instanceId(), ap1, "cluster1", tester); - assertBcpGroupInfo(100.0, 0.1, 0.1, context.instanceId(), ap2, "cluster1", tester); - assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), us1, "cluster1", tester); - assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), us2, "cluster1", tester); - assertBcpGroupInfo(100.0, 0.1, 0.1, context.instanceId(), us3, "cluster1", tester); - assertBcpGroupInfo((200 + 300) / 2.0, (0.2 + 0.3) / 2.0, (0.2 + 0.3) / 2.0, context.instanceId(), eu1, "cluster1", tester); + assertBcpGroupInfo(200.0, 0.2, 0.2, context.instanceId(), ap1, "cluster2", tester); + assertBcpGroupInfo(100.0, 0.1, 0.1, context.instanceId(), ap2, "cluster2", tester); + assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), us1, "cluster2", tester); + assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), us2, "cluster2", tester); + assertBcpGroupInfo(100.0, 0.1, 0.1, context.instanceId(), us3, "cluster2", tester); + assertBcpGroupInfo((200 + 300) / 2.0, (0.2 + 0.3) / 2.0, (0.2 + 0.3) / 2.0, context.instanceId(), eu1, "cluster2", tester); } private void setQpsMetric(double qps, ApplicationId application, ZoneId zone, DeploymentTester tester) { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentInfoMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentInfoMaintainerTest.java new file mode 100644 index 00000000000..5b9fb8ccb5f --- /dev/null +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentInfoMaintainerTest.java @@ -0,0 +1,50 @@ +package com.yahoo.vespa.hosted.controller.maintenance; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.SystemName; +import com.yahoo.config.provision.zone.ZoneId; +import com.yahoo.vespa.hosted.controller.ControllerTester; +import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.Application; +import com.yahoo.vespa.hosted.controller.application.pkg.ApplicationPackage; +import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester; +import org.junit.jupiter.api.Test; + +import java.time.Duration; +import java.util.List; +import java.util.Set; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * @author bratseth + */ +public class DeploymentInfoMaintainerTest { + + @Test + void testDeploymentInfoMaintainer() { + ApplicationId app1 = ApplicationId.from("t1", "a1", "default"); + ApplicationId app2 = ApplicationId.from("t2", "a1", "default"); + ZoneId z1 = ZoneId.from("prod.aws-us-east-1c"); + ZoneId z2 = ZoneId.from("prod.aws-eu-west-1a"); + + DeploymentTester tester = new DeploymentTester(new ControllerTester(SystemName.Public)); + ApplicationPackage applicationPackage = new ApplicationPackageBuilder().region(z1.region()).region(z2.region()).trustDefaultCertificate().build(); + List.of(app1, app2).forEach(app -> tester.newDeploymentContext(app).submit(applicationPackage).deploy()); + + var maintainer = new DeploymentInfoMaintainer(tester.controller(), Duration.ofMinutes(5)); + var nodeRepo = tester.configServer().nodeRepository().allowPatching(true); + nodeRepo.putApplication(z1, new Application(app1, List.of())); + nodeRepo.putApplication(z1, new Application(app2, List.of())); + assertEquals(0, tester.controller().applications().deploymentInfo().size()); + maintainer.maintain(); + assertEquals(4, tester.controller().applications().deploymentInfo().size()); + assertEquals(Set.of(new DeploymentId(app1, z1), + new DeploymentId(app1, z2), + new DeploymentId(app2, z1), + new DeploymentId(app2, z2)), + tester.controller().applications().deploymentInfo().keySet()); + } + +} |