diff options
author | Jon Bratseth <bratseth@oath.com> | 2018-02-27 13:46:20 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@oath.com> | 2018-02-27 13:46:20 +0100 |
commit | a0446f4d137752e278e8cf22df23ac7abb107299 (patch) | |
tree | d4cc73871f90a6dc0a4a5409aacd48ba473c146f | |
parent | 174745d431c59bfd7d8077b817dc38090010fd35 (diff) | |
parent | bec1ef560992ebcd0a46f37cff6b52b93169e858 (diff) |
Merge branch 'master' into bratseth/dont-validate-on-reload
116 files changed, 1762 insertions, 1357 deletions
diff --git a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ClusterId.java b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ClusterId.java index da5ea7f975d..7648432dc11 100644 --- a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ClusterId.java +++ b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ClusterId.java @@ -9,6 +9,10 @@ import java.util.Objects; * @author bjorncs */ public class ClusterId { + // Common cluster IDs + public static final ClusterId ADMIN = new ClusterId("admin"); + public static final ClusterId NODE_ADMIN = new ClusterId("node-admin"); + public static final ClusterId ROUTING = new ClusterId("routing"); private final String id; diff --git a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceType.java b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceType.java index 784ff9d1e38..0054264d42f 100644 --- a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceType.java +++ b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceType.java @@ -9,6 +9,9 @@ import java.util.Objects; * @author bjorncs */ public class ServiceType { + // Common service types. + public static final ServiceType CONTAINER = new ServiceType("container"); + public static final ServiceType SLOBROK = new ServiceType("slobrok"); private final String id; diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/NodeType.java b/config-provisioning/src/main/java/com/yahoo/config/provision/NodeType.java index 8553b07d683..875fa83c0bb 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/NodeType.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/NodeType.java @@ -9,18 +9,27 @@ package com.yahoo.config.provision; public enum NodeType { /** A host of a set of (docker) tenant nodes */ - host, + host(true), /** Nodes running the shared proxy layer */ - proxy, + proxy(false), /** A node to be assigned to a tenant to run application workloads */ - tenant, + tenant(false), /** A config server */ - config, + config(false), /** A host of a (docker) config server node */ - confighost + confighost(true); + private boolean isDockerHost; + + NodeType(boolean isDockerHost) { + this.isDockerHost = isDockerHost; + } + + public boolean isDockerHost() { + return isDockerHost; + } } diff --git a/configdefinitions/src/vespa/configserver.def b/configdefinitions/src/vespa/configserver.def index f730758d58f..77f45b104b2 100644 --- a/configdefinitions/src/vespa/configserver.def +++ b/configdefinitions/src/vespa/configserver.def @@ -46,5 +46,5 @@ payloadCompressionType enum { UNCOMPRESSED, LZ4 } default=LZ4 # Athenz config loadBalancerAddress string default="" -# File distribution -disableFiledistributor bool default=true +# Node admin +nodeAdminInContainer bool default=true diff --git a/configserver/src/main/resources/configserver-app/services.xml b/configserver/src/main/resources/configserver-app/services.xml index a2ef382a5a7..a129d7288ce 100644 --- a/configserver/src/main/resources/configserver-app/services.xml +++ b/configserver/src/main/resources/configserver-app/services.xml @@ -53,6 +53,7 @@ <preprocess:include file='hosted-vespa/scoreboard.xml' required='false' /> <preprocess:include file='controller/container.xml' required='false' /> <component id="com.yahoo.vespa.service.monitor.internal.SlobrokMonitorManagerImpl" bundle="service-monitor" /> + <component id="com.yahoo.vespa.service.monitor.internal.HealthMonitorManager" bundle="service-monitor" /> <component id="com.yahoo.vespa.service.monitor.internal.ServiceMonitorImpl" bundle="service-monitor" /> <component id="com.yahoo.vespa.orchestrator.ServiceMonitorInstanceLookupService" bundle="orchestrator" /> <component id="com.yahoo.vespa.orchestrator.status.ZookeeperStatusService" bundle="orchestrator" /> diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/BuildService.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/BuildService.java index 7933a23c45f..5e9c8d73b38 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/BuildService.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/BuildService.java @@ -1,18 +1,21 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.api.integration; +import java.util.Objects; + /** * @author jvenstad */ public interface BuildService { /** - * Enqueue a job defined by "buildJob in an external build system, and return the outcome of the enqueue request. - * This method should return @false only when a retry is in order, and @true otherwise, e.g., on success, or for + * Enqueue a job defined by buildJob in an external build system, and return the outcome of the enqueue request. + * This method should return false only when a retry is in order, and true otherwise, e.g., on success, or for * invalid jobs. */ boolean trigger(BuildJob buildJob); + class BuildJob { private final long projectId; @@ -27,7 +30,23 @@ public interface BuildService { public String jobName() { return jobName; } @Override - public String toString() { return jobName + "@" + projectId; } + public boolean equals(Object o) { + if (this == o) return true; + if ( ! (o instanceof BuildJob)) return false; + BuildJob buildJob = (BuildJob) o; + return projectId == buildJob.projectId && + Objects.equals(jobName, buildJob.jobName); + } + + @Override + public String toString() { + return jobName + "@" + projectId; + } + + @Override + public int hashCode() { + return Objects.hash(projectId, jobName); + } } diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/security/KeyService.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/security/KeyService.java deleted file mode 100644 index 61cd738314a..00000000000 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/security/KeyService.java +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.controller.api.integration.security; - -/** - * A service for retrieving secrets, such as API keys, private keys and passwords. - * - * @author mpolden - * @author bjorncs - */ -public interface KeyService { - - String getSecret(String key); - - default String getSecret(String key, int version) { - throw new UnsupportedOperationException("KeyService implementation does not support versioned secrets"); - } - -} diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/security/KeyServiceMock.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/security/KeyServiceMock.java deleted file mode 100644 index d2a4b675f6d..00000000000 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/security/KeyServiceMock.java +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.controller.api.integration.security; - -/** - * @author mpolden - */ -public class KeyServiceMock implements KeyService { - - @Override - public String getSecret(String key) { - return "fake-secret-for-" + key; - } - -} diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/security/package-info.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/security/package-info.java deleted file mode 100644 index 296eebf8ea5..00000000000 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/security/package-info.java +++ /dev/null @@ -1,5 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -@ExportPackage -package com.yahoo.vespa.hosted.controller.api.integration.security; - -import com.yahoo.osgi.annotation.ExportPackage; diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/stubs/MockBuildService.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/stubs/MockBuildService.java new file mode 100644 index 00000000000..2f6307ae10d --- /dev/null +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/stubs/MockBuildService.java @@ -0,0 +1,12 @@ +package com.yahoo.vespa.hosted.controller.api.integration.stubs; + +import com.yahoo.vespa.hosted.controller.api.integration.BuildService; + +public class MockBuildService implements BuildService { + + @Override + public boolean trigger(BuildJob buildJob) { + return true; + } + +} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java index 0ec00f61311..303f5d5484b 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java @@ -35,10 +35,12 @@ import com.yahoo.vespa.serviceview.bindings.ApplicationView; import java.net.URI; import java.time.Clock; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.function.Predicate; import java.util.logging.Logger; /** @@ -208,10 +210,21 @@ public class Controller extends AbstractComponent { " to " + printableVersion(newStatus.systemVersion())); } curator.writeVersionStatus(newStatus); + // Removes confidence overrides for versions that no longer exist in the system + removeConfidenceOverride(version -> newStatus.versions().stream() + .noneMatch(vespaVersion -> vespaVersion.versionNumber() + .equals(version))); } /** Returns the latest known version status. Calling this is free but the status may be slightly out of date. */ public VersionStatus versionStatus() { return curator.readVersionStatus(); } + + /** Remove confidence override for versions matching given filter */ + public void removeConfidenceOverride(Predicate<Version> filter) { + Map<Version, VespaVersion.Confidence> overrides = new LinkedHashMap<>(curator().readConfidenceOverrides()); + overrides.keySet().removeIf(filter); + curator.writeConfidenceOverrides(overrides); + } /** Returns the current system version: The controller should drive towards running all applications on this version */ public Version systemVersion() { @@ -244,7 +257,7 @@ public class Controller extends AbstractComponent { return nodeRepositoryClient; } - private String printableVersion(Optional<VespaVersion> vespaVersion) { + private static String printableVersion(Optional<VespaVersion> vespaVersion) { return vespaVersion.map(v -> v.versionNumber().toFullString()).orElse("Unknown"); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/BuildSystem.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/BuildSystem.java deleted file mode 100644 index 15b3ef7fb83..00000000000 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/BuildSystem.java +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.controller.deployment; - -import com.yahoo.config.provision.ApplicationId; -import com.yahoo.vespa.hosted.controller.api.integration.BuildService.BuildJob; -import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType; - -import java.util.List; - -/** - * @author jvenstad - * @author mpolden - */ -public interface BuildSystem { - - /** - * Add a job for the given application to the build system - * - * @param application the application owning the job - * @param jobType the job type to be queued - * @param first whether the job should be added to the front of the queue - */ - void addJob(ApplicationId application, JobType jobType, boolean first); - - /** Remove and return a list of jobs which should be run now */ - List<BuildJob> takeJobsToRun(); - - /** Get a list of all jobs currently waiting to run */ - List<BuildJob> jobs(); - - /** Removes all queued jobs for the given application */ - void removeJobs(ApplicationId applicationId); - -} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentJobExecutor.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentJobExecutor.java new file mode 100644 index 00000000000..fde5b311cd9 --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentJobExecutor.java @@ -0,0 +1,53 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.controller.deployment; + +import com.yahoo.vespa.hosted.controller.Controller; +import com.yahoo.vespa.hosted.controller.api.integration.BuildService; +import com.yahoo.vespa.hosted.controller.maintenance.JobControl; +import com.yahoo.vespa.hosted.controller.maintenance.Maintainer; + +import java.time.Duration; +import java.util.concurrent.Executor; +import java.util.concurrent.Executors; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Triggers deployment jobs in an external BuildService. + * + * Triggering is performed by an Executor, as there is no guarantee the BuildService provides a timely response. + * + * @author jvenstad + */ +public class DeploymentJobExecutor extends Maintainer { + + private static final Logger log = Logger.getLogger(DeploymentJobExecutor.class.getName()); + private static final int triggeringRetries = 5; + + private final BuildService buildService; + private final Executor executor; + + public DeploymentJobExecutor(Controller controller, Duration triggeringInterval, JobControl jobControl, BuildService buildService) { + this(controller, triggeringInterval, jobControl, buildService, Executors.newFixedThreadPool(20)); + } + + DeploymentJobExecutor(Controller controller, Duration triggeringInterval, JobControl jobControl, + BuildService buildService, Executor executor) { + super(controller, triggeringInterval, jobControl); + this.buildService = buildService; + this.executor = executor; + } + + @Override + protected void maintain() { + controller().applications().deploymentTrigger().deploymentQueue().takeJobsToRun() + .forEach(buildJob -> executor.execute(() -> { + for (int i = 0; i < triggeringRetries; i++) + if (buildService.trigger(buildJob)) + return; + + log.log(Level.WARNING, "Exhausted all " + triggeringRetries + " retries for " + buildJob + " without success."); + })); + } + +} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentQueue.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentQueue.java new file mode 100644 index 00000000000..385de7b5a30 --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentQueue.java @@ -0,0 +1,96 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.controller.deployment; + +import com.google.common.collect.ImmutableList; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.searchlib.rankingexpression.rule.Function; +import com.yahoo.vespa.curator.Lock; +import com.yahoo.vespa.hosted.controller.Controller; +import com.yahoo.vespa.hosted.controller.api.integration.BuildService.BuildJob; +import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType; +import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; + +import java.util.Deque; +import java.util.List; +import java.util.Optional; +import java.util.function.Consumer; + +/** + * Stores a queue for each type of job, and offers jobs from each of these to a periodic + * polling mechanism which is responsible for triggering the offered jobs in an external build service. + * + * @author jvenstad + */ +public class DeploymentQueue { + + private final Controller controller; + private final CuratorDb curator; + + public DeploymentQueue(Controller controller, CuratorDb curator) { + this.controller = controller; + this.curator = curator; + } + + /** Add the given application to the queue of the given job type -- in front if first, at the back otherwise. */ + public void addJob(ApplicationId applicationId, JobType jobType, boolean first) { + locked(jobType, queue -> { + if ( ! queue.contains(applicationId)) { + if (first) + queue.addFirst(applicationId); + else + queue.addLast(applicationId); + } + }); + } + + /** List all jobs currently enqueued. */ + public List<BuildJob> jobs() { + ImmutableList.Builder<BuildJob> builder = ImmutableList.builder(); + for (JobType jobType : JobType.values()) + for (ApplicationId id : curator.readJobQueue(jobType)) + toBuildJob(id, jobType).ifPresent(builder::add); + + return builder.build(); + } + + /** Remove and return a set of jobs to run. This set will contain only one of each job type for capacity constrained zones. */ + public List<BuildJob> takeJobsToRun() { + ImmutableList.Builder<BuildJob> builder = ImmutableList.builder(); + for (JobType jobType : JobType.values()) + locked(jobType, queue -> + queue.stream() + .limit(isCapacityConstrained(jobType) ? 1 : Long.MAX_VALUE) + .peek(id -> toBuildJob(id, jobType).ifPresent(builder::add)) + .forEach(queue::remove)); + + return builder.build(); + } + + /** Remove all enqueued jobs for the given application. */ + public void removeJobs(ApplicationId applicationId) { + for (JobType jobType : JobType.values()) + locked(jobType, queue -> { + while (queue.remove(applicationId)); // Keep removing until not found. + }); + } + + /** Lock the job queues and read, modify, and store the queue for the given job type. */ + private void locked(JobType jobType, Consumer<Deque<ApplicationId>> modifications) { + try (Lock lock = curator.lockJobQueues()) { + Deque<ApplicationId> queue = curator.readJobQueue(jobType); + modifications.accept(queue); + curator.writeJobQueue(jobType, queue); + } + } + + private static boolean isCapacityConstrained(JobType jobType) { + return jobType == JobType.stagingTest || jobType == JobType.systemTest; + } + + private Optional<BuildJob> toBuildJob(ApplicationId applicationId, JobType jobType) { + return controller.applications().get(applicationId) + .flatMap(application -> application.deploymentJobs().projectId()) + .map(projectId -> new BuildJob(projectId, jobType.jobName())); + } + +} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index c768aea8248..ee448775cbf 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -47,7 +47,7 @@ public class DeploymentTrigger { private final Controller controller; private final Clock clock; - private final BuildSystem buildSystem; + private final DeploymentQueue deploymentQueue; private final DeploymentOrder order; public DeploymentTrigger(Controller controller, CuratorDb curator, Clock clock) { @@ -56,7 +56,7 @@ public class DeploymentTrigger { Objects.requireNonNull(clock,"clock cannot be null"); this.controller = controller; this.clock = clock; - this.buildSystem = new PolledBuildSystem(controller, curator); + this.deploymentQueue = new DeploymentQueue(controller, curator); this.order = new DeploymentOrder(controller); this.jobTimeout = controller.system().equals(SystemName.main) ? Duration.ofHours(12) : Duration.ofHours(1); } @@ -64,7 +64,7 @@ public class DeploymentTrigger { /** Returns the time in the past before which jobs are at this moment considered unresponsive */ public Instant jobTimeoutLimit() { return clock.instant().minus(jobTimeout); } - public BuildSystem buildSystem() { return buildSystem; } + public DeploymentQueue deploymentQueue() { return deploymentQueue; } public DeploymentOrder deploymentOrder() { return order; } @@ -270,7 +270,7 @@ public class DeploymentTrigger { */ public void cancelChange(ApplicationId applicationId) { applications().lockOrThrow(applicationId, application -> { - buildSystem.removeJobs(application.id()); + deploymentQueue.removeJobs(application.id()); applications().store(application.withChange(Change.empty())); }); } @@ -345,7 +345,7 @@ public class DeploymentTrigger { log.info(String.format("Triggering %s for %s, %s: %s", jobType, application, application.change().isPresent() ? "deploying " + application.change() : "restarted deployment", reason)); - buildSystem.addJob(application.id(), jobType, first); + deploymentQueue.addJob(application.id(), jobType, first); return application.withJobTriggering(jobType, application.change(), clock.instant(), diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystem.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystem.java deleted file mode 100644 index e25db10a8cd..00000000000 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystem.java +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.controller.deployment; - -import com.yahoo.config.provision.ApplicationId; -import com.yahoo.vespa.curator.Lock; -import com.yahoo.vespa.hosted.controller.Controller; -import com.yahoo.vespa.hosted.controller.api.integration.BuildService.BuildJob; -import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType; -import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Deque; -import java.util.List; -import java.util.Optional; -import java.util.logging.Logger; - -/** - * Stores a queue for each type of job, and offers jobs from each of these to a periodic - * polling mechanism which is responsible for triggering the offered jobs in an external build service. - * - * @author jvenstad - * @author mpolden - */ -public class PolledBuildSystem implements BuildSystem { - - private static final Logger log = Logger.getLogger(PolledBuildSystem.class.getName()); - - // The number of jobs to offer, on each poll, for zones that have limited capacity - private static final int maxCapacityConstrainedJobsToOffer = 2; - - private final Controller controller; - private final CuratorDb curator; - - public PolledBuildSystem(Controller controller, CuratorDb curator) { - this.controller = controller; - this.curator = curator; - } - - @Override - public void addJob(ApplicationId application, JobType jobType, boolean first) { - try (Lock lock = curator.lockJobQueues()) { - Deque<ApplicationId> queue = curator.readJobQueue(jobType); - if ( ! queue.contains(application)) { - if (first) { - queue.addFirst(application); - } else { - queue.add(application); - } - } - curator.writeJobQueue(jobType, queue); - } - } - - @Override - public List<BuildJob> jobs() { - return getJobs(false); - } - - @Override - public List<BuildJob> takeJobsToRun() { - return getJobs(true); - } - - - @Override - public void removeJobs(ApplicationId application) { - try (Lock lock = curator.lockJobQueues()) { - for (JobType jobType : JobType.values()) { - Deque<ApplicationId> queue = curator.readJobQueue(jobType); - while (queue.remove(application)) { - // keep removing until not found - } - curator.writeJobQueue(jobType, queue); - } - } - } - - private List<BuildJob> getJobs(boolean removeFromQueue) { - int capacityConstrainedJobsOffered = 0; - try (Lock lock = curator.lockJobQueues()) { - List<BuildJob> jobsToRun = new ArrayList<>(); - for (JobType jobType : JobType.values()) { - Deque<ApplicationId> queue = curator.readJobQueue(jobType); - for (ApplicationId a : queue) { - ApplicationId application = removeFromQueue ? queue.poll() : a; - - Optional<Long> projectId = projectId(application); - if (projectId.isPresent()) { - jobsToRun.add(new BuildJob(projectId.get(), jobType.jobName())); - } else { - log.warning("Not queuing " + jobType.jobName() + " for " + application.toShortString() + - " because project ID is missing"); - } - - // Return a limited number of jobs at a time for capacity constrained zones - if (removeFromQueue && isCapacityConstrained(jobType) && - ++capacityConstrainedJobsOffered >= maxCapacityConstrainedJobsToOffer) { - break; - } - } - if (removeFromQueue) - curator.writeJobQueue(jobType, queue); - } - return Collections.unmodifiableList(jobsToRun); - } - } - - private Optional<Long> projectId(ApplicationId applicationId) { - return controller.applications().require(applicationId).deploymentJobs().projectId(); - } - - private static boolean isCapacityConstrained(JobType jobType) { - return jobType == JobType.stagingTest || jobType == JobType.systemTest; - } - -} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java index f6dc1326d5e..e72ec5224f1 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java @@ -5,9 +5,11 @@ import com.yahoo.component.AbstractComponent; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.integration.dns.NameService; +import com.yahoo.vespa.hosted.controller.api.integration.BuildService; import com.yahoo.vespa.hosted.controller.api.integration.organization.OwnershipIssues; import com.yahoo.vespa.hosted.controller.api.integration.organization.DeploymentIssues; import com.yahoo.vespa.hosted.controller.api.integration.chef.Chef; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentJobExecutor; import com.yahoo.vespa.hosted.controller.maintenance.config.MaintainerConfig; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; @@ -36,12 +38,13 @@ public class ControllerMaintenance extends AbstractComponent { private final DeploymentMetricsMaintainer deploymentMetricsMaintainer; private final ApplicationOwnershipConfirmer applicationOwnershipConfirmer; private final DnsMaintainer dnsMaintainer; + private final DeploymentJobExecutor deploymentJobExecutor; @SuppressWarnings("unused") // instantiated by Dependency Injection public ControllerMaintenance(MaintainerConfig maintainerConfig, Controller controller, CuratorDb curator, JobControl jobControl, Metric metric, Chef chefClient, DeploymentIssues deploymentIssues, OwnershipIssues ownershipIssues, - NameService nameService) { + NameService nameService, BuildService buildService) { Duration maintenanceInterval = Duration.ofMinutes(maintainerConfig.intervalMinutes()); this.jobControl = jobControl; deploymentExpirer = new DeploymentExpirer(controller, maintenanceInterval, jobControl); @@ -56,6 +59,7 @@ public class ControllerMaintenance extends AbstractComponent { deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(controller, Duration.ofMinutes(10), jobControl); applicationOwnershipConfirmer = new ApplicationOwnershipConfirmer(controller, Duration.ofHours(12), jobControl, ownershipIssues); dnsMaintainer = new DnsMaintainer(controller, Duration.ofHours(12), jobControl, nameService); + deploymentJobExecutor = new DeploymentJobExecutor(controller, Duration.ofSeconds(30), jobControl, buildService); } public Upgrader upgrader() { return upgrader; } @@ -77,6 +81,7 @@ public class ControllerMaintenance extends AbstractComponent { deploymentMetricsMaintainer.deconstruct(); applicationOwnershipConfirmer.deconstruct(); dnsMaintainer.deconstruct(); + deploymentJobExecutor.deconstruct(); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Maintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Maintainer.java index ebab2054d4f..40563c4cf95 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Maintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Maintainer.java @@ -41,8 +41,6 @@ public abstract class Maintainer extends AbstractComponent implements Runnable { protected Controller controller() { return controller; } - protected CuratorDb curator() { return jobControl.curator(); } - @Override public void run() { try { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java index 75f348904dd..8c661e7db9d 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java @@ -9,11 +9,14 @@ import com.yahoo.vespa.hosted.controller.application.ApplicationList; import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; import com.yahoo.vespa.hosted.controller.versions.VespaVersion; +import com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence; import com.yahoo.yolean.Exceptions; import java.time.Duration; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.logging.Level; import java.util.logging.Logger; @@ -42,12 +45,12 @@ public class Upgrader extends Maintainer { public void maintain() { // Determine target versions for each upgrade policy Optional<Version> canaryTarget = controller().versionStatus().systemVersion().map(VespaVersion::versionNumber); - Optional<Version> defaultTarget = newestVersionWithConfidence(VespaVersion.Confidence.normal); - Optional<Version> conservativeTarget = newestVersionWithConfidence(VespaVersion.Confidence.high); + Optional<Version> defaultTarget = newestVersionWithConfidence(Confidence.normal); + Optional<Version> conservativeTarget = newestVersionWithConfidence(Confidence.high); // Cancel upgrades to broken targets (let other ongoing upgrades complete to avoid starvation for (VespaVersion version : controller().versionStatus().versions()) { - if (version.confidence() == VespaVersion.Confidence.broken) + if (version.confidence() == Confidence.broken) cancelUpgradesOf(applications().without(UpgradePolicy.canary).upgradingTo(version.versionNumber()), version.versionNumber() + " is broken"); } @@ -67,7 +70,7 @@ public class Upgrader extends Maintainer { conservativeTarget.ifPresent(target -> upgrade(applications().with(UpgradePolicy.conservative), target)); } - private Optional<Version> newestVersionWithConfidence(VespaVersion.Confidence confidence) { + private Optional<Version> newestVersionWithConfidence(Confidence confidence) { return reversed(controller().versionStatus().versions()).stream() .filter(v -> v.confidence().equalOrHigherThan(confidence)) .findFirst() @@ -124,18 +127,20 @@ public class Upgrader extends Maintainer { curator.writeUpgradesPerMinute(n); } - /** - * Returns whether to ignore confidence calculations when upgrading - */ - public boolean ignoreConfidence() { - return curator.readIgnoreConfidence(); + /** Override confidence for given version. This will cause the computed confidence to be ignored */ + public void overrideConfidence(Version version, Confidence confidence) { + Map<Version, Confidence> overrides = new LinkedHashMap<>(curator.readConfidenceOverrides()); + overrides.put(version, confidence); + curator.writeConfidenceOverrides(overrides); } - /** - * Controls whether to ignore confidence calculations or not - */ - public void ignoreConfidence(boolean value) { - curator.writeIgnoreConfidence(value); + /** Returns all confidence overrides */ + public Map<Version, Confidence> confidenceOverrides() { + return curator.readConfidenceOverrides(); } + /** Remove confidence override for given version */ + public void removeConfidenceOverride(Version version) { + controller().removeConfidenceOverride(v -> v.equals(version)); + } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ConfidenceOverrideSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ConfidenceOverrideSerializer.java new file mode 100644 index 00000000000..c56d8b3849c --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ConfidenceOverrideSerializer.java @@ -0,0 +1,42 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.controller.persistence; + + +import com.yahoo.component.Version; +import com.yahoo.slime.Cursor; +import com.yahoo.slime.ObjectTraverser; +import com.yahoo.slime.Slime; +import com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence; + +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * Serializes overrides of version confidence. + * + * @author mpolden + */ +public class ConfidenceOverrideSerializer { + + private final static String overridesField = "overrides"; + + public Slime toSlime(Map<Version, Confidence> overrides) { + Slime slime = new Slime(); + Cursor root = slime.setObject(); + Cursor object = root.setObject(overridesField); + overrides.forEach((version, confidence) -> object.setString(version.toString(), confidence.name())); + return slime; + } + + public Map<Version, Confidence> fromSlime(Slime slime) { + Cursor root = slime.get(); + Cursor overridesObject = root.field(overridesField); + Map<Version, Confidence> overrides = new LinkedHashMap<>(); + overridesObject.traverse((ObjectTraverser) (name, value) -> { + overrides.put(Version.fromString(name), Confidence.valueOf(value.asString())); + }); + return Collections.unmodifiableMap(overrides); + } + +} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java index a3bb191fc38..bef33e739be 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.controller.persistence; import com.google.inject.Inject; +import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.path.Path; import com.yahoo.transaction.NestedTransaction; @@ -11,6 +12,7 @@ import com.yahoo.vespa.curator.Lock; import com.yahoo.vespa.hosted.controller.api.identifiers.TenantId; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs; import com.yahoo.vespa.hosted.controller.versions.VersionStatus; +import com.yahoo.vespa.hosted.controller.versions.VespaVersion; import java.io.IOException; import java.io.UncheckedIOException; @@ -21,6 +23,7 @@ import java.util.Collections; import java.util.Deque; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -123,33 +126,29 @@ public class CuratorDb { } public void writeInactiveJobs(Set<String> inactiveJobs) { - NestedTransaction transaction = new NestedTransaction(); curator.set(inactiveJobsPath(), stringSetSerializer.toJson(inactiveJobs)); - transaction.commit(); } public Deque<ApplicationId> readJobQueue(DeploymentJobs.JobType jobType) { try { Optional<byte[]> data = curator.getData(jobQueuePath(jobType)); - if (! data.isPresent() || data.get().length == 0) return new ArrayDeque<>(); // job queue has never been written + if ( ! data.isPresent() || data.get().length == 0) return new ArrayDeque<>(); // job queue has never been written return jobQueueSerializer.fromJson(data.get()); } catch (RuntimeException e) { - log.log(Level.WARNING, "Error reading job queue, deleting inactive state"); - writeInactiveJobs(Collections.emptySet()); + log.log(Level.WARNING, "Error reading job queue of type '" + jobType.jobName() + "'; deleting it."); + writeJobQueue(jobType, Collections::emptyIterator); return new ArrayDeque<>(); } } - public void writeJobQueue(DeploymentJobs.JobType jobType, Deque<ApplicationId> queue) { - NestedTransaction transaction = new NestedTransaction(); + public void writeJobQueue(DeploymentJobs.JobType jobType, Iterable<ApplicationId> queue) { curator.set(jobQueuePath(jobType), jobQueueSerializer.toJson(queue)); - transaction.commit(); } public double readUpgradesPerMinute() { Optional<byte[]> n = curator.getData(upgradesPerMinutePath()); - if (!n.isPresent() || n.get().length == 0) { + if ( ! n.isPresent() || n.get().length == 0) { return 0.5; // Default if value has never been written } return ByteBuffer.wrap(n.get()).getDouble(); @@ -159,69 +158,78 @@ public class CuratorDb { if (n < 0) { throw new IllegalArgumentException("Upgrades per minute must be >= 0"); } - NestedTransaction transaction = new NestedTransaction(); curator.set(upgradesPerMinutePath(), ByteBuffer.allocate(Double.BYTES).putDouble(n).array()); - transaction.commit(); - } - - public boolean readIgnoreConfidence() { - Optional<byte[]> value = curator.getData(ignoreConfidencePath()); - if (! value.isPresent() || value.get().length == 0) { - return false; // Default if value has never been written - } - return ByteBuffer.wrap(value.get()).getInt() == 1; - } - - public void writeIgnoreConfidence(boolean value) { - NestedTransaction transaction = new NestedTransaction(); - curator.set(ignoreConfidencePath(), ByteBuffer.allocate(Integer.BYTES).putInt(value ? 1 : 0).array()); - transaction.commit(); } - + public void writeVersionStatus(VersionStatus status) { VersionStatusSerializer serializer = new VersionStatusSerializer(); - NestedTransaction transaction = new NestedTransaction(); try { curator.set(versionStatusPath(), SlimeUtils.toJsonBytes(serializer.toSlime(status))); } catch (IOException e) { throw new UncheckedIOException("Failed to serialize version status", e); } - transaction.commit(); } public VersionStatus readVersionStatus() { Optional<byte[]> data = curator.getData(versionStatusPath()); - if (!data.isPresent() || data.get().length == 0) { + if ( ! data.isPresent() || data.get().length == 0) { return VersionStatus.empty(); // Default if status has never been written } VersionStatusSerializer serializer = new VersionStatusSerializer(); return serializer.fromSlime(SlimeUtils.jsonToSlime(data.get())); } + public void writeConfidenceOverrides(Map<Version, VespaVersion.Confidence> overrides) { + ConfidenceOverrideSerializer serializer = new ConfidenceOverrideSerializer(); + try { + curator.set(confidenceOverridesPath(), SlimeUtils.toJsonBytes(serializer.toSlime(overrides))); + } catch (IOException e) { + throw new UncheckedIOException("Failed to serialize confidence overrides", e); + } + } + + public Map<Version, VespaVersion.Confidence> readConfidenceOverrides() { + ConfidenceOverrideSerializer serializer = new ConfidenceOverrideSerializer(); + Optional<byte[]> data = curator.getData(confidenceOverridesPath()); + if (!data.isPresent() || data.get().length == 0) { + return Collections.emptyMap(); + } + return serializer.fromSlime(SlimeUtils.jsonToSlime(data.get())); + } + + // The following methods are called by internal code + + @SuppressWarnings("unused") public Optional<byte[]> readProvisionState(String provisionId) { return curator.getData(provisionStatePath(provisionId)); } + @SuppressWarnings("unused") public void writeProvisionState(String provisionId, byte[] data) { curator.set(provisionStatePath(provisionId), data); } + @SuppressWarnings("unused") public List<String> readProvisionStateIds() { return curator.getChildren(provisionStatePath()); } + @SuppressWarnings("unused") public Optional<byte[]> readVespaServerPool() { return curator.getData(vespaServerPoolPath()); } + @SuppressWarnings("unused") public void writeVespaServerPool(byte[] data) { curator.set(vespaServerPoolPath(), data); } + @SuppressWarnings("unused") public Optional<byte[]> readOpenStackServerPool() { return curator.getData(openStackServerPoolPath()); } + @SuppressWarnings("unused") public void writeOpenStackServerPool(byte[] data) { curator.set(openStackServerPoolPath(), data); } @@ -252,37 +260,39 @@ public class CuratorDb { return lockPath; } - private Path inactiveJobsPath() { + private static Path inactiveJobsPath() { return root.append("inactiveJobs"); } - private Path jobQueuePath(DeploymentJobs.JobType jobType) { + private static Path jobQueuePath(DeploymentJobs.JobType jobType) { return root.append("jobQueues").append(jobType.name()); } - private Path upgradesPerMinutePath() { + private static Path upgradesPerMinutePath() { return root.append("upgrader").append("upgradesPerMinute"); } - private Path ignoreConfidencePath() { - return root.append("upgrader").append("ignoreConfidence"); + private static Path confidenceOverridesPath() { + return root.append("upgrader").append("confidenceOverrides"); } - private Path versionStatusPath() { return root.append("versionStatus"); } + private static Path versionStatusPath() { + return root.append("versionStatus"); + } - private Path provisionStatePath() { + private static Path provisionStatePath() { return root.append("provisioning").append("states"); } - private Path provisionStatePath(String provisionId) { + private static Path provisionStatePath(String provisionId) { return provisionStatePath().append(provisionId); } - private Path vespaServerPoolPath() { + private static Path vespaServerPoolPath() { return root.append("vespaServerPool"); } - private Path openStackServerPoolPath() { + private static Path openStackServerPoolPath() { return root.append("openStackServerPool"); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/JobQueueSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/JobQueueSerializer.java index 5017624f286..f6d6406a820 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/JobQueueSerializer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/JobQueueSerializer.java @@ -12,18 +12,19 @@ import java.io.IOException; import java.util.ArrayDeque; import java.util.Deque; import java.util.HashSet; +import java.util.List; import java.util.Set; /** * Serialization of a queue of ApplicationIds to/from Json bytes using Slime. * - * The set is serialized as an array of string. + * The queue is serialized as an array of strings. * * @author bratseth */ public class JobQueueSerializer { - public byte[] toJson(Deque<ApplicationId> queue) { + public byte[] toJson(Iterable<ApplicationId> queue) { try { Slime slime = new Slime(); Cursor array = slime.setArray(); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializer.java index 6b60b49e1ef..18648d4a488 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializer.java @@ -31,7 +31,7 @@ public class VersionStatusSerializer { // VespaVersion fields private static final String releaseCommitField = "releaseCommit"; - private static final String releasedAtField = "releasedAt"; + private static final String committedAtField = "releasedAt"; private static final String isCurrentSystemVersionField = "isCurrentSystemVersion"; private static final String deploymentStatisticsField = "deploymentStatistics"; private static final String confidenceField = "confidence"; @@ -61,7 +61,7 @@ public class VersionStatusSerializer { private void vespaVersionToSlime(VespaVersion version, Cursor object) { object.setString(releaseCommitField, version.releaseCommit()); - object.setLong(releasedAtField, version.releasedAt().toEpochMilli()); + object.setLong(committedAtField, version.committedAt().toEpochMilli()); object.setBool(isCurrentSystemVersionField, version.isCurrentSystemVersion()); deploymentStatisticsToSlime(version.statistics(), object.setObject(deploymentStatisticsField)); object.setString(confidenceField, version.confidence().name()); @@ -92,7 +92,7 @@ public class VersionStatusSerializer { private VespaVersion vespaVersionFromSlime(Inspector object) { return new VespaVersion(deploymentStatisticsFromSlime(object.field(deploymentStatisticsField)), object.field(releaseCommitField).asString(), - Instant.ofEpochMilli(object.field(releasedAtField).asLong()), + Instant.ofEpochMilli(object.field(committedAtField).asLong()), object.field(isCurrentSystemVersionField).asBool(), configServersFromSlime(object.field(configServersField)), VespaVersion.Confidence.valueOf(object.field(confidenceField).asString()) diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java index a9eaaf4048c..6fefb7099f1 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java @@ -1,12 +1,12 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.restapi.controller; +import com.yahoo.component.Version; import com.yahoo.container.jdisc.HttpRequest; import com.yahoo.container.jdisc.HttpResponse; import com.yahoo.container.jdisc.LoggingRequestHandler; import com.yahoo.io.IOUtils; import com.yahoo.slime.Inspector; -import com.yahoo.text.Utf8; import com.yahoo.vespa.config.SlimeUtils; import com.yahoo.vespa.hosted.controller.maintenance.ControllerMaintenance; import com.yahoo.vespa.hosted.controller.maintenance.Upgrader; @@ -14,11 +14,13 @@ import com.yahoo.vespa.hosted.controller.restapi.ErrorResponse; import com.yahoo.vespa.hosted.controller.restapi.MessageResponse; import com.yahoo.vespa.hosted.controller.restapi.Path; import com.yahoo.vespa.hosted.controller.restapi.ResourceResponse; +import com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence; import com.yahoo.yolean.Exceptions; import java.io.IOException; import java.io.InputStream; import java.io.UncheckedIOException; +import java.util.Scanner; import java.util.logging.Level; /** @@ -67,15 +69,15 @@ public class ControllerApiHandler extends LoggingRequestHandler { private HttpResponse post(HttpRequest request) { Path path = new Path(request.getUri().getPath()); - if (path.matches("/controller/v1/maintenance/inactive/{jobName}")) - return setActive(path.get("jobName"), false); + if (path.matches("/controller/v1/maintenance/inactive/{jobName}")) return setActive(path.get("jobName"), false); + if (path.matches("/controller/v1/jobs/upgrader/confidence/{version}")) return overrideConfidence(request, path.get("version")); return notFound(path); } private HttpResponse delete(HttpRequest request) { Path path = new Path(request.getUri().getPath()); - if (path.matches("/controller/v1/maintenance/inactive/{jobName}")) - return setActive(path.get("jobName"), true); + if (path.matches("/controller/v1/maintenance/inactive/{jobName}")) return setActive(path.get("jobName"), true); + if (path.matches("/controller/v1/jobs/upgrader/confidence/{version}")) return removeConfidenceOverride(path.get("version")); return notFound(path); } @@ -100,24 +102,41 @@ public class ControllerApiHandler extends LoggingRequestHandler { private HttpResponse configureUpgrader(HttpRequest request) { String upgradesPerMinuteField = "upgradesPerMinute"; - String ignoreConfidenceField = "ignoreConfidence"; + String confidenceOverrideField = "confidenceOverride"; byte[] jsonBytes = toJsonBytes(request.getData()); Inspector inspect = SlimeUtils.jsonToSlime(jsonBytes).get(); Upgrader upgrader = maintenance.upgrader(); + if (inspect.field(upgradesPerMinuteField).valid()) { upgrader.setUpgradesPerMinute(inspect.field(upgradesPerMinuteField).asDouble()); - } else if (inspect.field(ignoreConfidenceField).valid()) { - upgrader.ignoreConfidence(inspect.field(ignoreConfidenceField).asBool()); } else { - return ErrorResponse.badRequest("Unable to configure upgrader with data in request: '" + - Utf8.toString(jsonBytes) + "'"); + return ErrorResponse.badRequest("No such modifiable field(s)"); } return new UpgraderResponse(maintenance.upgrader()); } - private byte[] toJsonBytes(InputStream jsonStream) { + private HttpResponse removeConfidenceOverride(String version) { + maintenance.upgrader().removeConfidenceOverride(Version.fromString(version)); + return new UpgraderResponse(maintenance.upgrader()); + } + + private HttpResponse overrideConfidence(HttpRequest request, String version) { + Confidence confidence = Confidence.valueOf(asString(request.getData())); + maintenance.upgrader().overrideConfidence(Version.fromString(version), confidence); + return new UpgraderResponse(maintenance.upgrader()); + } + + private static String asString(InputStream in) { + Scanner scanner = new Scanner(in).useDelimiter("\\A"); + if (scanner.hasNext()) { + return scanner.next(); + } + return ""; + } + + private static byte[] toJsonBytes(InputStream jsonStream) { try { return IOUtils.readBytes(jsonStream, 1000 * 1000); } catch (IOException e) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/UpgraderResponse.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/UpgraderResponse.java index 0e6e0030ecf..beb6c98e447 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/UpgraderResponse.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/UpgraderResponse.java @@ -27,7 +27,11 @@ public class UpgraderResponse extends HttpResponse { Slime slime = new Slime(); Cursor root = slime.setObject(); root.setDouble("upgradesPerMinute", upgrader.upgradesPerMinute()); - root.setBool("ignoreConfidence", upgrader.ignoreConfidence()); + Cursor array = root.setArray("confidenceOverrides"); + upgrader.confidenceOverrides().forEach((version, confidence) -> { + Cursor object = array.addObject(); + object.setString(version.toString(), confidence.name()); + }); new JsonFormat(true).encode(outputStream, slime); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java index 8338d341a2b..f9d3901765f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java @@ -84,7 +84,7 @@ public class DeploymentApiHandler extends LoggingRequestHandler { versionObject.setString("version", version.versionNumber().toString()); versionObject.setString("confidence", version.confidence().name()); versionObject.setString("commit", version.releaseCommit()); - versionObject.setLong("date", version.releasedAt().toEpochMilli()); + versionObject.setLong("date", version.committedAt().toEpochMilli()); versionObject.setBool("controllerVersion", version.isSelfVersion()); versionObject.setBool("systemVersion", version.isCurrentSystemVersion()); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/filter/ControllerAuthorizationFilter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/filter/ControllerAuthorizationFilter.java index 5be7fe03319..8a539beb83a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/filter/ControllerAuthorizationFilter.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/filter/ControllerAuthorizationFilter.java @@ -112,8 +112,7 @@ public class ControllerAuthorizationFilter implements SecurityRequestFilter { private static boolean isWhiteListedOperation(Path path, Method method) { return path.matches("/screwdriver/v1/jobsToRun") || // TODO EOL'ed API, remove this once api is gone path.matches("/application/v4/user") && method == PUT || // Create user tenant - path.matches("/application/v4/tenant/{tenant}") && method == POST || // Create tenant - path.matches("/screwdriver/v1/jobreport"); // TODO To be migrated to application/v4 + path.matches("/application/v4/tenant/{tenant}") && method == POST; // Create tenant } private static boolean isHostedOperatorOperation(Path path, Method method) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiHandler.java index 14f3e6b2f61..3a8d74d2c2e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiHandler.java @@ -5,26 +5,18 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.container.jdisc.HttpRequest; import com.yahoo.container.jdisc.HttpResponse; import com.yahoo.container.jdisc.LoggingRequestHandler; -import com.yahoo.io.IOUtils; import com.yahoo.jdisc.http.HttpRequest.Method; import com.yahoo.slime.Cursor; -import com.yahoo.slime.Inspector; import com.yahoo.slime.Slime; -import com.yahoo.vespa.config.SlimeUtils; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.integration.BuildService.BuildJob; -import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobError; -import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobReport; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType; -import com.yahoo.vespa.hosted.controller.application.SourceRevision; import com.yahoo.vespa.hosted.controller.restapi.ErrorResponse; import com.yahoo.vespa.hosted.controller.restapi.Path; import com.yahoo.vespa.hosted.controller.restapi.SlimeJsonResponse; -import com.yahoo.vespa.hosted.controller.restapi.StringResponse; import com.yahoo.vespa.hosted.controller.versions.VespaVersion; import com.yahoo.yolean.Exceptions; -import java.io.IOException; import java.io.InputStream; import java.util.List; import java.util.Optional; @@ -58,7 +50,6 @@ public class ScrewdriverApiHandler extends LoggingRequestHandler { switch (method) { case GET: return get(request); case POST: return post(request); - case DELETE: return delete(request); default: return ErrorResponse.methodNotAllowed("Method '" + method + "' is unsupported"); } } catch (IllegalArgumentException|IllegalStateException e) { @@ -75,30 +66,19 @@ public class ScrewdriverApiHandler extends LoggingRequestHandler { return vespaVersion(); } if (path.matches("/screwdriver/v1/jobsToRun")) { - return buildJobs(controller.applications().deploymentTrigger().buildSystem().jobs()); + return buildJobs(controller.applications().deploymentTrigger().deploymentQueue().jobs()); } return notFound(request); } private HttpResponse post(HttpRequest request) { Path path = new Path(request.getUri().getPath()); - if (path.matches("/screwdriver/v1/jobreport")) { - return notifyJobCompletion(request); - } if (path.matches("/screwdriver/v1/trigger/tenant/{tenant}/application/{application}")) { return trigger(request, path.get("tenant"), path.get("application")); } return notFound(request); } - private HttpResponse delete(HttpRequest request) { - Path path = new Path(request.getUri().getPath()); - if (path.matches("/screwdriver/v1/jobsToRun")) { - return buildJobs(controller.applications().deploymentTrigger().buildSystem().takeJobsToRun()); - } - return notFound(request); - } - private HttpResponse trigger(HttpRequest request, String tenantName, String applicationName) { JobType jobType = Optional.of(asString(request.getData())) .filter(s -> !s.isEmpty()) @@ -131,7 +111,7 @@ public class ScrewdriverApiHandler extends LoggingRequestHandler { Cursor cursor = slime.setObject(); cursor.setString("version", version.versionNumber().toString()); cursor.setString("sha", version.releaseCommit()); - cursor.setLong("date", version.releasedAt().toEpochMilli()); + cursor.setLong("date", version.committedAt().toEpochMilli()); return new SlimeJsonResponse(slime); } @@ -147,52 +127,6 @@ public class ScrewdriverApiHandler extends LoggingRequestHandler { return new SlimeJsonResponse(slime); } - /** - * @deprecated Method migrated to application v4 - this method will be removed soon. - */ - @Deprecated - private HttpResponse notifyJobCompletion(HttpRequest request) { - controller.applications().notifyJobCompletion(toJobReport(toSlime(request.getData()).get())); - return new StringResponse("ok"); - } - - private Slime toSlime(InputStream jsonStream) { - try { - byte[] jsonBytes = IOUtils.readBytes(jsonStream, 1000 * 1000); - return SlimeUtils.jsonToSlime(jsonBytes); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - private JobReport toJobReport(Inspector report) { - Optional<JobError> jobError = Optional.empty(); - if (report.field("jobError").valid()) { - jobError = Optional.of(JobError.valueOf(report.field("jobError").asString())); - } - return new JobReport( - ApplicationId.from( - report.field("tenant").asString(), - report.field("application").asString(), - report.field("instance").asString()), - JobType.fromJobName(report.field("jobName").asString()), - report.field("projectId").asLong(), - report.field("buildNumber").asLong(), - toSourceRevision(report.field("sourceRevision")), - jobError - ); - } - - private static Optional<SourceRevision> toSourceRevision(Inspector object) { - if (!object.field("repository").valid() || - !object.field("branch").valid() || - !object.field("commit").valid()) { - return Optional.empty(); - } - return Optional.of(new SourceRevision(object.field("repository").asString(), object.field("branch").asString(), - object.field("commit").asString())); - } - private static String asString(InputStream in) { Scanner scanner = new Scanner(in).useDelimiter("\\A"); if (scanner.hasNext()) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VersionStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VersionStatus.java index 9bc7b7a22d8..d628489bc29 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VersionStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VersionStatus.java @@ -177,18 +177,19 @@ public class VersionStatus { Collection<String> configServerHostnames, Controller controller) { GitSha gitSha = controller.gitHub().getCommit(VESPA_REPO_OWNER, VESPA_REPO, statistics.version().toFullString()); - Instant releasedAt = Instant.ofEpochMilli(gitSha.commit.author.date.getTime()); // commitedAt ... - VespaVersion.Confidence confidence; - // Always compute confidence for system version - if (isSystemVersion) { - confidence = VespaVersion.confidenceFrom(statistics, controller); - } else { - // Keep existing confidence for non-system versions if already computed - confidence = confidenceFor(statistics.version(), controller) - .orElse(VespaVersion.confidenceFrom(statistics, controller)); + Instant committedAt = Instant.ofEpochMilli(gitSha.commit.author.date.getTime()); + VespaVersion.Confidence confidence = controller.curator().readConfidenceOverrides().get(statistics.version()); + // Compute confidence if there's no override + if (confidence == null) { + if (isSystemVersion) { // Always compute confidence for system version + confidence = VespaVersion.confidenceFrom(statistics, controller); + } else { // Keep existing confidence for non-system versions if already computed + confidence = confidenceFor(statistics.version(), controller) + .orElse(VespaVersion.confidenceFrom(statistics, controller)); + } } return new VespaVersion(statistics, - gitSha.sha, releasedAt, + gitSha.sha, committedAt, isSystemVersion, configServerHostnames, confidence diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java index ea89a70543c..1aa94507b61 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java @@ -6,7 +6,6 @@ import com.yahoo.component.Version; import com.yahoo.component.Vtag; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.application.ApplicationList; -import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; import java.time.Instant; import java.util.Collection; @@ -25,18 +24,18 @@ import static com.yahoo.config.application.api.DeploymentSpec.UpgradePolicy; public class VespaVersion implements Comparable<VespaVersion> { private final String releaseCommit; - private final Instant releasedAt; + private final Instant committedAt; private final boolean isCurrentSystemVersion; private final DeploymentStatistics statistics; private final ImmutableSet<String> configServerHostnames; private final Confidence confidence; - public VespaVersion(DeploymentStatistics statistics, String releaseCommit, Instant releasedAt, + public VespaVersion(DeploymentStatistics statistics, String releaseCommit, Instant committedAt, boolean isCurrentSystemVersion, Collection<String> configServerHostnames, Confidence confidence) { this.statistics = statistics; this.releaseCommit = releaseCommit; - this.releasedAt = releasedAt; + this.committedAt = committedAt; this.isCurrentSystemVersion = isCurrentSystemVersion; this.configServerHostnames = ImmutableSet.copyOf(configServerHostnames); this.confidence = confidence; @@ -57,7 +56,7 @@ public class VespaVersion implements Comparable<VespaVersion> { return Confidence.broken; // 'broken' if 4 non-canary was broken by this, and that is at least 10% of all - if (nonCanaryApplicationsBroken(statistics.version(), failingOnThis, productionOnThis, controller.curator())) + if (nonCanaryApplicationsBroken(statistics.version(), failingOnThis, productionOnThis)) return Confidence.broken; // 'low' unless all canary applications are upgraded @@ -79,7 +78,7 @@ public class VespaVersion implements Comparable<VespaVersion> { public String releaseCommit() { return releaseCommit; } /** Returns the time of the release commit */ - public Instant releasedAt() { return releasedAt; } + public Instant committedAt() { return committedAt; } /** Statistics about deployment of this version */ public DeploymentStatistics statistics() { return statistics; } @@ -143,12 +142,11 @@ public class VespaVersion implements Comparable<VespaVersion> { private static boolean nonCanaryApplicationsBroken(Version version, ApplicationList failingOnThis, - ApplicationList productionOnThis, - CuratorDb curator) { + ApplicationList productionOnThis) { ApplicationList failingNonCanaries = failingOnThis.without(UpgradePolicy.canary).startedFailingOn(version); ApplicationList productionNonCanaries = productionOnThis.without(UpgradePolicy.canary); - if (productionNonCanaries.size() + failingNonCanaries.size() == 0 || curator.readIgnoreConfidence()) return false; + if (productionNonCanaries.size() + failingNonCanaries.size() == 0) return false; // 'broken' if 4 non-canary was broken by this, and that is at least 10% of all int brokenByThisVersion = failingNonCanaries.size(); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java index 25ea29f92cc..22730cd2fb2 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java @@ -23,6 +23,7 @@ import com.yahoo.vespa.hosted.controller.api.identifiers.PropertyId; import com.yahoo.vespa.hosted.controller.api.identifiers.ScrewdriverId; import com.yahoo.vespa.hosted.controller.api.identifiers.TenantId; import com.yahoo.vespa.hosted.controller.api.identifiers.UserGroup; +import com.yahoo.vespa.hosted.controller.api.integration.BuildService; import com.yahoo.vespa.hosted.controller.api.integration.dns.Record; import com.yahoo.vespa.hosted.controller.api.integration.dns.RecordName; import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId; @@ -37,7 +38,7 @@ import com.yahoo.vespa.hosted.controller.application.JobStatus; import com.yahoo.vespa.hosted.controller.application.SourceRevision; import com.yahoo.vespa.hosted.controller.athenz.mock.AthenzDbMock; import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder; -import com.yahoo.vespa.hosted.controller.deployment.BuildSystem; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentQueue; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester; import com.yahoo.vespa.hosted.controller.deployment.BuildJob; import com.yahoo.vespa.hosted.controller.persistence.ApplicationSerializer; @@ -308,7 +309,7 @@ public class ControllerTest { for (int i = 0; i < versions.size(); i++) { VespaVersion c = versions.get(i); if (c.isCurrentSystemVersion()) - versions.set(i, new VespaVersion(c.statistics(), c.releaseCommit(), c.releasedAt(), + versions.set(i, new VespaVersion(c.statistics(), c.releaseCommit(), c.committedAt(), false, c.configServerHostnames(), c.confidence())); } @@ -461,7 +462,7 @@ public class ControllerTest { Application app1 = tester.createApplication("app1", "tenant1", project1, 1L); Application app2 = tester.createApplication("app2", "tenant2", project2, 1L); Application app3 = tester.createApplication("app3", "tenant3", project3, 1L); - BuildSystem buildSystem = tester.controller().applications().deploymentTrigger().buildSystem(); + DeploymentQueue deploymentQueue = tester.controller().applications().deploymentTrigger().deploymentQueue(); // all applications: system-test completes successfully tester.jobCompletion(component).application(app1).uploadArtifact(applicationPackage).submit(); @@ -474,7 +475,7 @@ public class ControllerTest { tester.deployAndNotify(app3, applicationPackage, true, systemTest); // all applications: staging test jobs queued - assertEquals(3, buildSystem.jobs().size()); + assertEquals(3, deploymentQueue.jobs().size()); // app1: staging-test job fails with out of capacity and is added to the front of the queue tester.deploy(stagingTest, app1, applicationPackage); @@ -482,8 +483,8 @@ public class ControllerTest { .application(app1) .error(JobError.outOfCapacity) .submit(); - assertEquals(stagingTest.jobName(), buildSystem.jobs().get(0).jobName()); - assertEquals(project1, buildSystem.jobs().get(0).projectId()); + assertEquals(stagingTest.jobName(), deploymentQueue.jobs().get(0).jobName()); + assertEquals(project1, deploymentQueue.jobs().get(0).projectId()); // app2 and app3: Completes deployment tester.deployAndNotify(app2, applicationPackage, true, stagingTest); @@ -498,9 +499,9 @@ public class ControllerTest { tester.jobCompletion(component).application(app1).buildNumber(43).uploadArtifact(applicationPackage).submit(); tester.deployAndNotify(app1, applicationPackage, true, false, systemTest); tester.deploy(stagingTest, app1, applicationPackage); - assertEquals(1, buildSystem.takeJobsToRun().size()); + assertEquals(1, deploymentQueue.takeJobsToRun().size()); tester.jobCompletion(stagingTest).application(app1).error(JobError.outOfCapacity).submit(); - assertTrue("No jobs queued", buildSystem.jobs().isEmpty()); + assertTrue("No jobs queued", deploymentQueue.jobs().isEmpty()); // app2 and app3: New change triggers system-test jobs // Provide a changed application package, too, or the deployment is a no-op. @@ -510,26 +511,18 @@ public class ControllerTest { tester.jobCompletion(component).application(app3).buildNumber(43).uploadArtifact(applicationPackage).submit(); tester.deployAndNotify(app3, applicationPackage2, true, systemTest); - assertEquals(2, buildSystem.jobs().size()); + assertEquals(2, deploymentQueue.jobs().size()); - // app1: 4 hours pass in total, staging-test job is re-queued by periodic trigger mechanism and added at the + // app1: 4 hours pass in total, staging-test job for app1 is re-queued by periodic trigger mechanism and added at the // back of the queue tester.clock().advance(Duration.ofHours(3)); tester.clock().advance(Duration.ofMinutes(50)); tester.readyJobTrigger().maintain(); - List<com.yahoo.vespa.hosted.controller.api.integration.BuildService.BuildJob> nextJobs = buildSystem.takeJobsToRun(); - assertEquals(2, nextJobs.size()); - assertEquals(stagingTest.jobName(), nextJobs.get(0).jobName()); - assertEquals(project2, nextJobs.get(0).projectId()); - assertEquals(stagingTest.jobName(), nextJobs.get(1).jobName()); - assertEquals(project3, nextJobs.get(1).projectId()); - - // And finally the requeued job for app1 - nextJobs = buildSystem.takeJobsToRun(); - assertEquals(1, nextJobs.size()); - assertEquals(stagingTest.jobName(), nextJobs.get(0).jobName()); - assertEquals(project1, nextJobs.get(0).projectId()); + assertEquals(Collections.singletonList(new BuildService.BuildJob(project2, stagingTest.jobName())), deploymentQueue.takeJobsToRun()); + assertEquals(Collections.singletonList(new BuildService.BuildJob(project3, stagingTest.jobName())), deploymentQueue.takeJobsToRun()); + assertEquals(Collections.singletonList(new BuildService.BuildJob(project1, stagingTest.jobName())), deploymentQueue.takeJobsToRun()); + assertEquals(Collections.emptyList(), deploymentQueue.takeJobsToRun()); } private void assertStatus(JobStatus expectedStatus, ApplicationId id, Controller controller) { @@ -630,7 +623,7 @@ public class ControllerTest { // Test environments pass tester.deploy(DeploymentJobs.JobType.systemTest, application, applicationPackage); - tester.buildSystem().takeJobsToRun(); + tester.deploymentQueue().takeJobsToRun(); tester.clock().advance(Duration.ofMinutes(10)); tester.jobCompletion(systemTest).application(application).submit(); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentJobExecutorTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentJobExecutorTest.java new file mode 100644 index 00000000000..781725bc53c --- /dev/null +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentJobExecutorTest.java @@ -0,0 +1,86 @@ +package com.yahoo.vespa.hosted.controller.deployment; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.hosted.controller.api.integration.BuildService; +import com.yahoo.vespa.hosted.controller.api.integration.BuildService.BuildJob; +import com.yahoo.vespa.hosted.controller.application.DeploymentJobs; +import com.yahoo.vespa.hosted.controller.maintenance.JobControl; +import org.junit.Test; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; + +import static org.junit.Assert.assertEquals; + +/** + * @author jvenstad + */ +public class DeploymentJobExecutorTest { + + @Test + public void testMaintenance() { + DeploymentTester tester = new DeploymentTester(); + JobControl jobControl = new JobControl(tester.controller().curator()); + + int project1 = 1; + int project2 = 2; + int project3 = 3; + + ApplicationId app1 = tester.createApplication("app1", "tenant", project1, null).id(); + ApplicationId app2 = tester.createApplication("app2", "tenant", project2, null).id(); + ApplicationId app3 = tester.createApplication("app3", "tenant", project3, null).id(); + + // Create a BuildService which always rejects jobs from project2, but accepts and runs all others. + ArrayList<BuildJob> buildJobs = new ArrayList<>(); + BuildService buildService = buildJob -> buildJob.projectId() == project2 ? false : buildJobs.add(buildJob); + + DeploymentJobExecutor triggerer = new DeploymentJobExecutor(tester.controller(), + Duration.ofDays(1), + jobControl, + buildService, + Runnable::run); + + triggerer.maintain(); + assertEquals("No jobs are triggered initially.", + Collections.emptyList(), + buildJobs); + + // Trigger jobs in capacity constrained environment + tester.deploymentQueue().addJob(app1, DeploymentJobs.JobType.systemTest, false); + tester.deploymentQueue().addJob(app2, DeploymentJobs.JobType.systemTest, false); + tester.deploymentQueue().addJob(app3, DeploymentJobs.JobType.systemTest, false); + + // Trigger jobs in non-capacity constrained environment + tester.deploymentQueue().addJob(app1, DeploymentJobs.JobType.productionUsWest1, false); + tester.deploymentQueue().addJob(app2, DeploymentJobs.JobType.productionUsWest1, false); + tester.deploymentQueue().addJob(app3, DeploymentJobs.JobType.productionUsWest1, false); + + triggerer.maintain(); + assertEquals("One system test job and all production jobs not for app2 are triggered after one maintenance run.", + Arrays.asList(new BuildJob(project1, DeploymentJobs.JobType.systemTest.jobName()), + new BuildJob(project1, DeploymentJobs.JobType.productionUsWest1.jobName()), + new BuildJob(project3, DeploymentJobs.JobType.productionUsWest1.jobName())), + buildJobs); + + buildJobs.clear(); + triggerer.maintain(); + assertEquals("Next job in line fails to trigger in the build service.", + Collections.emptyList(), + buildJobs); + + buildJobs.clear(); + triggerer.maintain(); + assertEquals("Next job which was waiting for capacity is triggered on next run.", + Collections.singletonList(new BuildJob(project3, DeploymentJobs.JobType.systemTest.jobName())), + buildJobs); + + buildJobs.clear(); + triggerer.maintain(); + assertEquals("No jobs are left.", + Collections.emptyList(), + tester.deploymentQueue().takeJobsToRun()); + } + +} diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentQueueTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentQueueTest.java new file mode 100644 index 00000000000..bd2250b4402 --- /dev/null +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentQueueTest.java @@ -0,0 +1,65 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.controller.deployment; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.hosted.controller.ControllerTester; +import com.yahoo.vespa.hosted.controller.api.integration.BuildService.BuildJob; +import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; +import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType; +import com.yahoo.vespa.hosted.controller.persistence.MockCuratorDb; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +/** + * @author jvenstad + */ +public class DeploymentQueueTest { + + @Test + public void testJobOffering() { + DeploymentTester tester = new DeploymentTester(); + DeploymentQueue deploymentQueue = new DeploymentQueue(tester.controller(), tester.controller().curator()); + + int project1 = 1; + int project2 = 2; + int project3 = 3; + + ApplicationId app1 = tester.createApplication("app1", "tenant", project1, null).id(); + ApplicationId app2 = tester.createApplication("app2", "tenant", project2, null).id(); + ApplicationId app3 = tester.createApplication("app3", "tenant", project3, null).id(); + + // Trigger jobs in capacity constrained environment + deploymentQueue.addJob(app1, JobType.systemTest, false); + deploymentQueue.addJob(app2, JobType.systemTest, true); + deploymentQueue.addJob(app3, JobType.stagingTest, false); + + // Trigger jobs in non-capacity constrained environment + deploymentQueue.addJob(app1, JobType.productionUsWest1, false); + deploymentQueue.addJob(app2, JobType.productionUsWest1, false); + deploymentQueue.addJob(app3, JobType.productionUsWest1, false); + + assertEquals("Each offer contains a single job from each capacity constrained environment, and all other jobs.", + Arrays.asList(new BuildJob(project2, JobType.systemTest.jobName()), + new BuildJob(project3, JobType.stagingTest.jobName()), + new BuildJob(project1, JobType.productionUsWest1.jobName()), + new BuildJob(project2, JobType.productionUsWest1.jobName()), + new BuildJob(project3, JobType.productionUsWest1.jobName())), + deploymentQueue.takeJobsToRun()); + + assertEquals("The system test job for project 1 was pushed back in the queue by that for project 2.", + Collections.singletonList(new BuildJob(project1, JobType.systemTest.jobName())), + deploymentQueue.takeJobsToRun()); + + assertEquals("No jobs are left.", + Collections.emptyList(), + deploymentQueue.takeJobsToRun()); + } + +} diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java index 2d508d09b50..241443a1d32 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java @@ -68,7 +68,7 @@ public class DeploymentTester { public ApplicationController applications() { return tester.controller().applications(); } - public BuildSystem buildSystem() { return tester.controller().applications().deploymentTrigger().buildSystem(); } + public DeploymentQueue deploymentQueue() { return tester.controller().applications().deploymentTrigger().deploymentQueue(); } public DeploymentTrigger deploymentTrigger() { return tester.controller().applications().deploymentTrigger(); } @@ -267,24 +267,20 @@ public class DeploymentTester { } if (expectOnlyTheseJobs) assertEquals(jobs.length, countJobsOf(application)); - buildSystem().removeJobs(application.id()); + deploymentQueue().removeJobs(application.id()); } private BuildService.BuildJob findJob(Application application, JobType jobType) { - for (BuildService.BuildJob job : buildSystem().jobs()) { - if (job.projectId() == application.deploymentJobs().projectId().get() && - job.jobName().equals(jobType.jobName())) { + for (BuildService.BuildJob job : deploymentQueue().jobs()) + if (job.projectId() == application.deploymentJobs().projectId().get() && job.jobName().equals(jobType.jobName())) return job; - } - } throw new IllegalArgumentException(jobType + " is not scheduled for " + application); } private int countJobsOf(Application application) { - return (int) buildSystem().jobs().stream() - .filter(job -> job.projectId() == application.deploymentJobs() - .projectId().get()) - .count(); + return (int) deploymentQueue().jobs().stream() + .filter(job -> job.projectId() == application.deploymentJobs().projectId().get()) + .count(); } private void notifyJobCompletion(DeploymentJobs.JobReport report) { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java index 4d7d373a157..c4b3bd82bfe 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java @@ -61,28 +61,28 @@ public class DeploymentTriggerTest { // system-test fails and is retried tester.deployAndNotify(app, applicationPackage, false, JobType.systemTest); - assertEquals("Retried immediately", 1, tester.buildSystem().jobs().size()); + assertEquals("Retried immediately", 1, tester.deploymentQueue().jobs().size()); tester.clock().advance(Duration.ofHours(1)); tester.deployAndNotify(app, applicationPackage, false, JobType.systemTest); tester.clock().advance(Duration.ofHours(1)); - assertEquals("Nothing scheduled", 0, tester.buildSystem().jobs().size()); + assertEquals("Nothing scheduled", 0, tester.deploymentQueue().jobs().size()); tester.readyJobTrigger().maintain(); // Causes retry of systemTests - assertEquals("Scheduled retry", 1, tester.buildSystem().jobs().size()); + assertEquals("Scheduled retry", 1, tester.deploymentQueue().jobs().size()); tester.deployAndNotify(app, applicationPackage, true, JobType.systemTest); // staging-test times out and is retried - tester.buildSystem().takeJobsToRun(); + tester.deploymentQueue().takeJobsToRun(); tester.clock().advance(Duration.ofHours(12).plus(Duration.ofSeconds(1))); tester.readyJobTrigger().maintain(); - assertEquals("Retried dead job", 1, tester.buildSystem().jobs().size()); - assertEquals(JobType.stagingTest.jobName(), tester.buildSystem().jobs().get(0).jobName()); + assertEquals("Retried dead job", 1, tester.deploymentQueue().jobs().size()); + assertEquals(JobType.stagingTest.jobName(), tester.deploymentQueue().jobs().get(0).jobName()); } @Test public void deploymentSpecDecidesTriggerOrder() { DeploymentTester tester = new DeploymentTester(); - BuildSystem buildSystem = tester.buildSystem(); + DeploymentQueue deploymentQueue = tester.deploymentQueue(); TenantId tenant = tester.controllerTester().createTenant("tenant1", "domain1", 1L); Application application = tester.controllerTester().createApplication(tenant, "app1", "default", 1L); ApplicationPackage applicationPackage = new ApplicationPackageBuilder() @@ -101,13 +101,13 @@ public class DeploymentTriggerTest { tester.deployAndNotify(application, applicationPackage, true, JobType.productionCorpUsEast1); tester.deployAndNotify(application, applicationPackage, true, JobType.productionUsCentral1); tester.deployAndNotify(application, applicationPackage, true, JobType.productionUsWest1); - assertTrue("All jobs consumed", buildSystem.jobs().isEmpty()); + assertTrue("All jobs consumed", deploymentQueue.jobs().isEmpty()); } @Test public void deploymentsSpecWithDelays() { DeploymentTester tester = new DeploymentTester(); - BuildSystem buildSystem = tester.buildSystem(); + DeploymentQueue deploymentQueue = tester.deploymentQueue(); Application application = tester.createApplication("app1", "tenant1", 1, 1L); ApplicationPackage applicationPackage = new ApplicationPackageBuilder() @@ -127,21 +127,21 @@ public class DeploymentTriggerTest { tester.deployAndNotify(application, applicationPackage, true, JobType.systemTest); tester.clock().advance(Duration.ofSeconds(1)); // Make staging test sort as the last successful job tester.deployAndNotify(application, applicationPackage, true, JobType.stagingTest); - assertTrue("No more jobs triggered at this time", buildSystem.jobs().isEmpty()); + assertTrue("No more jobs triggered at this time", deploymentQueue.jobs().isEmpty()); // 30 seconds pass, us-west-1 is triggered tester.clock().advance(Duration.ofSeconds(30)); tester.deploymentTrigger().triggerReadyJobs(); // Consume us-west-1 job without reporting completion - assertEquals(1, buildSystem.jobs().size()); - assertEquals(JobType.productionUsWest1.jobName(), buildSystem.jobs().get(0).jobName()); - buildSystem.takeJobsToRun(); + assertEquals(1, deploymentQueue.jobs().size()); + assertEquals(JobType.productionUsWest1.jobName(), deploymentQueue.jobs().get(0).jobName()); + deploymentQueue.takeJobsToRun(); // 3 minutes pass, delayed trigger does nothing as us-west-1 is still in progress tester.clock().advance(Duration.ofMinutes(3)); tester.deploymentTrigger().triggerReadyJobs(); - assertTrue("No more jobs triggered at this time", buildSystem.jobs().isEmpty()); + assertTrue("No more jobs triggered at this time", deploymentQueue.jobs().isEmpty()); // us-west-1 completes tester.deploy(JobType.productionUsWest1, application, applicationPackage); @@ -149,18 +149,18 @@ public class DeploymentTriggerTest { // Delayed trigger does nothing as not enough time has passed after us-west-1 completion tester.deploymentTrigger().triggerReadyJobs(); - assertTrue("No more jobs triggered at this time", buildSystem.jobs().isEmpty()); + assertTrue("No more jobs triggered at this time", deploymentQueue.jobs().isEmpty()); // 3 minutes pass, us-central-1 is triggered tester.clock().advance(Duration.ofMinutes(3)); tester.deploymentTrigger().triggerReadyJobs(); tester.deployAndNotify(application, applicationPackage, true, JobType.productionUsCentral1); - assertTrue("All jobs consumed", buildSystem.jobs().isEmpty()); + assertTrue("All jobs consumed", deploymentQueue.jobs().isEmpty()); // Delayed trigger job runs again, with nothing to trigger tester.clock().advance(Duration.ofMinutes(10)); tester.deploymentTrigger().triggerReadyJobs(); - assertTrue("All jobs consumed", buildSystem.jobs().isEmpty()); + assertTrue("All jobs consumed", deploymentQueue.jobs().isEmpty()); } @Test @@ -183,26 +183,26 @@ public class DeploymentTriggerTest { tester.deployAndNotify(application, applicationPackage, true, JobType.stagingTest); // Deploys in first region - assertEquals(1, tester.buildSystem().jobs().size()); + assertEquals(1, tester.deploymentQueue().jobs().size()); tester.deployAndNotify(application, applicationPackage, true, JobType.productionUsCentral1); // Deploys in two regions in parallel - assertEquals(2, tester.buildSystem().jobs().size()); - assertEquals(JobType.productionUsEast3.jobName(), tester.buildSystem().jobs().get(0).jobName()); - assertEquals(JobType.productionUsWest1.jobName(), tester.buildSystem().jobs().get(1).jobName()); - tester.buildSystem().takeJobsToRun(); + assertEquals(2, tester.deploymentQueue().jobs().size()); + assertEquals(JobType.productionUsEast3.jobName(), tester.deploymentQueue().jobs().get(0).jobName()); + assertEquals(JobType.productionUsWest1.jobName(), tester.deploymentQueue().jobs().get(1).jobName()); + tester.deploymentQueue().takeJobsToRun(); tester.deploy(JobType.productionUsWest1, application, applicationPackage, false); tester.jobCompletion(JobType.productionUsWest1).application(application).submit(); - assertTrue("No more jobs triggered at this time", tester.buildSystem().jobs().isEmpty()); + assertTrue("No more jobs triggered at this time", tester.deploymentQueue().jobs().isEmpty()); tester.deploy(JobType.productionUsEast3, application, applicationPackage, false); tester.jobCompletion(JobType.productionUsEast3).application(application).submit(); // Last region completes - assertEquals(1, tester.buildSystem().jobs().size()); + assertEquals(1, tester.deploymentQueue().jobs().size()); tester.deployAndNotify(application, applicationPackage, true, JobType.productionEuWest1); - assertTrue("All jobs consumed", tester.buildSystem().jobs().isEmpty()); + assertTrue("All jobs consumed", tester.deploymentQueue().jobs().isEmpty()); } @Test @@ -236,7 +236,7 @@ public class DeploymentTriggerTest { @Test public void testSuccessfulDeploymentApplicationPackageChanged() { DeploymentTester tester = new DeploymentTester(); - BuildSystem buildSystem = tester.buildSystem(); + DeploymentQueue deploymentQueue = tester.deploymentQueue(); TenantId tenant = tester.controllerTester().createTenant("tenant1", "domain1", 1L); Application application = tester.controllerTester().createApplication(tenant, "app1", "default", 1L); ApplicationPackage previousApplicationPackage = new ApplicationPackageBuilder() @@ -264,7 +264,7 @@ public class DeploymentTriggerTest { tester.deployAndNotify(application, newApplicationPackage, true, JobType.productionUsCentral1); tester.deployAndNotify(application, newApplicationPackage, true, JobType.productionUsWest1); tester.deployAndNotify(application, newApplicationPackage, true, JobType.productionEuWest1); - assertTrue("All jobs consumed", buildSystem.jobs().isEmpty()); + assertTrue("All jobs consumed", deploymentQueue.jobs().isEmpty()); } @Test @@ -293,8 +293,8 @@ public class DeploymentTriggerTest { tester.clock().advance(Duration.ofHours(1)); // --------------- Enter block window: 18:30 readyJobsTrigger.run(); - assertEquals(0, tester.buildSystem().jobs().size()); - + assertEquals(0, tester.deploymentQueue().jobs().size()); + String searchDefinition = "search test {\n" + " document test {\n" + @@ -314,12 +314,12 @@ public class DeploymentTriggerTest { tester.deployAndNotify(app, changedApplication, true, stagingTest); readyJobsTrigger.run(); - assertEquals(0, tester.buildSystem().jobs().size()); + assertEquals(0, tester.deploymentQueue().jobs().size()); tester.clock().advance(Duration.ofHours(2)); // ---------------- Exit block window: 20:30 tester.deploymentTrigger().triggerReadyJobs(); // Schedules the blocked production job(s) - assertEquals(1, tester.buildSystem().jobs().size()); - BuildService.BuildJob productionJob = tester.buildSystem().takeJobsToRun().get(0); + assertEquals(1, tester.deploymentQueue().jobs().size()); + BuildService.BuildJob productionJob = tester.deploymentQueue().takeJobsToRun().get(0); assertEquals("production-us-west-1", productionJob.jobName()); } @@ -332,16 +332,16 @@ public class DeploymentTriggerTest { LockedApplication app = (LockedApplication)tester.createAndDeploy("default0", 3, "default"); // Store that we are upgrading but don't start the system-tests job tester.controller().applications().store(app.withChange(Change.of(Version.fromString("6.2")))); - assertEquals(0, tester.buildSystem().jobs().size()); + assertEquals(0, tester.deploymentQueue().jobs().size()); readyJobsTrigger.run(); - assertEquals(1, tester.buildSystem().jobs().size()); - assertEquals("system-test", tester.buildSystem().jobs().get(0).jobName()); + assertEquals(1, tester.deploymentQueue().jobs().size()); + assertEquals("system-test", tester.deploymentQueue().jobs().get(0).jobName()); } @Test public void testHandleMultipleNotificationsFromLastJob() { DeploymentTester tester = new DeploymentTester(); - BuildSystem buildSystem = tester.buildSystem(); + DeploymentQueue deploymentQueue = tester.deploymentQueue(); TenantId tenant = tester.controllerTester().createTenant("tenant1", "domain1", 1L); Application application = tester.controllerTester().createApplication(tenant, "app1", "default", 1L); ApplicationPackage applicationPackage = new ApplicationPackageBuilder() @@ -361,7 +361,7 @@ public class DeploymentTriggerTest { tester.jobCompletion(JobType.productionCorpUsEast1).application(application).submit(); assertFalse("Change has been deployed", tester.applications().require(application.id()).change().isPresent()); - assertTrue("All jobs consumed", buildSystem.jobs().isEmpty()); + assertTrue("All jobs consumed", deploymentQueue.jobs().isEmpty()); } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/MockBuildService.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/MockBuildService.java deleted file mode 100644 index b7da2af263c..00000000000 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/MockBuildService.java +++ /dev/null @@ -1,179 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.controller.deployment; - -import com.yahoo.component.Version; -import com.yahoo.vespa.hosted.controller.ControllerTester; -import com.yahoo.config.provision.ApplicationId; -import com.yahoo.vespa.hosted.controller.api.integration.BuildService; -import com.yahoo.vespa.hosted.controller.application.DeploymentJobs; -import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobError; -import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType; - -import java.time.Duration; -import java.util.EnumMap; -import java.util.HashMap; -import java.util.Map; -import java.util.Optional; -import java.util.function.Supplier; - -import static com.yahoo.vespa.hosted.controller.deployment.MockBuildService.JobStatus.QUEUED; -import static com.yahoo.vespa.hosted.controller.deployment.MockBuildService.JobStatus.RUNNING; - -/** - * Simulates polling of build jobs from the controller and triggering and execution of - * these in Screwdriver. - * - * @author jvenstad - */ -public class MockBuildService implements BuildService { - - private final ControllerTester tester; - private final MockTimeline timeline; - private final Map<String, Job> jobs; - private final Map<String, JobStatus> jobStatuses; - private Version version; - - public MockBuildService(ControllerTester tester, MockTimeline timeline) { - this.tester = tester; - this.timeline = timeline; - jobs = new HashMap<>(); - jobStatuses = new HashMap<>(); - version = new Version(6, 86); - } - - /** Simulates the triggering of a Screwdriver job, where jobs are queued if already running. */ - @Override - public boolean trigger(BuildJob buildJob) { - String key = buildJob.toString(); - System.err.println(timeline.now() + ": Asked to trigger " + key); - - if ( ! jobStatuses.containsKey(key)) - startJob(key); - else - jobStatuses.put(key, QUEUED); - - return true; - } - - /** Simulates the internal triggering of Screwdriver, where only one instance is run at a time. */ - private void startJob(String key) { - jobStatuses.put(key, RUNNING); - Job job = jobs.get(key); - if (job == null) - return; - - timeline.in(job.duration, () -> { - job.outcome(); - if (jobStatuses.get(key) == QUEUED) - startJob(key); - else - jobStatuses.remove(key); - }); - System.err.println(timeline.now() + ": Triggered " + key + "; it will finish at " + timeline.now().plus(job.duration)); - } - - public void incrementVersion() { - version = new Version(version.getMajor(), version.getMinor() + 1); - } - - public Version version() { return version; } - - /** Add @job to the set of @Job objects we have information about. */ - private void add(Job job) { - jobs.put(job.buildJob().toString(), job); - } - - /** Add @project to the set of @Project objects we have information about. */ - private void add(Project project) { - project.jobs.values().forEach(this::add); - } - - /** Make a @Project with the given settings, modify it if desired, and @add() it its jobs to the pool of known ones. */ - public Project project(ApplicationId applicationId, Long projectId, Duration duration, Supplier<Boolean> success) { - return new Project(applicationId, projectId, duration, success); - } - - - /** Convenience creator for many jobs, belonging to the same project. Jobs can be modified independently after creation. */ - class Project { - - private final ApplicationId applicationId; - private final Long projectId; - private final Duration duration; - private final Supplier<Boolean> success; - private final Map<JobType, Job> jobs; - - private Project(ApplicationId applicationId, Long projectId, Duration duration, Supplier<Boolean> success) { - this.applicationId = applicationId; - this.projectId = projectId; - this.duration = duration; - this.success = success; - - jobs = new EnumMap<>(JobType.class); - - for (JobType jobType : JobType.values()) - jobs.put(jobType, new Job(applicationId, projectId, jobType, duration, success)); - } - - /** Set @duration for @jobType of this @Project. */ - public Project set(Duration duration, JobType jobType) { - jobs.compute(jobType, (type, job) -> new Job(applicationId, projectId, jobType, duration, job.success)); - return this; - } - - /** Set @success for @jobType of this @Project. */ - public Project set(Supplier<Boolean> success, JobType jobType) { - jobs.compute(jobType, (type, job) -> new Job(applicationId, projectId, jobType, job.duration, success)); - return this; - } - - /** Add the @Job objects of this @Project to the pool of known jobs for this @MockBuildService. */ - public void add() { - MockBuildService.this.add(this); - } - - } - - - /** Representation of a simulated job -- most noteworthy is the @outcome(), which is used to simulate a job completing. */ - private class Job { - - private final ApplicationId applicationId; - private final Long projectId; - private final JobType jobType; - private final Duration duration; - private final Supplier<Boolean> success; - - private Job(ApplicationId applicationId, Long projectId, JobType jobType, Duration duration, Supplier<Boolean> success) { - this.applicationId = applicationId; - this.projectId = projectId; - this.jobType = jobType; - this.duration = duration; - this.success = success; - } - - private void outcome() { - Boolean success = this.success.get(); - System.err.println(timeline.now() + ": Job " + projectId + ":" + jobType + " reports " + success); - if (success != null) - tester.controller().applications().notifyJobCompletion( - new DeploymentJobs.JobReport( - applicationId, - jobType, - projectId, - 42, - Optional.empty(), - Optional.ofNullable(success ? null : JobError.unknown) - )); - } - - private BuildJob buildJob() { return new BuildJob(projectId, jobType.jobName()); } - - } - - enum JobStatus { - QUEUED, - RUNNING - } - -} diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/MockTimeline.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/MockTimeline.java deleted file mode 100644 index 878c25bf6bd..00000000000 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/MockTimeline.java +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.controller.deployment; - -import com.yahoo.test.ManualClock; - -import java.time.Duration; -import java.time.Instant; -import java.time.temporal.ChronoUnit; -import java.util.PriorityQueue; - -/** - * @author jvenstad - */ -public class MockTimeline { - - private final ManualClock clock; - private final PriorityQueue<Event> events; - - public MockTimeline(ManualClock clock) { - this.events = new PriorityQueue<>(); - this.clock = clock; - } - - /** Make @event happen at time @at, as measured by the internal clock. */ - public void at(Instant at, Runnable event) { - if (at.isBefore(now())) - throw new IllegalArgumentException("The flow of time runs only one way, my friend."); - events.add(new Event(at, event)); - } - - /** Make @event happen in @in time, as measured by the internal clock. */ - public void in(Duration in, Runnable event) { - at(now().plus(in), event); - } - - /** Make @event happen every @period time, starting @offset time from @now(), as measured by the internal clock. */ - public void every(Duration period, Duration offset, Runnable event) { - in(offset, () -> { - every(period, event); - event.run(); - }); - } - - /** Make @event happen every @period time, starting @period time from @now(), as measured by the internal clock. */ - public void every(Duration period, Runnable event) { - every(period, period, event); - } - - /** Returns the current time, as measured by the internal clock. */ - public Instant now() { - return clock.instant(); - } - - /** Returns whether there are more events in the timeline, or not. */ - public boolean hasNext() { - return ! events.isEmpty(); - } - - /** Advance time to the next event, let it happen, and return the time of this event. */ - public Instant next() { - Event event = events.poll(); - clock.advance(Duration.ofMillis(now().until(event.at(), ChronoUnit.MILLIS))); - event.happen(); - return event.at(); - } - - /** Advance the time until @until, letting all events from now to then happen. */ - public void advance(Instant until) { - at(until, () -> {}); - while (next() != until); - } - - /** Advance the time by @duration, letting all events from now to then happen. */ - public void advance(Duration duration) { - advance(now().plus(duration)); - } - - /** Let the timeline unfold! Careful about those @every-s, though... */ - public void unfold() { - while (hasNext()) - next(); - } - - - private static class Event implements Comparable<Event> { - - private final Instant at; - private final Runnable event; - - private Event(Instant at, Runnable event) { - this.at = at; - this.event = event; - } - - public Instant at() { return at; } - public void happen() { event.run(); } - - - @Override - public int compareTo(Event other) { - return at().compareTo(other.at()); - } - - } - -} diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystemTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystemTest.java deleted file mode 100644 index e66d7e9168d..00000000000 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystemTest.java +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.controller.deployment; - -import com.yahoo.config.provision.ApplicationId; -import com.yahoo.vespa.hosted.controller.api.integration.BuildService.BuildJob; -import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; -import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType; -import com.yahoo.vespa.hosted.controller.persistence.MockCuratorDb; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -import java.util.Arrays; -import java.util.List; - -import static org.junit.Assert.assertEquals; - -/** - * @author mpolden - */ -@RunWith(Parameterized.class) -public class PolledBuildSystemTest { - - @Parameterized.Parameters(name = "jobType={0}") - public static Iterable<?> capacityConstrainedJobs() { - return Arrays.asList(JobType.systemTest, JobType.stagingTest); - } - - private final JobType jobType; - - public PolledBuildSystemTest(JobType jobType) { - this.jobType = jobType; - } - - @Test - public void throttle_capacity_constrained_jobs() { - DeploymentTester tester = new DeploymentTester(); - BuildSystem buildSystem = new PolledBuildSystem(tester.controller(), new MockCuratorDb()); - - int project1 = 1; - int project2 = 2; - int project3 = 3; - ApplicationPackage applicationPackage = new ApplicationPackageBuilder() - .region("us-west-1") - .build(); - ApplicationId app1 = tester.createAndDeploy("app1", project1, applicationPackage).id(); - ApplicationId app2 = tester.createAndDeploy("app2", project2, applicationPackage).id(); - ApplicationId app3 = tester.createAndDeploy("app3", project3, applicationPackage).id(); - - // Trigger jobs in capacity constrained environment - buildSystem.addJob(app1, jobType, false); - buildSystem.addJob(app2, jobType, false); - buildSystem.addJob(app3, jobType, false); - - // A limited number of jobs are offered at a time: - // First offer - List<BuildJob> nextJobs = buildSystem.takeJobsToRun(); - assertEquals(2, nextJobs.size()); - assertEquals(project1, nextJobs.get(0).projectId()); - assertEquals(project2, nextJobs.get(1).projectId()); - - // Second offer - nextJobs = buildSystem.takeJobsToRun(); - assertEquals(1, nextJobs.size()); - assertEquals(project3, nextJobs.get(0).projectId()); - } - -} diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java index 908d8f3a484..092cdcd6984 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java @@ -4,9 +4,11 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.component.Version; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.SystemName; +import com.yahoo.log.event.Collection; import com.yahoo.slime.Slime; import com.yahoo.vespa.config.SlimeUtils; import com.yahoo.vespa.hosted.controller.Application; +import com.yahoo.vespa.hosted.controller.api.integration.BuildService; import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs; @@ -17,8 +19,10 @@ import org.junit.Test; import java.nio.file.Files; import java.nio.file.Paths; import java.time.Duration; +import java.util.Collections; import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.component; +import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.productionUsEast3; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -58,7 +62,7 @@ public class FailureRedeployerTest { // Production job fails and is retried tester.clock().advance(Duration.ofSeconds(1)); // Advance time so that we can detect jobs in progress tester.deployAndNotify(app, applicationPackage, false, DeploymentJobs.JobType.productionUsEast3); - assertEquals("Production job is retried", 1, tester.buildSystem().jobs().size()); + assertEquals("Production job is retried", 1, tester.deploymentQueue().jobs().size()); assertEquals("Application has pending upgrade to " + version, version, tester.application(app.id()).change().platform().get()); // Another version is released, which cancels any pending upgrades to lower versions @@ -66,13 +70,13 @@ public class FailureRedeployerTest { tester.updateVersionStatus(version); tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsEast3); // Finish previous production job. tester.upgrader().maintain(); - assertEquals("Application starts upgrading to new version", 1, tester.buildSystem().jobs().size()); + assertEquals("Application starts upgrading to new version", 1, tester.deploymentQueue().jobs().size()); assertEquals("Application has pending upgrade to " + version, version, tester.application(app.id()).change().platform().get()); // Failure redeployer does not retry failing job for prod.us-east-3 as there's an ongoing deployment tester.clock().advance(Duration.ofMinutes(1)); tester.readyJobTrigger().maintain(); - assertFalse("Job is not retried", tester.buildSystem().jobs().stream() + assertFalse("Job is not retried", tester.deploymentQueue().jobs().stream() .anyMatch(j -> j.jobName().equals(DeploymentJobs.JobType.productionUsEast3.jobName()))); // Test environments pass @@ -81,21 +85,20 @@ public class FailureRedeployerTest { // Production job fails again and exhausts all immediate retries tester.deployAndNotify(app, applicationPackage, false, DeploymentJobs.JobType.productionUsEast3); - tester.buildSystem().takeJobsToRun(); + tester.deploymentQueue().takeJobsToRun(); tester.clock().advance(Duration.ofMinutes(10)); tester.jobCompletion(DeploymentJobs.JobType.productionUsEast3).application(app).unsuccessful().submit(); - assertTrue("Retries exhausted", tester.buildSystem().jobs().isEmpty()); + assertTrue("Retries exhausted", tester.deploymentQueue().jobs().isEmpty()); assertTrue("Failure is recorded", tester.application(app.id()).deploymentJobs().hasFailures()); // Failure redeployer retries job tester.clock().advance(Duration.ofMinutes(5)); tester.readyJobTrigger().maintain(); - assertEquals("Job is retried", 1, tester.buildSystem().jobs().size()); - assertEquals(DeploymentJobs.JobType.productionUsEast3.jobName(), tester.buildSystem().jobs().get(0).jobName()); + assertEquals("Job is retried", Collections.singletonList(new BuildService.BuildJob(app.deploymentJobs().projectId().get(), productionUsEast3.jobName())), tester.deploymentQueue().jobs()); // Production job finally succeeds tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsEast3); - assertTrue("All jobs consumed", tester.buildSystem().jobs().isEmpty()); + assertTrue("All jobs consumed", tester.deploymentQueue().jobs().isEmpty()); assertFalse("No failures", tester.application(app.id()).deploymentJobs().hasFailures()); } @@ -113,20 +116,20 @@ public class FailureRedeployerTest { tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.systemTest); // staging-test starts, but does not complete - assertEquals(DeploymentJobs.JobType.stagingTest.jobName(), tester.buildSystem().takeJobsToRun().get(0).jobName()); + assertEquals(DeploymentJobs.JobType.stagingTest.jobName(), tester.deploymentQueue().takeJobsToRun().get(0).jobName()); tester.readyJobTrigger().maintain(); - assertTrue("No jobs retried", tester.buildSystem().jobs().isEmpty()); + assertTrue("No jobs retried", tester.deploymentQueue().jobs().isEmpty()); // Just over 12 hours pass, job is retried tester.clock().advance(Duration.ofHours(12).plus(Duration.ofSeconds(1))); tester.readyJobTrigger().maintain(); - assertEquals(DeploymentJobs.JobType.stagingTest.jobName(), tester.buildSystem().takeJobsToRun().get(0).jobName()); + assertEquals(DeploymentJobs.JobType.stagingTest.jobName(), tester.deploymentQueue().takeJobsToRun().get(0).jobName()); // Deployment completes tester.deploy(DeploymentJobs.JobType.stagingTest, app, applicationPackage, true); tester.jobCompletion(DeploymentJobs.JobType.stagingTest).application(app).submit(); tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsEast3); - assertTrue("All jobs consumed", tester.buildSystem().jobs().isEmpty()); + assertTrue("All jobs consumed", tester.deploymentQueue().jobs().isEmpty()); } @Test @@ -155,10 +158,10 @@ public class FailureRedeployerTest { // system-test fails and exhausts all immediate retries tester.deployAndNotify(app, applicationPackage, false, DeploymentJobs.JobType.systemTest); - tester.buildSystem().takeJobsToRun(); + tester.deploymentQueue().takeJobsToRun(); tester.clock().advance(Duration.ofMinutes(10)); tester.jobCompletion(DeploymentJobs.JobType.systemTest).application(app).unsuccessful().submit(); - assertTrue("Retries exhausted", tester.buildSystem().jobs().isEmpty()); + assertTrue("Retries exhausted", tester.deploymentQueue().jobs().isEmpty()); // Another version is released version = Version.fromString("5.2"); @@ -168,12 +171,12 @@ public class FailureRedeployerTest { assertEquals("Application has pending upgrade to " + version, version, tester.application(app.id()).change().platform().get()); // Consume system-test job for 5.2 - tester.buildSystem().takeJobsToRun(); + tester.deploymentQueue().takeJobsToRun(); // Failure re-deployer does not retry failing system-test job as it failed for an older change tester.clock().advance(Duration.ofMinutes(5)); tester.readyJobTrigger().maintain(); - assertTrue("No jobs retried", tester.buildSystem().jobs().isEmpty()); + assertTrue("No jobs retried", tester.deploymentQueue().jobs().isEmpty()); } @Test @@ -207,23 +210,23 @@ public class FailureRedeployerTest { // Test environments pass tester.deploy(DeploymentJobs.JobType.systemTest, application, applicationPackage); - tester.buildSystem().takeJobsToRun(); + tester.deploymentQueue().takeJobsToRun(); tester.clock().advance(Duration.ofMinutes(10)); tester.jobCompletion(DeploymentJobs.JobType.systemTest).application(application).submit(); tester.deploy(DeploymentJobs.JobType.stagingTest, application, applicationPackage); - tester.buildSystem().takeJobsToRun(); + tester.deploymentQueue().takeJobsToRun(); tester.clock().advance(Duration.ofMinutes(10)); tester.jobCompletion(DeploymentJobs.JobType.stagingTest).application(application).submit(); // Production job starts, but does not complete - assertEquals(1, tester.buildSystem().jobs().size()); - assertEquals("Production job triggered", DeploymentJobs.JobType.productionCdUsCentral1.jobName(), tester.buildSystem().jobs().get(0).jobName()); - tester.buildSystem().takeJobsToRun(); + assertEquals(1, tester.deploymentQueue().jobs().size()); + assertEquals("Production job triggered", DeploymentJobs.JobType.productionCdUsCentral1.jobName(), tester.deploymentQueue().jobs().get(0).jobName()); + tester.deploymentQueue().takeJobsToRun(); // Failure re-deployer runs tester.readyJobTrigger().maintain(); - assertTrue("No jobs retried", tester.buildSystem().jobs().isEmpty()); + assertTrue("No jobs retried", tester.deploymentQueue().jobs().isEmpty()); // Deployment notifies completeness but has not actually made a deployment tester.jobCompletion(DeploymentJobs.JobType.productionCdUsCentral1).application(application).submit(); @@ -253,7 +256,7 @@ public class FailureRedeployerTest { // Failure redeployer does not restart deployment tester.readyJobTrigger().maintain(); - assertTrue("No jobs scheduled", tester.buildSystem().jobs().isEmpty()); + assertTrue("No jobs scheduled", tester.deploymentQueue().jobs().isEmpty()); } @Test @@ -273,7 +276,7 @@ public class FailureRedeployerTest { // Failure redeployer does not restart deployment tester.readyJobTrigger().maintain(); - assertTrue("No jobs scheduled", tester.buildSystem().jobs().isEmpty()); + assertTrue("No jobs scheduled", tester.deploymentQueue().jobs().isEmpty()); } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployerTest.java index 6284c3e66ef..4ee9d50a3f7 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployerTest.java @@ -56,21 +56,21 @@ public class OutstandingChangeDeployerTest { Application app = tester.application("app1"); assertTrue(app.outstandingChange().isPresent()); assertEquals("1.0.43-cafed00d", app.outstandingChange().application().get().id()); - assertEquals(1, tester.buildSystem().jobs().size()); + assertEquals(1, tester.deploymentQueue().jobs().size()); deployer.maintain(); - assertEquals("No effect as job is in progress", 1, tester.buildSystem().jobs().size()); + assertEquals("No effect as job is in progress", 1, tester.deploymentQueue().jobs().size()); assertEquals("1.0.43-cafed00d", app.outstandingChange().application().get().id()); tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.systemTest); tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.stagingTest); tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsWest1); - assertEquals("Upgrade done", 0, tester.buildSystem().jobs().size()); + assertEquals("Upgrade done", 0, tester.deploymentQueue().jobs().size()); deployer.maintain(); app = tester.application("app1"); assertEquals("1.0.43-cafed00d", app.change().application().get().id()); - List<BuildService.BuildJob> jobs = tester.buildSystem().jobs(); + List<BuildService.BuildJob> jobs = tester.deploymentQueue().jobs(); assertEquals(1, jobs.size()); assertEquals(11, jobs.get(0).projectId()); assertEquals(DeploymentJobs.JobType.systemTest.jobName(), jobs.get(0).jobName()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java index e13f4fca97c..09836738af0 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java @@ -41,7 +41,7 @@ public class UpgraderTest { tester.updateVersionStatus(version); tester.upgrader().maintain(); - assertEquals("No applications: Nothing to do", 0, tester.buildSystem().jobs().size()); + assertEquals("No applications: Nothing to do", 0, tester.deploymentQueue().jobs().size()); // Setup applications Application canary0 = tester.createAndDeploy("canary0", 1, "canary"); @@ -52,7 +52,7 @@ public class UpgraderTest { Application conservative0 = tester.createAndDeploy("conservative0", 6, "conservative"); tester.upgrader().maintain(); - assertEquals("All already on the right version: Nothing to do", 0, tester.buildSystem().jobs().size()); + assertEquals("All already on the right version: Nothing to do", 0, tester.deploymentQueue().jobs().size()); // --- 5.1 is released - everything goes smoothly version = Version.fromString("5.1"); @@ -60,20 +60,20 @@ public class UpgraderTest { assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber()); tester.upgrader().maintain(); - assertEquals("New system version: Should upgrade Canaries", 2, tester.buildSystem().jobs().size()); + assertEquals("New system version: Should upgrade Canaries", 2, tester.deploymentQueue().jobs().size()); tester.completeUpgrade(canary0, version, "canary"); assertEquals(version, tester.configServer().lastPrepareVersion().get()); tester.updateVersionStatus(version); tester.upgrader().maintain(); - assertEquals("One canary pending; nothing else", 1, tester.buildSystem().jobs().size()); + assertEquals("One canary pending; nothing else", 1, tester.deploymentQueue().jobs().size()); tester.completeUpgrade(canary1, version, "canary"); tester.updateVersionStatus(version); assertEquals(VespaVersion.Confidence.normal, tester.controller().versionStatus().systemVersion().get().confidence()); tester.upgrader().maintain(); - assertEquals("Canaries done: Should upgrade defaults", 3, tester.buildSystem().jobs().size()); + assertEquals("Canaries done: Should upgrade defaults", 3, tester.deploymentQueue().jobs().size()); tester.completeUpgrade(default0, version, "default"); tester.completeUpgrade(default1, version, "default"); @@ -82,12 +82,12 @@ public class UpgraderTest { tester.updateVersionStatus(version); assertEquals(VespaVersion.Confidence.high, tester.controller().versionStatus().systemVersion().get().confidence()); tester.upgrader().maintain(); - assertEquals("Normals done: Should upgrade conservatives", 1, tester.buildSystem().jobs().size()); + assertEquals("Normals done: Should upgrade conservatives", 1, tester.deploymentQueue().jobs().size()); tester.completeUpgrade(conservative0, version, "conservative"); tester.updateVersionStatus(version); tester.upgrader().maintain(); - assertEquals("Nothing to do", 0, tester.buildSystem().jobs().size()); + assertEquals("Nothing to do", 0, tester.deploymentQueue().jobs().size()); // --- 5.2 is released - which fails a Canary version = Version.fromString("5.2"); @@ -95,10 +95,10 @@ public class UpgraderTest { assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber()); tester.upgrader().maintain(); - assertEquals("New system version: Should upgrade Canaries", 2, tester.buildSystem().jobs().size()); + assertEquals("New system version: Should upgrade Canaries", 2, tester.deploymentQueue().jobs().size()); tester.completeUpgradeWithError(canary0, version, "canary", DeploymentJobs.JobType.stagingTest); - assertEquals("Other Canary was cancelled", 2, tester.buildSystem().jobs().size()); - // TODO: Cancelled would mean it was triggered, removed from the build system, but never reported in. + assertEquals("Other Canary was cancelled", 2, tester.deploymentQueue().jobs().size()); + // TODO: Cancelled would mean it was triggerd, removed from the build system, but never reported in. // Thus, the expected number of jobs should be 1, above: the retrying canary0. // Further, canary1 should be retried after the timeout period of 12 hours, but verifying this is // not possible when jobs are consumed form the build system on notification, rather than on deploy. @@ -106,7 +106,7 @@ public class UpgraderTest { tester.updateVersionStatus(version); assertEquals(VespaVersion.Confidence.broken, tester.controller().versionStatus().systemVersion().get().confidence()); tester.upgrader().maintain(); - assertEquals("Version broken, but Canaries should keep trying", 2, tester.buildSystem().jobs().size()); + assertEquals("Version broken, but Canaries should keep trying", 2, tester.deploymentQueue().jobs().size()); // Exhaust canary retries. tester.jobCompletion(systemTest).application(canary1).unsuccessful().submit(); @@ -120,13 +120,13 @@ public class UpgraderTest { assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber()); tester.upgrader().maintain(); - assertEquals("New system version: Should upgrade Canaries", 2, tester.buildSystem().jobs().size()); + assertEquals("New system version: Should upgrade Canaries", 2, tester.deploymentQueue().jobs().size()); tester.completeUpgrade(canary0, version, "canary"); assertEquals(version, tester.configServer().lastPrepareVersion().get()); tester.updateVersionStatus(version); tester.upgrader().maintain(); - assertEquals("One canary pending; nothing else", 1, tester.buildSystem().jobs().size()); + assertEquals("One canary pending; nothing else", 1, tester.deploymentQueue().jobs().size()); tester.completeUpgrade(canary1, version, "canary"); @@ -134,7 +134,7 @@ public class UpgraderTest { assertEquals(VespaVersion.Confidence.normal, tester.controller().versionStatus().systemVersion().get().confidence()); tester.upgrader().maintain(); - assertEquals("Canaries done: Should upgrade defaults", 3, tester.buildSystem().jobs().size()); + assertEquals("Canaries done: Should upgrade defaults", 3, tester.deploymentQueue().jobs().size()); tester.completeUpgradeWithError(default0, version, "default", DeploymentJobs.JobType.stagingTest); tester.completeUpgrade(default1, version, "default"); @@ -144,7 +144,7 @@ public class UpgraderTest { assertEquals("Not enough evidence to mark this as neither broken nor high", VespaVersion.Confidence.normal, tester.controller().versionStatus().systemVersion().get().confidence()); - assertEquals("Upgrade with error should retry", 1, tester.buildSystem().jobs().size()); + assertEquals("Upgrade with error should retry", 1, tester.deploymentQueue().jobs().size()); // Finish previous run, with exhausted retry. tester.clock().advance(Duration.ofHours(1)); @@ -157,13 +157,13 @@ public class UpgraderTest { tester.updateVersionStatus(version); assertEquals(VespaVersion.Confidence.high, tester.controller().versionStatus().systemVersion().get().confidence()); tester.upgrader().maintain(); - assertEquals("Normals done: Should upgrade conservatives", 1, tester.buildSystem().jobs().size()); + assertEquals("Normals done: Should upgrade conservatives", 1, tester.deploymentQueue().jobs().size()); tester.completeUpgrade(conservative0, version, "conservative"); tester.updateVersionStatus(version); tester.upgrader().maintain(); - assertEquals("Applications are on 5.3 - nothing to do", 0, tester.buildSystem().jobs().size()); - + assertEquals("Applications are on 5.3 - nothing to do", 0, tester.deploymentQueue().jobs().size()); + // --- Starting upgrading to a new version which breaks, causing upgrades to commence on the previous version Version version54 = Version.fromString("5.4"); Application default3 = tester.createAndDeploy("default3", 5, "default"); // need 4 to break a version @@ -175,12 +175,14 @@ public class UpgraderTest { tester.updateVersionStatus(version54); assertEquals(VespaVersion.Confidence.normal, tester.controller().versionStatus().systemVersion().get().confidence()); tester.upgrader().maintain(); - assertEquals("Upgrade of defaults are scheduled", 5, tester.buildSystem().jobs().size()); + + assertEquals("Upgrade of defaults are scheduled", 5, tester.deploymentQueue().jobs().size()); assertEquals(version54, tester.application(default0.id()).change().platform().get()); assertEquals(version54, tester.application(default1.id()).change().platform().get()); assertEquals(version54, tester.application(default2.id()).change().platform().get()); assertEquals(version54, tester.application(default3.id()).change().platform().get()); assertEquals(version54, tester.application(default4.id()).change().platform().get()); + tester.completeUpgrade(default0, version54, "default"); // State: Default applications started upgrading to 5.4 (and one completed) Version version55 = Version.fromString("5.5"); @@ -191,12 +193,14 @@ public class UpgraderTest { tester.updateVersionStatus(version55); assertEquals(VespaVersion.Confidence.normal, tester.controller().versionStatus().systemVersion().get().confidence()); tester.upgrader().maintain(); - assertEquals("Upgrade of defaults are scheduled", 5, tester.buildSystem().jobs().size()); + + assertEquals("Upgrade of defaults are scheduled", 5, tester.deploymentQueue().jobs().size()); assertEquals(version55, tester.application(default0.id()).change().platform().get()); assertEquals(version54, tester.application(default1.id()).change().platform().get()); assertEquals(version54, tester.application(default2.id()).change().platform().get()); assertEquals(version54, tester.application(default3.id()).change().platform().get()); assertEquals(version54, tester.application(default4.id()).change().platform().get()); + tester.completeUpgrade(default1, version54, "default"); tester.completeUpgrade(default2, version54, "default"); @@ -222,7 +226,7 @@ public class UpgraderTest { tester.upgrader().maintain(); assertEquals("Upgrade of defaults are scheduled on 5.4 instead, since 5.5 broken: " + "This is default3 since it failed upgrade on both 5.4 and 5.5", - 1, tester.buildSystem().jobs().size()); + 1, tester.deploymentQueue().jobs().size()); assertEquals("5.4", tester.application(default3.id()).change().platform().get().toString()); } @@ -231,13 +235,13 @@ public class UpgraderTest { // --- Setup DeploymentTester tester = new DeploymentTester(); tester.upgrader().maintain(); - assertEquals("No system version: Nothing to do", 0, tester.buildSystem().jobs().size()); + assertEquals("No system version: Nothing to do", 0, tester.deploymentQueue().jobs().size()); Version version = Version.fromString("5.0"); // (lower than the hardcoded version in the config server client) tester.updateVersionStatus(version); tester.upgrader().maintain(); - assertEquals("No applications: Nothing to do", 0, tester.buildSystem().jobs().size()); + assertEquals("No applications: Nothing to do", 0, tester.deploymentQueue().jobs().size()); // Setup applications Application canary0 = tester.createAndDeploy("canary0", 1, "canary"); @@ -254,7 +258,7 @@ public class UpgraderTest { Application default9 = tester.createAndDeploy("default9", 12, "default"); tester.upgrader().maintain(); - assertEquals("All already on the right version: Nothing to do", 0, tester.buildSystem().jobs().size()); + assertEquals("All already on the right version: Nothing to do", 0, tester.deploymentQueue().jobs().size()); // --- A new version is released version = Version.fromString("5.1"); @@ -262,20 +266,20 @@ public class UpgraderTest { assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber()); tester.upgrader().maintain(); - assertEquals("New system version: Should upgrade Canaries", 2, tester.buildSystem().jobs().size()); + assertEquals("New system version: Should upgrade Canaries", 2, tester.deploymentQueue().jobs().size()); tester.completeUpgrade(canary0, version, "canary"); assertEquals(version, tester.configServer().lastPrepareVersion().get()); tester.updateVersionStatus(version); tester.upgrader().maintain(); - assertEquals("One canary pending; nothing else", 1, tester.buildSystem().jobs().size()); + assertEquals("One canary pending; nothing else", 1, tester.deploymentQueue().jobs().size()); tester.completeUpgrade(canary1, version, "canary"); tester.updateVersionStatus(version); assertEquals(VespaVersion.Confidence.normal, tester.controller().versionStatus().systemVersion().get().confidence()); tester.upgrader().maintain(); - assertEquals("Canaries done: Should upgrade defaults", 10, tester.buildSystem().jobs().size()); + assertEquals("Canaries done: Should upgrade defaults", 10, tester.deploymentQueue().jobs().size()); tester.completeUpgrade(default0, version, "default"); tester.completeUpgradeWithError(default1, version, "default", systemTest); @@ -287,7 +291,7 @@ public class UpgraderTest { tester.updateVersionStatus(version); tester.upgrader().maintain(); assertEquals(VespaVersion.Confidence.broken, tester.controller().versionStatus().systemVersion().get().confidence()); - assertEquals("Upgrades are cancelled", 0, tester.buildSystem().jobs().size()); + assertEquals("Upgrades are cancelled", 0, tester.deploymentQueue().jobs().size()); } @Test @@ -309,7 +313,7 @@ public class UpgraderTest { tester.upgrader().maintain(); assertEquals("Application is on expected version: Nothing to do", 0, - tester.buildSystem().jobs().size()); + tester.deploymentQueue().jobs().size()); // New version is released version = Version.fromString("5.1"); @@ -322,10 +326,10 @@ public class UpgraderTest { // staging-test fails multiple times, exhausts retries and failure is recorded tester.deployAndNotify(app, applicationPackage, false, DeploymentJobs.JobType.stagingTest); - tester.buildSystem().takeJobsToRun(); + tester.deploymentQueue().takeJobsToRun(); tester.clock().advance(Duration.ofMinutes(10)); tester.jobCompletion(stagingTest).application(app).unsuccessful().submit(); - assertTrue("Retries exhausted", tester.buildSystem().jobs().isEmpty()); + assertTrue("Retries exhausted", tester.deploymentQueue().jobs().isEmpty()); assertTrue("Failure is recorded", tester.application(app.id()).deploymentJobs().hasFailures()); assertTrue("Application has pending change", tester.application(app.id()).change().isPresent()); @@ -337,12 +341,12 @@ public class UpgraderTest { // Upgrade is scheduled. system-tests starts, but does not complete tester.upgrader().maintain(); assertTrue("Application still has failures", tester.application(app.id()).deploymentJobs().hasFailures()); - assertEquals(1, tester.buildSystem().jobs().size()); - tester.buildSystem().takeJobsToRun(); + assertEquals(1, tester.deploymentQueue().jobs().size()); + tester.deploymentQueue().takeJobsToRun(); // Upgrader runs again, nothing happens as there's already a job in progress for this change tester.upgrader().maintain(); - assertTrue("No more jobs triggered at this time", tester.buildSystem().jobs().isEmpty()); + assertTrue("No more jobs triggered at this time", tester.deploymentQueue().jobs().isEmpty()); } @Test @@ -374,7 +378,7 @@ public class UpgraderTest { // Applications with default policy start upgrading tester.upgrader().maintain(); - assertEquals("Upgrade scheduled for remaining apps", 5, tester.buildSystem().jobs().size()); + assertEquals("Upgrade scheduled for remaining apps", 5, tester.deploymentQueue().jobs().size()); // 4/5 applications fail and lowers confidence tester.completeUpgradeWithError(default0, version, "default", systemTest); @@ -388,7 +392,7 @@ public class UpgraderTest { // 5th app passes system-test, but does not trigger next job as upgrade is cancelled assertFalse("No change present", tester.applications().require(default4.id()).change().isPresent()); tester.jobCompletion(systemTest).application(default4).submit(); - assertTrue("All jobs consumed", tester.buildSystem().jobs().isEmpty()); + assertTrue("All jobs consumed", tester.deploymentQueue().jobs().isEmpty()); } /** @@ -448,7 +452,7 @@ public class UpgraderTest { // Applications with default policy start upgrading to V2 tester.upgrader().maintain(); - assertEquals("Upgrade scheduled for remaining apps", 5, tester.buildSystem().jobs().size()); + assertEquals("Upgrade scheduled for remaining apps", 5, tester.deploymentQueue().jobs().size()); // 4/5 applications fail (in the last prod zone) and lowers confidence tester.completeUpgradeWithError(default0, v2, "default", DeploymentJobs.JobType.productionUsEast3); @@ -461,15 +465,15 @@ public class UpgraderTest { assertEquals(v2, tester.application("default0").deployments().get(ZoneId.from("prod.us-west-1")).version()); assertEquals(v0, tester.application("default0").deployments().get(ZoneId.from("prod.us-east-3")).version()); tester.upgrader().maintain(); - assertEquals("Upgrade to 5.1 scheduled for apps not completely on 5.1 or 5.2", 5, tester.buildSystem().jobs().size()); + assertEquals("Upgrade to 5.1 scheduled for apps not completely on 5.1 or 5.2", 5, tester.deploymentQueue().jobs().size()); tester.deploymentTrigger().triggerReadyJobs(); - assertEquals("Testing of 5.1 for 5 applications is triggered", 5, tester.buildSystem().jobs().size()); - assertEquals(systemTest.jobName(), tester.buildSystem().jobs().get(0).jobName()); - assertEquals(systemTest.jobName(), tester.buildSystem().jobs().get(1).jobName()); - assertEquals(systemTest.jobName(), tester.buildSystem().jobs().get(2).jobName()); - assertEquals(systemTest.jobName(), tester.buildSystem().jobs().get(3).jobName()); - assertEquals(systemTest.jobName(), tester.buildSystem().jobs().get(4).jobName()); + assertEquals("Testing of 5.1 for 5 applications is triggered", 5, tester.deploymentQueue().jobs().size()); + assertEquals(systemTest.jobName(), tester.deploymentQueue().jobs().get(0).jobName()); + assertEquals(systemTest.jobName(), tester.deploymentQueue().jobs().get(1).jobName()); + assertEquals(systemTest.jobName(), tester.deploymentQueue().jobs().get(2).jobName()); + assertEquals(systemTest.jobName(), tester.deploymentQueue().jobs().get(3).jobName()); + assertEquals(systemTest.jobName(), tester.deploymentQueue().jobs().get(4).jobName()); // The tester code for completing upgrades does not handle this scenario, so we trigger each step manually (for one app) tester.deployAndNotify(tester.application("default0"), "default", true, systemTest); @@ -560,19 +564,19 @@ public class UpgraderTest { // Application is not upgraded at this time tester.upgrader().maintain(); - assertTrue("No jobs scheduled", tester.buildSystem().jobs().isEmpty()); + assertTrue("No jobs scheduled", tester.deploymentQueue().jobs().isEmpty()); // One hour passes, time is 19:00, still no upgrade tester.clock().advance(Duration.ofHours(1)); tester.upgrader().maintain(); - assertTrue("No jobs scheduled", tester.buildSystem().jobs().isEmpty()); + assertTrue("No jobs scheduled", tester.deploymentQueue().jobs().isEmpty()); // Two hours pass in total, time is 20:00 and application upgrades tester.clock().advance(Duration.ofHours(1)); tester.upgrader().maintain(); - assertFalse("Job is scheduled", tester.buildSystem().jobs().isEmpty()); + assertFalse("Job is scheduled", tester.deploymentQueue().jobs().isEmpty()); tester.completeUpgrade(app, version, applicationPackage); - assertTrue("All jobs consumed", tester.buildSystem().jobs().isEmpty()); + assertTrue("All jobs consumed", tester.deploymentQueue().jobs().isEmpty()); } @Test @@ -607,19 +611,19 @@ public class UpgraderTest { tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.stagingTest); clock.advance(Duration.ofHours(1)); // Entering block window after prod job is triggered tester.deployAndNotify(app, applicationPackage, true, productionUsWest1); - assertTrue(tester.buildSystem().jobs().isEmpty()); // Next job not triggered due to being in the block window + assertTrue(tester.deploymentQueue().jobs().isEmpty()); // Next job not triggered due to being in the block window // One hour passes, time is 19:00, still no upgrade tester.clock().advance(Duration.ofHours(1)); readyJobsTrigger.maintain(); - assertTrue("No jobs scheduled", tester.buildSystem().jobs().isEmpty()); + assertTrue("No jobs scheduled", tester.deploymentQueue().jobs().isEmpty()); // Another hour pass, time is 20:00 and application upgrades tester.clock().advance(Duration.ofHours(1)); readyJobsTrigger.maintain(); tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsCentral1); tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsEast3); - assertTrue("All jobs consumed", tester.buildSystem().jobs().isEmpty()); + assertTrue("All jobs consumed", tester.deploymentQueue().jobs().isEmpty()); } /** @@ -663,7 +667,7 @@ public class UpgraderTest { tester.deployAndNotify(app, applicationPackage, true, productionUsWest1); clock.advance(Duration.ofHours(1)); // Entering block window after prod job is triggered tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsCentral1); - assertTrue(tester.buildSystem().jobs().isEmpty()); // Next job not triggered due to being in the block window + assertTrue(tester.deploymentQueue().jobs().isEmpty()); // Next job not triggered due to being in the block window // A day passes and we get a new version tester.clock().advance(Duration.ofDays(1)); @@ -671,7 +675,7 @@ public class UpgraderTest { tester.updateVersionStatus(version); tester.upgrader().maintain(); readyJobsTrigger.maintain(); - assertTrue("Nothing is scheduled", tester.buildSystem().jobs().isEmpty()); + assertTrue("Nothing is scheduled", tester.deploymentQueue().jobs().isEmpty()); // Monday morning: We are not blocked tester.clock().advance(Duration.ofDays(1)); // Sunday, 17:00 @@ -685,8 +689,8 @@ public class UpgraderTest { tester.deployAndNotify(app, applicationPackage, true, productionUsWest1); tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsCentral1); tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsEast3); - assertTrue("All jobs consumed", tester.buildSystem().jobs().isEmpty()); - + assertTrue("All jobs consumed", tester.deploymentQueue().jobs().isEmpty()); + // App is completely upgraded to the latest version for (Deployment deployment : tester.applications().require(app.id()).deployments().values()) assertEquals(version, deployment.version()); @@ -735,7 +739,7 @@ public class UpgraderTest { // Applications with default policy start upgrading tester.clock().advance(Duration.ofMinutes(1)); tester.upgrader().maintain(); - assertEquals("Upgrade scheduled for remaining apps", 5, tester.buildSystem().jobs().size()); + assertEquals("Upgrade scheduled for remaining apps", 5, tester.deploymentQueue().jobs().size()); // 4/5 applications fail, confidence is lowered and upgrade is cancelled tester.completeUpgradeWithError(default0, version, defaultApplicationPackage, systemTest); @@ -775,7 +779,8 @@ public class UpgraderTest { assertEquals(VespaVersion.Confidence.normal, tester.controller().versionStatus().systemVersion().get().confidence()); tester.upgrader().maintain(); - assertEquals("Upgrade scheduled for previously failing apps", 4, tester.buildSystem().jobs().size()); + + assertEquals("Upgrade scheduled for previously failing apps", 4, tester.deploymentQueue().jobs().size()); tester.completeUpgrade(default0, version, defaultApplicationPackageV2); tester.completeUpgrade(default1, version, defaultApplicationPackageV2); tester.completeUpgrade(default2, version, defaultApplicationPackageV2); @@ -817,24 +822,24 @@ public class UpgraderTest { assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber()); upgrader.maintain(); - assertEquals(2, tester.buildSystem().jobs().size()); + assertEquals(2, tester.deploymentQueue().jobs().size()); tester.completeUpgrade(canary0, version, "canary"); tester.completeUpgrade(canary1, version, "canary"); tester.updateVersionStatus(version); // Next run upgrades a subset upgrader.maintain(); - assertEquals(2, tester.buildSystem().jobs().size()); + assertEquals(2, tester.deploymentQueue().jobs().size()); tester.completeUpgrade(default0, version, "default"); tester.completeUpgrade(default2, version, "default"); // Remaining applications upgraded upgrader.maintain(); - assertEquals(2, tester.buildSystem().jobs().size()); + assertEquals(2, tester.deploymentQueue().jobs().size()); tester.completeUpgrade(default1, version, "default"); tester.completeUpgrade(default3, version, "default"); upgrader.maintain(); - assertTrue("All jobs consumed", tester.buildSystem().jobs().isEmpty()); + assertTrue("All jobs consumed", tester.deploymentQueue().jobs().isEmpty()); } @Test @@ -877,7 +882,7 @@ public class UpgraderTest { tester.deployAndNotify(app, applicationPackage, true, systemTest); tester.deployAndNotify(app, applicationPackage, true, stagingTest); tester.deployAndNotify(app, applicationPackage, true, productionUsWest1); - assertTrue("All jobs consumed", tester.buildSystem().jobs().isEmpty()); + assertTrue("All jobs consumed", tester.deploymentQueue().jobs().isEmpty()); app = tester.application(app.id()); for (Deployment deployment : app.deployments().values()) { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializerTest.java index c668bde0d40..05b671baea0 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializerTest.java @@ -44,7 +44,7 @@ public class VersionStatusSerializerTest { VespaVersion a = status.versions().get(i); VespaVersion b = deserialized.versions().get(i); assertEquals(a.releaseCommit(), b.releaseCommit()); - assertEquals(a.releasedAt(), b.releasedAt()); + assertEquals(a.committedAt(), b.committedAt()); assertEquals(a.isCurrentSystemVersion(), b.isCurrentSystemVersion()); assertEquals(a.statistics(), b.statistics()); assertEquals(a.configServerHostnames(), b.configServerHostnames()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/ControllerContainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/ControllerContainerTest.java index 413e9ff36bb..19ded95a764 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/ControllerContainerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/ControllerContainerTest.java @@ -68,6 +68,7 @@ public class ControllerContainerTest { " <component id='com.yahoo.vespa.hosted.controller.NodeRepositoryClientMock'/>\n" + " <component id='com.yahoo.vespa.hosted.controller.ZoneRegistryMock'/>\n" + " <component id='com.yahoo.vespa.hosted.controller.Controller'/>\n" + + " <component id='com.yahoo.vespa.hosted.controller.api.integration.stubs.MockBuildService'/>\n" + " <component id='com.yahoo.vespa.hosted.controller.ConfigServerProxyMock'/>\n" + " <component id='com.yahoo.vespa.hosted.controller.integration.MockMetricsService'/>\n" + " <component id='com.yahoo.vespa.hosted.controller.maintenance.ControllerMaintenance'/>\n" + diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java index ecc20110445..4a263bf1fbd 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java @@ -2,13 +2,19 @@ package com.yahoo.vespa.hosted.controller.restapi.application; import com.yahoo.application.container.handler.Request; +import com.yahoo.application.container.handler.Response; +import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Environment; +import com.yahoo.config.provision.RegionName; +import com.yahoo.slime.Cursor; +import com.yahoo.slime.Slime; import com.yahoo.vespa.athenz.api.AthenzDomain; import com.yahoo.vespa.athenz.api.AthenzIdentity; import com.yahoo.vespa.athenz.api.AthenzUser; import com.yahoo.vespa.athenz.api.NToken; +import com.yahoo.vespa.config.SlimeUtils; import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.ConfigServerClientMock; import com.yahoo.vespa.hosted.controller.api.identifiers.PropertyId; @@ -21,15 +27,18 @@ import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServ import com.yahoo.vespa.hosted.controller.api.integration.organization.IssueId; import com.yahoo.vespa.hosted.controller.api.integration.organization.MockOrganization; import com.yahoo.vespa.hosted.controller.api.integration.organization.User; +import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.application.ClusterInfo; import com.yahoo.vespa.hosted.controller.application.ClusterUtilization; import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs; import com.yahoo.vespa.hosted.controller.application.DeploymentMetrics; +import com.yahoo.vespa.hosted.controller.application.JobStatus; import com.yahoo.vespa.hosted.controller.athenz.mock.AthenzClientFactoryMock; import com.yahoo.vespa.hosted.controller.athenz.mock.AthenzDbMock; import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder; +import com.yahoo.vespa.hosted.controller.deployment.BuildJob; import com.yahoo.vespa.hosted.controller.restapi.ContainerControllerTester; import com.yahoo.vespa.hosted.controller.restapi.ContainerTester; import com.yahoo.vespa.hosted.controller.restapi.ControllerContainerTest; @@ -57,6 +66,11 @@ import static com.yahoo.application.container.handler.Request.Method.DELETE; import static com.yahoo.application.container.handler.Request.Method.GET; import static com.yahoo.application.container.handler.Request.Method.POST; import static com.yahoo.application.container.handler.Request.Method.PUT; +import static junit.framework.TestCase.assertNotNull; +import static junit.framework.TestCase.assertNull; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; /** * @author bratseth @@ -80,6 +94,8 @@ public class ApplicationApiTest extends ControllerContainerTest { private static final UserId USER_ID = new UserId("myuser"); private static final UserId HOSTED_VESPA_OPERATOR = new UserId("johnoperator"); private static final NToken N_TOKEN = new NToken("dummy"); + private static final ZoneId TEST_ZONE = ZoneId.from(Environment.test, RegionName.from("us-east-1")); + private static final ZoneId STAGING_ZONE = ZoneId.from(Environment.staging, RegionName.from("us-east-3")); @Test public void testApplicationApi() throws Exception { @@ -772,6 +788,125 @@ public class ApplicationApiTest extends ControllerContainerTest { } + @Test + public void testJobStatusReporting() throws Exception { + ContainerControllerTester tester = new ContainerControllerTester(container, responseFiles); + addUserToHostedOperatorRole(HostedAthenzIdentities.from(HOSTED_VESPA_OPERATOR)); + tester.containerTester().updateSystemVersion(); + long projectId = 1; + Application app = tester.createApplication(); + ApplicationPackage applicationPackage = new ApplicationPackageBuilder() + .environment(Environment.prod) + .region("corp-us-east-1") + .build(); + + Version vespaVersion = new Version("6.1"); // system version from mock config server client + + BuildJob job = new BuildJob(this::notifyCompletion, tester.artifactRepository()) + .application(app) + .projectId(projectId); + job.type(DeploymentJobs.JobType.component).uploadArtifact(applicationPackage).submit(); + tester.deploy(app, applicationPackage, TEST_ZONE, projectId); + job.type(DeploymentJobs.JobType.systemTest).submit(); + + // Notifying about unknown job fails + Request request = request("/application/v4/tenant/tenant1/application/application1/jobreport", POST) + .data(asJson(job.type(DeploymentJobs.JobType.productionUsEast3).report())) + .userIdentity(HOSTED_VESPA_OPERATOR) + .get(); + tester.containerTester().assertResponse(request, new File("jobreport-unexpected-completion.json"), 400); + + // ... and assert it was recorded + JobStatus recordedStatus = + tester.controller().applications().get(app.id()).get().deploymentJobs().jobStatus().get(DeploymentJobs.JobType.component); + + assertNotNull("Status was recorded", recordedStatus); + assertTrue(recordedStatus.isSuccess()); + assertEquals(vespaVersion, recordedStatus.lastCompleted().get().version()); + + recordedStatus = + tester.controller().applications().get(app.id()).get().deploymentJobs().jobStatus().get(DeploymentJobs.JobType.productionApNortheast2); + assertNull("Status of never-triggered jobs is empty", recordedStatus); + + Response response; + + response = container.handleRequest(request("/screwdriver/v1/jobsToRun", GET).get()); + assertTrue("Response contains system-test", response.getBodyAsString().contains(DeploymentJobs.JobType.systemTest.jobName())); + assertTrue("Response contains staging-test", response.getBodyAsString().contains(DeploymentJobs.JobType.stagingTest.jobName())); + assertEquals("Response contains only two items", 2, SlimeUtils.jsonToSlime(response.getBody()).get().entries()); + + // Check that GET didn't affect the enqueued jobs. + response = container.handleRequest(request("/screwdriver/v1/jobsToRun", GET).get()); + assertTrue("Response contains system-test", response.getBodyAsString().contains(DeploymentJobs.JobType.systemTest.jobName())); + assertTrue("Response contains staging-test", response.getBodyAsString().contains(DeploymentJobs.JobType.stagingTest.jobName())); + assertEquals("Response contains only two items", 2, SlimeUtils.jsonToSlime(response.getBody()).get().entries()); + + } + + @Test + public void testJobStatusReportingOutOfCapacity() { + ContainerControllerTester tester = new ContainerControllerTester(container, responseFiles); + tester.containerTester().updateSystemVersion(); + + long projectId = 1; + Application app = tester.createApplication(); + ApplicationPackage applicationPackage = new ApplicationPackageBuilder() + .environment(Environment.prod) + .region("corp-us-east-1") + .build(); + + // Report job failing with out of capacity + BuildJob job = new BuildJob(this::notifyCompletion, tester.artifactRepository()) + .application(app) + .projectId(projectId); + job.type(DeploymentJobs.JobType.component).uploadArtifact(applicationPackage).submit(); + + tester.deploy(app, applicationPackage, TEST_ZONE, projectId); + job.type(DeploymentJobs.JobType.systemTest).submit(); + tester.deploy(app, applicationPackage, STAGING_ZONE, projectId); + job.type(DeploymentJobs.JobType.stagingTest).error(DeploymentJobs.JobError.outOfCapacity).submit(); + + // Appropriate error is recorded + JobStatus jobStatus = tester.controller().applications().get(app.id()) + .get() + .deploymentJobs() + .jobStatus() + .get(DeploymentJobs.JobType.stagingTest); + assertFalse(jobStatus.isSuccess()); + assertEquals(DeploymentJobs.JobError.outOfCapacity, jobStatus.jobError().get()); + } + + private void notifyCompletion(DeploymentJobs.JobReport report) { + assertResponse(request("/application/v4/tenant/tenant1/application/application1/jobreport", POST) + .userIdentity(HOSTED_VESPA_OPERATOR) + .data(asJson(report)) + .get(), + 200, "{\"message\":\"ok\"}"); + } + + private static byte[] asJson(DeploymentJobs.JobReport report) { + Slime slime = new Slime(); + Cursor cursor = slime.setObject(); + cursor.setLong("projectId", report.projectId()); + cursor.setString("jobName", report.jobType().jobName()); + cursor.setLong("buildNumber", report.buildNumber()); + report.jobError().ifPresent(jobError -> cursor.setString("jobError", jobError.name())); + report.sourceRevision().ifPresent(sr -> { + Cursor sourceRevision = cursor.setObject("sourceRevision"); + sourceRevision.setString("repository", sr.repository()); + sourceRevision.setString("branch", sr.branch()); + sourceRevision.setString("commit", sr.commit()); + }); + cursor.setString("tenant", report.applicationId().tenant().value()); + cursor.setString("application", report.applicationId().application().value()); + cursor.setString("instance", report.applicationId().instance().value()); + try { + return SlimeUtils.toJsonBytes(slime); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + private HttpEntity createApplicationDeployData(ApplicationPackage applicationPackage, Optional<Long> screwdriverJobId) { return createApplicationDeployData(Optional.of(applicationPackage), screwdriverJobId); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/responses/unexpected-completion.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/jobreport-unexpected-completion.json index 8ffd9511a96..8ffd9511a96 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/responses/unexpected-completion.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/jobreport-unexpected-completion.json diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiTest.java index c7128bb4cfc..e5f3af8c06a 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiTest.java @@ -47,32 +47,43 @@ public class ControllerApiTest extends ControllerContainerTest { // Get current configuration tester.assertResponse(authenticatedRequest("http://localhost:8080/controller/v1/jobs/upgrader", new byte[0], Request.Method.GET), - "{\"upgradesPerMinute\":0.5,\"ignoreConfidence\":false}", + "{\"upgradesPerMinute\":0.5,\"confidenceOverrides\":[]}", 200); // Set invalid configuration - ; tester.assertResponse( hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader", "{\"upgradesPerMinute\":-1}", Request.Method.PATCH), "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Upgrades per minute must be >= 0\"}", 400); - // Unrecognized field + // Ignores unrecognized field tester.assertResponse( - hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader","{\"foo\":bar}", Request.Method.PATCH), - "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Unable to configure upgrader with data in request: '{\\\"foo\\\":bar}'\"}", + hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader","{\"foo\":\"bar\"}", Request.Method.PATCH), + "{\"error-code\":\"BAD_REQUEST\",\"message\":\"No such modifiable field(s)\"}", 400); - // Patch configuration + // Set upgrades per minute tester.assertResponse( hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader", "{\"upgradesPerMinute\":42.0}", Request.Method.PATCH), - "{\"upgradesPerMinute\":42.0,\"ignoreConfidence\":false}", + "{\"upgradesPerMinute\":42.0,\"confidenceOverrides\":[]}", 200); - // Patch configuration + // Override confidence tester.assertResponse( - hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader", "{\"ignoreConfidence\":true}", Request.Method.PATCH), - "{\"upgradesPerMinute\":42.0,\"ignoreConfidence\":true}", + hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader/confidence/6.42", "broken", Request.Method.POST), + "{\"upgradesPerMinute\":42.0,\"confidenceOverrides\":[{\"6.42\":\"broken\"}]}", + 200); + + // Override confidence for another version + tester.assertResponse( + hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader/confidence/6.43", "broken", Request.Method.POST), + "{\"upgradesPerMinute\":42.0,\"confidenceOverrides\":[{\"6.42\":\"broken\"},{\"6.43\":\"broken\"}]}", + 200); + + // Remove first override + tester.assertResponse( + hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader/confidence/6.42", "", Request.Method.DELETE), + "{\"upgradesPerMinute\":42.0,\"confidenceOverrides\":[{\"6.43\":\"broken\"}]}", 200); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json index 8f8b76c83c6..f1e4d8c6355 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json @@ -16,6 +16,9 @@ "name": "DeploymentIssueReporter" }, { + "name": "DeploymentJobExecutor" + }, + { "name": "DeploymentMetricsMaintainer" }, { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java index a56ab028233..4de3b9abd5b 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java @@ -83,7 +83,7 @@ public class DeploymentApiTest extends ControllerContainerTest { if ( ! version.configServerHostnames().isEmpty()) version = new VespaVersion(version.statistics(), version.releaseCommit(), - version.releasedAt(), + version.committedAt(), version.isCurrentSystemVersion(), ImmutableSet.of("config1.test", "config2.test"), VespaVersion.confidenceFrom(version.statistics(), controller) diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/filter/ControllerAuthorizationFilterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/filter/ControllerAuthorizationFilterTest.java index 497f865a2a5..c4863a0eb79 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/filter/ControllerAuthorizationFilterTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/filter/ControllerAuthorizationFilterTest.java @@ -57,7 +57,6 @@ public class ControllerAuthorizationFilterTest { assertIsAllowed(invokeFilter(filter, createRequest(PUT, "/application/v4/user", USER))); assertIsAllowed(invokeFilter(filter, createRequest(POST, "/application/v4/tenant/john", USER))); assertIsAllowed(invokeFilter(filter, createRequest(DELETE, "/screwdriver/v1/jobsToRun", USER))); - assertIsAllowed(invokeFilter(filter, createRequest(DELETE, "/screwdriver/v1/jobreport", USER))); } @Test diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiTest.java index dfeabaf051c..8ff233663b7 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiTest.java @@ -2,52 +2,27 @@ package com.yahoo.vespa.hosted.controller.restapi.screwdriver; import com.yahoo.application.container.handler.Request; -import com.yahoo.application.container.handler.Response; -import com.yahoo.component.Version; -import com.yahoo.config.provision.Environment; -import com.yahoo.config.provision.RegionName; -import com.yahoo.slime.Cursor; -import com.yahoo.slime.Slime; -import com.yahoo.vespa.athenz.api.AthenzIdentity; -import com.yahoo.vespa.athenz.api.AthenzUser; -import com.yahoo.vespa.config.SlimeUtils; import com.yahoo.vespa.hosted.controller.Application; -import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId; -import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; -import com.yahoo.vespa.hosted.controller.application.DeploymentJobs; -import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobError; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType; -import com.yahoo.vespa.hosted.controller.application.JobStatus; -import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder; -import com.yahoo.vespa.hosted.controller.deployment.BuildJob; -import com.yahoo.vespa.hosted.controller.deployment.BuildSystem; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentQueue; import com.yahoo.vespa.hosted.controller.restapi.ContainerControllerTester; import com.yahoo.vespa.hosted.controller.restapi.ControllerContainerTest; import com.yahoo.vespa.hosted.controller.versions.VersionStatus; import org.junit.Test; import java.io.File; -import java.io.IOException; -import java.io.UncheckedIOException; import java.nio.charset.StandardCharsets; -import static junit.framework.TestCase.assertNotNull; -import static junit.framework.TestCase.assertNull; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; /** * @author bratseth * @author jvenstad */ -// TODO Move /application/v4/.../jobreport specific testing to ApplicationApiTest public class ScrewdriverApiTest extends ControllerContainerTest { private static final String responseFiles = "src/test/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/responses/"; - private static final ZoneId testZone = ZoneId.from(Environment.test, RegionName.from("us-east-1")); - private static final ZoneId stagingZone = ZoneId.from(Environment.staging, RegionName.from("us-east-3")); - private static final AthenzIdentity HOSTED_VESPA_OPERATOR = AthenzUser.fromUserId("johnoperator"); @Test public void testGetReleaseStatus() throws Exception { @@ -62,101 +37,9 @@ public class ScrewdriverApiTest extends ControllerContainerTest { } @Test - public void testJobStatusReporting() throws Exception { - ContainerControllerTester tester = new ContainerControllerTester(container, responseFiles); - addUserToHostedOperatorRole(HOSTED_VESPA_OPERATOR); - tester.containerTester().updateSystemVersion(); - long projectId = 1; - Application app = tester.createApplication(); - ApplicationPackage applicationPackage = new ApplicationPackageBuilder() - .environment(Environment.prod) - .region("corp-us-east-1") - .build(); - - Version vespaVersion = new Version("6.1"); // system version from mock config server client - - BuildJob job = new BuildJob(this::notifyCompletion, tester.artifactRepository()) - .application(app) - .projectId(projectId); - job.type(JobType.component).uploadArtifact(applicationPackage).submit(); - tester.deploy(app, applicationPackage, testZone, projectId); - job.type(JobType.systemTest).submit(); - - // Notifying about unknown job fails - Request request = new Request("http://localhost:8080/application/v4/tenant/tenant1/application/application1/jobreport", - asJson(job.type(JobType.productionUsEast3).report()), - Request.Method.POST); - addIdentityToRequest(request, HOSTED_VESPA_OPERATOR); - tester.containerTester().assertResponse(request, new File("unexpected-completion.json"), 400); - - // ... and assert it was recorded - JobStatus recordedStatus = - tester.controller().applications().get(app.id()).get().deploymentJobs().jobStatus().get(JobType.component); - - assertNotNull("Status was recorded", recordedStatus); - assertTrue(recordedStatus.isSuccess()); - assertEquals(vespaVersion, recordedStatus.lastCompleted().get().version()); - - recordedStatus = - tester.controller().applications().get(app.id()).get().deploymentJobs().jobStatus().get(JobType.productionApNortheast2); - assertNull("Status of never-triggered jobs is empty", recordedStatus); - - Response response; - - response = container.handleRequest(new Request("http://localhost:8080/screwdriver/v1/jobsToRun", "", Request.Method.GET)); - assertTrue("Response contains system-test", response.getBodyAsString().contains(JobType.systemTest.jobName())); - assertTrue("Response contains staging-test", response.getBodyAsString().contains(JobType.stagingTest.jobName())); - assertEquals("Response contains only two items", 2, SlimeUtils.jsonToSlime(response.getBody()).get().entries()); - - // Check that GET didn't affect the enqueued jobs. - response = container.handleRequest(new Request("http://localhost:8080/screwdriver/v1/jobsToRun", "", Request.Method.DELETE)); - assertTrue("Response contains system-test", response.getBodyAsString().contains(JobType.systemTest.jobName())); - assertTrue("Response contains staging-test", response.getBodyAsString().contains(JobType.stagingTest.jobName())); - assertEquals("Response contains only two items", 2, SlimeUtils.jsonToSlime(response.getBody()).get().entries()); - - Thread.sleep(50); - // Check that the *first* DELETE has removed the enqueued jobs. - assertResponse(new Request("http://localhost:8080/screwdriver/v1/jobsToRun", "", Request.Method.DELETE), - 200, "[]"); - } - - @Test - public void testJobStatusReportingOutOfCapacity() { - ContainerControllerTester tester = new ContainerControllerTester(container, responseFiles); - tester.containerTester().updateSystemVersion(); - - long projectId = 1; - Application app = tester.createApplication(); - ApplicationPackage applicationPackage = new ApplicationPackageBuilder() - .environment(Environment.prod) - .region("corp-us-east-1") - .build(); - - // Report job failing with out of capacity - BuildJob job = new BuildJob(this::notifyCompletion, tester.artifactRepository()) - .application(app) - .projectId(projectId); - job.type(JobType.component).uploadArtifact(applicationPackage).submit(); - - tester.deploy(app, applicationPackage, testZone, projectId); - job.type(JobType.systemTest).submit(); - tester.deploy(app, applicationPackage, stagingZone, projectId); - job.type(JobType.stagingTest).error(JobError.outOfCapacity).submit(); - - // Appropriate error is recorded - JobStatus jobStatus = tester.controller().applications().get(app.id()) - .get() - .deploymentJobs() - .jobStatus() - .get(JobType.stagingTest); - assertFalse(jobStatus.isSuccess()); - assertEquals(JobError.outOfCapacity, jobStatus.jobError().get()); - } - - @Test public void testTriggerJobForApplication() { ContainerControllerTester tester = new ContainerControllerTester(container, responseFiles); - BuildSystem buildSystem = tester.controller().applications().deploymentTrigger().buildSystem(); + DeploymentQueue deploymentQueue = tester.controller().applications().deploymentTrigger().deploymentQueue(); tester.containerTester().updateSystemVersion(); Application app = tester.createApplication(); @@ -180,49 +63,19 @@ public class ScrewdriverApiTest extends ControllerContainerTest { new byte[0], Request.Method.POST), 200, "{\"message\":\"Triggered component for tenant1.application1\"}"); - assertFalse(buildSystem.jobs().isEmpty()); - assertEquals(JobType.component.jobName(), buildSystem.jobs().get(0).jobName()); - assertEquals(1L, buildSystem.jobs().get(0).projectId()); - buildSystem.takeJobsToRun(); + assertFalse(deploymentQueue.jobs().isEmpty()); + assertEquals(JobType.component.jobName(), deploymentQueue.jobs().get(0).jobName()); + assertEquals(1L, deploymentQueue.jobs().get(0).projectId()); + deploymentQueue.takeJobsToRun(); // Triggers specific job when given assertResponse(new Request("http://localhost:8080/screwdriver/v1/trigger/tenant/" + app.id().tenant().value() + "/application/" + app.id().application().value(), "staging-test".getBytes(StandardCharsets.UTF_8), Request.Method.POST), 200, "{\"message\":\"Triggered staging-test for tenant1.application1\"}"); - assertFalse(buildSystem.jobs().isEmpty()); - assertEquals(JobType.stagingTest.jobName(), buildSystem.jobs().get(0).jobName()); - assertEquals(1L, buildSystem.jobs().get(0).projectId()); - } - - private void notifyCompletion(DeploymentJobs.JobReport report) { - assertResponse(new Request("http://localhost:8080/application/v4/tenant/tenant1/application/application1/jobreport", - asJson(report), - Request.Method.POST), - 200, "{\"message\":\"ok\"}"); - } - - private static byte[] asJson(DeploymentJobs.JobReport report) { - Slime slime = new Slime(); - Cursor cursor = slime.setObject(); - cursor.setLong("projectId", report.projectId()); - cursor.setString("jobName", report.jobType().jobName()); - cursor.setLong("buildNumber", report.buildNumber()); - report.jobError().ifPresent(jobError -> cursor.setString("jobError", jobError.name())); - report.sourceRevision().ifPresent(sr -> { - Cursor sourceRevision = cursor.setObject("sourceRevision"); - sourceRevision.setString("repository", sr.repository()); - sourceRevision.setString("branch", sr.branch()); - sourceRevision.setString("commit", sr.commit()); - }); - cursor.setString("tenant", report.applicationId().tenant().value()); - cursor.setString("application", report.applicationId().application().value()); - cursor.setString("instance", report.applicationId().instance().value()); - try { - return SlimeUtils.toJsonBytes(slime); - } catch (IOException e) { - throw new UncheckedIOException(e); - } + assertFalse(deploymentQueue.jobs().isEmpty()); + assertEquals(JobType.stagingTest.jobName(), deploymentQueue.jobs().get(0).jobName()); + assertEquals(1L, deploymentQueue.jobs().get(0).projectId()); } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java index 868ea50128d..27e26e3267a 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java @@ -243,44 +243,31 @@ public class VersionStatusTest { } @Test - public void testIgnoreConfidence() { + public void testConfidenceOverride() { DeploymentTester tester = new DeploymentTester(); - Version version0 = new Version("5.0"); tester.upgradeSystem(version0); - // Setup applications - all running on version0 - Application canary0 = tester.createAndDeploy("canary0", 1, "canary"); - Application canary1 = tester.createAndDeploy("canary1", 2, "canary"); - Application default0 = tester.createAndDeploy("default0", 3, "default"); - Application default1 = tester.createAndDeploy("default1", 4, "default"); - Application default2 = tester.createAndDeploy("default2", 5, "default"); - Application default3 = tester.createAndDeploy("default3", 6, "default"); - Application default4 = tester.createAndDeploy("default4", 7, "default"); + // Create and deploy application on current version + Application app = tester.createAndDeploy("app", 1, "canary"); + tester.updateVersionStatus(); + assertEquals(Confidence.high, confidence(tester.controller(), version0)); - // New version is released - Version version1 = new Version("5.1"); - tester.upgradeSystem(version1); + // Override confidence + tester.upgrader().overrideConfidence(version0, Confidence.broken); + tester.updateVersionStatus(); + assertEquals(Confidence.broken, confidence(tester.controller(), version0)); - // All canaries upgrade successfully, 1 default apps ok, 3 default apps fail - tester.completeUpgrade(canary0, version1, "canary"); - tester.completeUpgrade(canary1, version1, "canary"); + // New version is released and application upgrades + Version version1 = new Version("5.1"); tester.upgradeSystem(version1); - tester.completeUpgrade(default0, version1, "default"); - tester.completeUpgradeWithError(default1, version1, "default", stagingTest); - tester.completeUpgradeWithError(default2, version1, "default", stagingTest); - tester.completeUpgradeWithError(default3, version1, "default", stagingTest); - tester.completeUpgradeWithError(default4, version1, "default", stagingTest); + tester.completeUpgrade(app, version1, "canary"); tester.updateVersionStatus(); - assertEquals("Canaries have upgraded, 1 of 4 default apps failing: Broken", - Confidence.broken, confidence(tester.controller(), version1)); + assertEquals(Confidence.high, confidence(tester.controller(), version1)); - // Same as above, but ignore confidence calculations, will force normal confidence - tester.controllerTester().curator().writeIgnoreConfidence(true); - tester.updateVersionStatus(); - assertEquals("Canaries have upgraded, 1 of 4 default apps failing, but confidence ignored: Low", - Confidence.normal, confidence(tester.controller(), version1)); - tester.controllerTester().curator().writeIgnoreConfidence(false); + // Stale override was removed + assertFalse("Stale override removed", tester.controller().curator().readConfidenceOverrides() + .keySet().contains(version0)); } private Confidence confidence(Controller controller, Version version) { diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 3e0c0a12a4f..38abfec3c8c 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -25,6 +25,7 @@ vespa_define_module( src/tests/eval/value_type src/tests/gp/ponder_nov2017 src/tests/tensor/dense_dot_product_function + src/tests/tensor/dense_fast_rename_function src/tests/tensor/dense_tensor_address_combiner src/tests/tensor/dense_tensor_builder src/tests/tensor/dense_xw_product_function diff --git a/eval/src/tests/tensor/dense_fast_rename_function/CMakeLists.txt b/eval/src/tests/tensor/dense_fast_rename_function/CMakeLists.txt new file mode 100644 index 00000000000..a5c3b223ce5 --- /dev/null +++ b/eval/src/tests/tensor/dense_fast_rename_function/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_dense_fast_rename_function_test_app TEST + SOURCES + dense_fast_rename_function_test.cpp + DEPENDS + vespaeval +) +vespa_add_test(NAME eval_dense_fast_rename_function_test_app COMMAND eval_dense_fast_rename_function_test_app) diff --git a/eval/src/tests/tensor/dense_fast_rename_function/dense_fast_rename_function_test.cpp b/eval/src/tests/tensor/dense_fast_rename_function/dense_fast_rename_function_test.cpp new file mode 100644 index 00000000000..fab16f1e276 --- /dev/null +++ b/eval/src/tests/tensor/dense_fast_rename_function/dense_fast_rename_function_test.cpp @@ -0,0 +1,74 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/eval/eval/tensor_function.h> +#include <vespa/eval/eval/simple_tensor.h> +#include <vespa/eval/eval/simple_tensor_engine.h> +#include <vespa/eval/tensor/default_tensor_engine.h> +#include <vespa/eval/tensor/dense/dense_fast_rename_function.h> +#include <vespa/eval/tensor/dense/dense_tensor.h> +#include <vespa/eval/eval/test/tensor_model.hpp> +#include <vespa/eval/eval/test/eval_fixture.h> + +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/util/stash.h> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::test; +using namespace vespalib::tensor; +using namespace vespalib::eval::tensor_function; + +const TensorEngine &prod_engine = DefaultTensorEngine::ref(); + +EvalFixture::ParamRepo make_params() { + return EvalFixture::ParamRepo() + .add("x5", spec({x(5)}, N())) + .add("x5_u", spec({x(5)}, N()), "tensor(x[])") + .add("x_m", spec({x({"a", "b", "c"})}, N())) + .add("x5y3", spec({x(5),y(3)}, N())); +} +EvalFixture::ParamRepo param_repo = make_params(); + +void verify_optimized(const vespalib::string &expr) { + EvalFixture fixture(prod_engine, expr, param_repo, true); + EXPECT_EQUAL(fixture.result(), EvalFixture::ref(expr, param_repo)); + auto info = fixture.find_all<DenseFastRenameFunction>(); + EXPECT_EQUAL(info.size(), 1u); +} + +void verify_not_optimized(const vespalib::string &expr) { + EvalFixture fixture(prod_engine, expr, param_repo, true); + EXPECT_EQUAL(fixture.result(), EvalFixture::ref(expr, param_repo)); + auto info = fixture.find_all<DenseFastRenameFunction>(); + EXPECT_TRUE(info.empty()); +} + +TEST("require that non-transposing dense renames are optimized") { + TEST_DO(verify_optimized("rename(x5,x,y)")); + TEST_DO(verify_optimized("rename(x5,x,a)")); + TEST_DO(verify_optimized("rename(x5y3,y,z)")); + TEST_DO(verify_optimized("rename(x5y3,x,a)")); + TEST_DO(verify_optimized("rename(x5y3,(x,y),(a,b))")); + TEST_DO(verify_optimized("rename(x5y3,(x,y),(z,zz))")); + TEST_DO(verify_optimized("rename(x5y3,(x,y),(y,z))")); + TEST_DO(verify_optimized("rename(x5y3,(y,x),(b,a))")); +} + +TEST("require that transposing dense renames are not optimized") { + TEST_DO(verify_not_optimized("rename(x5y3,x,z)")); + TEST_DO(verify_not_optimized("rename(x5y3,y,a)")); + TEST_DO(verify_not_optimized("rename(x5y3,(x,y),(y,x))")); + TEST_DO(verify_not_optimized("rename(x5y3,(x,y),(b,a))")); + TEST_DO(verify_not_optimized("rename(x5y3,(y,x),(a,b))")); +} + +TEST("require that abstract dense renames are not optimized") { + TEST_DO(verify_not_optimized("rename(x5_u,x,y)")); +} + +TEST("require that non-dense renames are not optimized") { + TEST_DO(verify_not_optimized("rename(x_m,x,y)")); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp index 5f8be58105a..7b4a502dd4d 100644 --- a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp +++ b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp @@ -9,6 +9,7 @@ #include "dense/dense_tensor_builder.h" #include "dense/dense_dot_product_function.h" #include "dense/dense_xw_product_function.h" +#include "dense/dense_fast_rename_function.h" #include "dense/vector_from_doubles_function.h" #include <vespa/eval/eval/value.h> #include <vespa/eval/eval/tensor_spec.h> @@ -221,6 +222,7 @@ DefaultTensorEngine::optimize(const TensorFunction &expr, Stash &stash) const child.set(VectorFromDoublesFunction::optimize(child.get(), stash)); child.set(DenseDotProductFunction::optimize(child.get(), stash)); child.set(DenseXWProductFunction::optimize(child.get(), stash)); + child.set(DenseFastRenameFunction::optimize(child.get(), stash)); nodes.pop_back(); } return root.get(); diff --git a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt index 23cab0c5f79..73315b7a120 100644 --- a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt +++ b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt @@ -1,15 +1,16 @@ # Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_library(eval_tensor_dense OBJECT SOURCES - direct_dense_tensor_builder.cpp dense_dot_product_function.cpp - dense_xw_product_function.cpp + dense_fast_rename_function.cpp dense_tensor.cpp dense_tensor_address_combiner.cpp dense_tensor_builder.cpp dense_tensor_cells_iterator.cpp - dense_tensor_view.cpp dense_tensor_reduce.cpp - vector_from_doubles_function.cpp + dense_tensor_view.cpp + dense_xw_product_function.cpp + direct_dense_tensor_builder.cpp mutable_dense_tensor_view.cpp + vector_from_doubles_function.cpp ) diff --git a/eval/src/vespa/eval/tensor/dense/dense_fast_rename_function.cpp b/eval/src/vespa/eval/tensor/dense/dense_fast_rename_function.cpp new file mode 100644 index 00000000000..dda95d5a657 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_fast_rename_function.cpp @@ -0,0 +1,84 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "dense_fast_rename_function.h" +#include "dense_tensor.h" +#include "dense_tensor_view.h" +#include <vespa/eval/eval/value.h> +#include <vespa/eval/tensor/tensor.h> + +namespace vespalib::tensor { + +using CellsRef = DenseTensorView::CellsRef; +using eval::Value; +using eval::ValueType; +using eval::TensorFunction; +using eval::as; +using namespace eval::tensor_function; + +namespace { + +CellsRef getCellsRef(const eval::Value &value) { + const DenseTensorView &denseTensor = static_cast<const DenseTensorView &>(value); + return denseTensor.cellsRef(); +} + +void my_fast_rename_op(eval::InterpretedFunction::State &state, uint64_t param) { + const ValueType *type = (const ValueType *)(param); + CellsRef cells = getCellsRef(state.peek(0)); + state.pop_push(state.stash.create<DenseTensorView>(*type, cells)); +} + +bool is_concrete_dense_stable_rename(const ValueType &from_type, const ValueType &to_type, + const std::vector<vespalib::string> &from, + const std::vector<vespalib::string> &to) +{ + if (!from_type.is_dense() || from_type.is_abstract() || + !to_type.is_dense() || to_type.is_abstract() || + (from.size() != to.size())) + { + return false; + } + size_t npos = ValueType::Dimension::npos; + for (size_t i = 0; i < from.size(); ++i) { + size_t old_idx = from_type.dimension_index(from[i]); + size_t new_idx = to_type.dimension_index(to[i]); + if ((old_idx != new_idx) || (old_idx == npos)) { + return false; + } + } + return true; +} + +} // namespace vespalib::tensor::<unnamed> + + +DenseFastRenameFunction::DenseFastRenameFunction(const eval::ValueType &result_type, + const eval::TensorFunction &child) + : eval::tensor_function::Op1(result_type, child) +{ +} + +DenseFastRenameFunction::~DenseFastRenameFunction() +{ +} + +eval::InterpretedFunction::Instruction +DenseFastRenameFunction::compile_self(Stash &) const +{ + return eval::InterpretedFunction::Instruction(my_fast_rename_op, (uint64_t)&(result_type())); +} + +const TensorFunction & +DenseFastRenameFunction::optimize(const eval::TensorFunction &expr, Stash &stash) +{ + if (auto rename = as<Rename>(expr)) { + const ValueType &from_type = rename->child().result_type(); + const ValueType &to_type = expr.result_type(); + if (is_concrete_dense_stable_rename(from_type, to_type, rename->from(), rename->to())) { + return stash.create<DenseFastRenameFunction>(to_type, rename->child()); + } + } + return expr; +} + +} // namespace vespalib::tensor diff --git a/eval/src/vespa/eval/tensor/dense/dense_fast_rename_function.h b/eval/src/vespa/eval/tensor/dense/dense_fast_rename_function.h new file mode 100644 index 00000000000..e7de8e95ff0 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/dense_fast_rename_function.h @@ -0,0 +1,23 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/tensor_function.h> + +namespace vespalib::tensor { + +/** + * Tensor function for efficient non-transposing rename of a dense + * tensor. + **/ +class DenseFastRenameFunction : public eval::tensor_function::Op1 +{ +public: + DenseFastRenameFunction(const eval::ValueType &result_type, + const eval::TensorFunction &child); + ~DenseFastRenameFunction(); + eval::InterpretedFunction::Instruction compile_self(Stash &stash) const override; + static const eval::TensorFunction &optimize(const eval::TensorFunction &expr, Stash &stash); +}; + +} // namespace vespalib::tensor diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/Environment.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/Environment.java index 0fe9cc97b0f..ef97e08d7f3 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/Environment.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/component/Environment.java @@ -23,6 +23,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.Date; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.TimeZone; import java.util.stream.Collectors; @@ -30,7 +31,7 @@ import java.util.stream.Collectors; /** * Various utilities for getting values from node-admin's environment. Immutable. * - * @author bakksjo + * @author Øyvind Bakksjø * @author hmusum */ public class Environment { @@ -39,12 +40,15 @@ public class Environment { private static final String ENVIRONMENT = "ENVIRONMENT"; private static final String REGION = "REGION"; + private static final String SYSTEM = "SYSTEM"; private static final String LOGSTASH_NODES = "LOGSTASH_NODES"; private static final String COREDUMP_FEED_ENDPOINT = "COREDUMP_FEED_ENDPOINT"; + private final List<String> configServerHostNames; private final List<URI> configServerURIs; private final String environment; private final String region; + private final String system; private final String parentHostHostname; private final InetAddressResolver inetAddressResolver; private final PathResolver pathResolver; @@ -61,8 +65,11 @@ public class Environment { public Environment(ConfigServerConfig configServerConfig) { this(configServerConfig, + // TODO: Are these three ever set? Does not look like they are. How can this work then? getEnvironmentVariable(ENVIRONMENT), getEnvironmentVariable(REGION), + getEnvironmentVariable(SYSTEM), + new PathResolver(), Optional.of(getEnvironmentVariable(COREDUMP_FEED_ENDPOINT)), NodeType.host); @@ -71,16 +78,14 @@ public class Environment { public Environment(ConfigServerConfig configServerConfig, String hostedEnvironment, String hostedRegion, + String hostedSystem, PathResolver pathResolver, Optional<String> coreDumpFeedEndpoint, NodeType nodeType) { - this(createConfigServerUris( - configServerConfig.scheme(), - configServerConfig.hosts(), - configServerConfig.port()), - + this(configServerConfig, hostedEnvironment, hostedRegion, + hostedSystem, Defaults.getDefaults().vespaHostname(), new InetAddressResolver(), pathResolver, @@ -104,9 +109,10 @@ public class Environment { ); } - public Environment(List<URI> configServerURIs, + public Environment(ConfigServerConfig configServerConfig, String environment, String region, + String system, String parentHostHostname, InetAddressResolver inetAddressResolver, PathResolver pathResolver, @@ -116,9 +122,14 @@ public class Environment { Optional<KeyStoreOptions> trustStoreOptions, Optional<AthenzIdentity> athenzIdentity, NodeType nodeType) { - this.configServerURIs = configServerURIs; + this.configServerHostNames = configServerConfig.hosts(); + this.configServerURIs = createConfigServerUris( + configServerConfig.scheme(), + configServerConfig.hosts(), + configServerConfig.port()); this.environment = environment; this.region = region; + this.system = system; this.parentHostHostname = parentHostHostname; this.inetAddressResolver = inetAddressResolver; this.pathResolver = pathResolver; @@ -130,16 +141,20 @@ public class Environment { this.nodeType = nodeType; } + public List<String> getConfigServerHostNames() { return configServerHostNames; } + public List<URI> getConfigServerUris() { return configServerURIs; } - public String getEnvironment() { - return environment; - } + public String getEnvironment() { return environment; } public String getRegion() { return region; } + public String getSystem() { + return system; + } + public String getParentHostHostname() { return parentHostHostname; } @@ -164,7 +179,7 @@ public class Environment { private static List<String> getLogstashNodesFromEnvironment() { String logstashNodes = System.getenv(LOGSTASH_NODES); - if(Strings.isNullOrEmpty(logstashNodes)) { + if (Strings.isNullOrEmpty(logstashNodes)) { return Collections.emptyList(); } return Arrays.asList(logstashNodes.split("[,\\s]+")); @@ -265,9 +280,10 @@ public class Environment { public NodeType getNodeType() { return nodeType; } public static class Builder { - private List<URI> configServerURIs = Collections.emptyList(); + ConfigServerConfig configServerConfig = new ConfigServerConfig(new ConfigServerConfig.Builder()); private String environment; private String region; + private String system; private String parentHostHostname; private InetAddressResolver inetAddressResolver; private PathResolver pathResolver; @@ -278,8 +294,8 @@ public class Environment { private AthenzIdentity athenzIdentity; private NodeType nodeType = NodeType.tenant; - public Builder configServerUris(List<URI> uris) { - configServerURIs = uris; + public Builder configServerConfig(ConfigServerConfig configServerConfig) { + this.configServerConfig = configServerConfig; return this; } @@ -293,6 +309,11 @@ public class Environment { return this; } + public Builder system(String system) { + this.system = system; + return this; + } + public Builder parentHostHostname(String parentHostHostname) { this.parentHostHostname = parentHostHostname; return this; @@ -339,14 +360,22 @@ public class Environment { } public Environment build() { - return new Environment(configServerURIs, environment, region, parentHostHostname, - Optional.ofNullable(inetAddressResolver).orElseGet(InetAddressResolver::new), - Optional.ofNullable(pathResolver).orElseGet(PathResolver::new), - logstashNodes, feedEndpoint, - Optional.ofNullable(keyStoreOptions), - Optional.ofNullable(trustStoreOptions), - Optional.ofNullable(athenzIdentity), - nodeType); + Objects.requireNonNull(environment, "environment cannot be null"); + Objects.requireNonNull(region, "region cannot be null"); + Objects.requireNonNull(system, "system cannot be null"); + return new Environment(configServerConfig, + environment, + region, + system, + parentHostHostname, + Optional.ofNullable(inetAddressResolver).orElseGet(InetAddressResolver::new), + Optional.ofNullable(pathResolver).orElseGet(PathResolver::new), + logstashNodes, + feedEndpoint, + Optional.ofNullable(keyStoreOptions), + Optional.ofNullable(trustStoreOptions), + Optional.ofNullable(athenzIdentity), + nodeType); } } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java index bc8a45f2dfb..d90d5b22eeb 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java @@ -310,11 +310,8 @@ public class DockerOperationsImpl implements DockerOperations { private String createContainerEnvironmentSettings(Environment environment, ContainerNodeSpec nodeSpec) { ObjectMapper objectMapper = new ObjectMapper(); - String configServers = environment.getConfigServerUris().stream() - .map(URI::getHost) - .collect(Collectors.joining(",")); ContainerEnvironmentSettings settings = new ContainerEnvironmentSettings(); - settings.set("configServerAddresses", configServers); + settings.set("configServerAddresses", environment.getConfigServerHostNames()); settings.set("nodeType", nodeSpec.nodeType); try { diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index 7b0ba1cae58..fcbe4e15213 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -105,8 +105,6 @@ public class NodeAgentImpl implements NodeAgent { private ContainerState containerState = UNKNOWN; - // The attributes of the last successful node repo attribute update for this node. Used to avoid redundant calls. - private NodeAttributes lastAttributesSet = null; private ContainerNodeSpec lastNodeSpec = null; private CpuUsageReporter lastCpuMetric = new CpuUsageReporter(); @@ -230,7 +228,13 @@ public class NodeAgentImpl implements NodeAgent { } private void updateNodeRepoWithCurrentAttributes(final ContainerNodeSpec nodeSpec) { - final NodeAttributes nodeAttributes = new NodeAttributes() + final NodeAttributes currentNodeAttributes = new NodeAttributes() + .withRestartGeneration(nodeSpec.currentRestartGeneration.orElse(null)) + .withRebootGeneration(nodeSpec.currentRebootGeneration.orElse(0L)) + .withDockerImage(nodeSpec.currentDockerImage.orElse(new DockerImage(""))) + .withVespaVersion(nodeSpec.vespaVersion.orElse("")); + + final NodeAttributes wantedNodeAttributes = new NodeAttributes() .withRestartGeneration(nodeSpec.wantedRestartGeneration.orElse(null)) // update reboot gen with wanted gen if set, we ignore reboot for Docker nodes but // want the two to be equal in node repo @@ -238,18 +242,16 @@ public class NodeAgentImpl implements NodeAgent { .withDockerImage(nodeSpec.wantedDockerImage.filter(node -> containerState != ABSENT).orElse(new DockerImage(""))) .withVespaVersion(nodeSpec.wantedVespaVersion.filter(node -> containerState != ABSENT).orElse("")); - publishStateToNodeRepoIfChanged(nodeAttributes); + publishStateToNodeRepoIfChanged(currentNodeAttributes, wantedNodeAttributes); } - private void publishStateToNodeRepoIfChanged(NodeAttributes currentAttributes) { - // TODO: We should only update if the new current values do not match the node repo's current values - if (!currentAttributes.equals(lastAttributesSet)) { + private void publishStateToNodeRepoIfChanged(NodeAttributes currentAttributes, NodeAttributes wantedAttributes) { + if (!currentAttributes.equals(wantedAttributes)) { logger.info("Publishing new set of attributes to node repo: " - + lastAttributesSet + " -> " + currentAttributes); + + currentAttributes + " -> " + wantedAttributes); addDebugMessage("Publishing new set of attributes to node repo: {" + - lastAttributesSet + "} -> {" + currentAttributes + "}"); - nodeRepository.updateNodeAttributes(hostname, currentAttributes); - lastAttributesSet = currentAttributes; + currentAttributes + "} -> {" + wantedAttributes + "}"); + nodeRepository.updateNodeAttributes(hostname, wantedAttributes); } } diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImplTest.java index 034c9352a10..e2db9743412 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImplTest.java @@ -27,7 +27,11 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.when; public class DockerOperationsImplTest { - private final Environment environment = new Environment.Builder().build(); + private final Environment environment = new Environment.Builder() + .region("us-east-1") + .environment("prod") + .system("main") + .build(); private final Docker docker = mock(Docker.class); private final ProcessExecuter processExecuter = mock(ProcessExecuter.class); private final DockerOperationsImpl dockerOperations = new DockerOperationsImpl(docker, environment, processExecuter); @@ -54,7 +58,7 @@ public class DockerOperationsImplTest { } @Test(expected = RuntimeException.class) - public void processResultFromNodeProgramWhenNonZeroExitCode() throws Exception { + public void processResultFromNodeProgramWhenNonZeroExitCode() { final ContainerName containerName = new ContainerName("container-name"); final ProcessResult actualResult = new ProcessResult(3, "output", "errors"); final String programPath = "/bin/command"; diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java index bacae51ab83..2968c1737a8 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java @@ -59,7 +59,11 @@ public class DockerTester implements AutoCloseable { Environment environment = new Environment.Builder() .inetAddressResolver(inetAddressResolver) - .pathResolver(new PathResolver(pathToVespaHome, Paths.get("/tmp"), Paths.get("/tmp"))).build(); + .region("us-east-1") + .environment("prod") + .system("main") + .pathResolver(new PathResolver(pathToVespaHome, Paths.get("/tmp"), Paths.get("/tmp"))) + .build(); Clock clock = Clock.systemUTC(); DockerOperations dockerOperations = new DockerOperationsImpl(dockerMock, environment, null); StorageMaintainerMock storageMaintainer = new StorageMaintainerMock(dockerOperations, null, environment, callOrderVerifier, clock); diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/logging/FilebeatConfigProviderTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/logging/FilebeatConfigProviderTest.java index ad50041ab69..0c24abe8b69 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/logging/FilebeatConfigProviderTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/logging/FilebeatConfigProviderTest.java @@ -25,6 +25,7 @@ public class FilebeatConfigProviderTest { private static final String instance = "default"; private static final String environment = "prod"; private static final String region = "us-north-1"; + private static final String system = "main"; private static final List<String> logstashNodes = ImmutableList.of("logstash1", "logstash2"); @Test @@ -43,6 +44,7 @@ public class FilebeatConfigProviderTest { Environment env = new Environment.Builder() .environment(environment) .region(region) + .system(system) .build(); FilebeatConfigProvider filebeatConfigProvider = new FilebeatConfigProvider(env); @@ -81,6 +83,7 @@ public class FilebeatConfigProviderTest { Environment environment = new Environment.Builder() .environment(FilebeatConfigProviderTest.environment) .region(region) + .system(system) .logstashNodes(ImmutableList.of("unquoted", "\"quoted\"")) .build(); FilebeatConfigProvider filebeatConfigProvider = new FilebeatConfigProvider(environment); @@ -104,6 +107,7 @@ public class FilebeatConfigProviderTest { return new Environment.Builder() .environment(environment) .region(region) + .system(system) .logstashNodes(logstashNodes) .build(); } diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java index 6aeafebaea7..9c21d7c92f2 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java @@ -34,7 +34,11 @@ import static org.mockito.Mockito.when; public class StorageMaintainerTest { private final ManualClock clock = new ManualClock(); private final Environment environment = new Environment.Builder() - .pathResolver(new PathResolver()).build(); + .region("us-east-1") + .environment("prod") + .system("main") + .pathResolver(new PathResolver()) + .build(); private final DockerOperations docker = mock(DockerOperations.class); private final ProcessExecuter processExecuter = mock(ProcessExecuter.class); private final StorageMaintainer storageMaintainer = new StorageMaintainer(docker, processExecuter, diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java index 45375586a57..0b9564fad8c 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java @@ -81,6 +81,7 @@ public class NodeAgentImplTest { private final Environment environment = new Environment.Builder() .environment("dev") .region("us-east-1") + .system("main") .parentHostHostname("parent.host.name.yahoo.com") .inetAddressResolver(new InetAddressResolver()) .pathResolver(pathResolver).build(); @@ -123,14 +124,6 @@ public class NodeAgentImplTest { final InOrder inOrder = inOrder(dockerOperations, orchestrator, nodeRepository); // TODO: Verify this isn't run unless 1st time inOrder.verify(dockerOperations, times(1)).resumeNode(eq(containerName)); - // TODO: This should not happen when nothing is changed. Now it happens 1st time through. - inOrder.verify(nodeRepository).updateNodeAttributes( - hostName, - new NodeAttributes() - .withRestartGeneration(restartGeneration) - .withRebootGeneration(rebootGeneration) - .withDockerImage(dockerImage) - .withVespaVersion(vespaVersion)); inOrder.verify(orchestrator).resume(hostName); } @@ -318,12 +311,7 @@ public class NodeAgentImplTest { verify(dockerOperations, never()).removeContainer(any(), any()); verify(orchestrator, never()).resume(any(String.class)); - verify(nodeRepository).updateNodeAttributes( - hostName, new NodeAttributes() - .withRestartGeneration(restartGeneration) - .withRebootGeneration(rebootGeneration) - .withDockerImage(dockerImage) - .withVespaVersion(vespaVersion)); + verify(nodeRepository, never()).updateNodeAttributes(eq(hostName), any()); } @Test @@ -351,12 +339,7 @@ public class NodeAgentImplTest { verify(dockerOperations, never()).createContainer(eq(containerName), eq(nodeSpec)); verify(dockerOperations, never()).startContainer(eq(containerName), eq(nodeSpec)); verify(orchestrator, never()).resume(any(String.class)); - verify(nodeRepository).updateNodeAttributes( - hostName, new NodeAttributes() - .withRestartGeneration(restartGeneration) - .withRebootGeneration(rebootGeneration) - .withDockerImage(new DockerImage("")) - .withVespaVersion("")); + verify(nodeRepository, never()).updateNodeAttributes(eq(hostName), any()); } @Test @@ -385,12 +368,7 @@ public class NodeAgentImplTest { inOrder.verify(dockerOperations, never()).removeContainer(any(), any()); verify(orchestrator, never()).resume(any(String.class)); - verify(nodeRepository).updateNodeAttributes( - hostName, new NodeAttributes() - .withRestartGeneration(restartGeneration) - .withRebootGeneration(rebootGeneration) - .withDockerImage(dockerImage) - .withVespaVersion(vespaVersion)); + verify(nodeRepository, never()).updateNodeAttributes(eq(hostName), any()); } @Test @@ -413,12 +391,7 @@ public class NodeAgentImplTest { nodeAgent.converge(); - verify(nodeRepository).updateNodeAttributes( - hostName, new NodeAttributes() - .withRestartGeneration(restartGeneration) - .withRebootGeneration(rebootGeneration) - .withDockerImage(new DockerImage("")) - .withVespaVersion("")); + verify(nodeRepository, never()).updateNodeAttributes(eq(hostName), any()); } private void nodeRunningContainerIsTakenDownAndCleanedAndRecycled(Node.State nodeState, Optional<Long> wantedRestartGeneration) { diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/util/EnvironmentTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/util/EnvironmentTest.java index 826fc192f6d..4208388f2bd 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/util/EnvironmentTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/util/EnvironmentTest.java @@ -16,7 +16,12 @@ import static org.junit.Assert.assertEquals; * @author freva */ public class EnvironmentTest { - private final Environment environment = new Environment.Builder().pathResolver(new PathResolver()).build(); + private final Environment environment = new Environment.Builder() + .region("us-east-1") + .environment("prod") + .system("main") + .pathResolver(new PathResolver()) + .build(); @Test public void testPathInNodeToPathInNodeAdminAndHost() { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java index a21bd3ff1a1..a2a7aa2545a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; +import com.yahoo.cloud.config.ConfigserverConfig; import com.yahoo.config.provision.Deployer; import com.yahoo.config.provision.Deployment; import com.yahoo.config.provision.HostLivenessTracker; @@ -57,12 +58,14 @@ public class NodeFailer extends Maintainer { private final Instant constructionTime; private final ThrottlePolicy throttlePolicy; private final Metric metric; + private final ConfigserverConfig configserverConfig; public NodeFailer(Deployer deployer, HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor, NodeRepository nodeRepository, Duration downTimeLimit, Clock clock, Orchestrator orchestrator, ThrottlePolicy throttlePolicy, Metric metric, - JobControl jobControl) { + JobControl jobControl, + ConfigserverConfig configserverConfig) { // check ping status every five minutes, but at least twice as often as the down time limit super(nodeRepository, min(downTimeLimit.dividedBy(2), Duration.ofMinutes(5)), jobControl); this.deployer = deployer; @@ -74,6 +77,7 @@ public class NodeFailer extends Maintainer { this.constructionTime = clock.instant(); this.throttlePolicy = throttlePolicy; this.metric = metric; + this.configserverConfig = configserverConfig; } @Override @@ -126,7 +130,7 @@ public class NodeFailer extends Maintainer { Map<Node, String> nodesByFailureReason = new HashMap<>(); for (Node node : nodeRepository().getNodes(Node.State.ready)) { - if (! hasNodeRequestedConfigAfter(node, oldestAcceptableRequestTime)) { + if (expectConfigRequests(node) && ! hasNodeRequestedConfigAfter(node, oldestAcceptableRequestTime)) { nodesByFailureReason.put(node, "Not receiving config requests from node"); } else if (node.status().hardwareFailureDescription().isPresent()) { nodesByFailureReason.put(node, "Node has hardware failure"); @@ -137,6 +141,10 @@ public class NodeFailer extends Maintainer { return nodesByFailureReason; } + private boolean expectConfigRequests(Node node) { + return !node.type().isDockerHost() || configserverConfig.nodeAdminInContainer(); + } + private boolean hasNodeRequestedConfigAfter(Node node, Instant instant) { return !wasMadeReadyBefore(node, instant) || hasRecordedRequestAfter(node, instant); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index be792630445..7b0606b809b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.google.inject.Inject; +import com.yahoo.cloud.config.ConfigserverConfig; import com.yahoo.component.AbstractComponent; import com.yahoo.config.provision.Deployer; import com.yahoo.config.provision.Environment; @@ -53,17 +54,20 @@ public class NodeRepositoryMaintenance extends AbstractComponent { @Inject public NodeRepositoryMaintenance(NodeRepository nodeRepository, Deployer deployer, Curator curator, HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor, - Zone zone, Orchestrator orchestrator, Metric metric) { - this(nodeRepository, deployer, curator, hostLivenessTracker, serviceMonitor, zone, Clock.systemUTC(), orchestrator, metric); + Zone zone, Orchestrator orchestrator, Metric metric, + ConfigserverConfig configserverConfig) { + this(nodeRepository, deployer, curator, hostLivenessTracker, serviceMonitor, zone, Clock.systemUTC(), + orchestrator, metric, configserverConfig); } public NodeRepositoryMaintenance(NodeRepository nodeRepository, Deployer deployer, Curator curator, - HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor, - Zone zone, Clock clock, Orchestrator orchestrator, Metric metric) { + HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor, + Zone zone, Clock clock, Orchestrator orchestrator, Metric metric, + ConfigserverConfig configserverConfig) { DefaultTimes defaults = new DefaultTimes(zone.environment()); jobControl = new JobControl(nodeRepository.database()); - nodeFailer = new NodeFailer(deployer, hostLivenessTracker, serviceMonitor, nodeRepository, durationFromEnv("fail_grace").orElse(defaults.failGrace), clock, orchestrator, throttlePolicyFromEnv("throttle_policy").orElse(defaults.throttlePolicy), metric, jobControl); + nodeFailer = new NodeFailer(deployer, hostLivenessTracker, serviceMonitor, nodeRepository, durationFromEnv("fail_grace").orElse(defaults.failGrace), clock, orchestrator, throttlePolicyFromEnv("throttle_policy").orElse(defaults.throttlePolicy), metric, jobControl, configserverConfig); periodicApplicationMaintainer = new PeriodicApplicationMaintainer(deployer, nodeRepository, durationFromEnv("periodic_redeploy_interval").orElse(defaults.periodicRedeployInterval), jobControl); operatorChangeApplicationMaintainer = new OperatorChangeApplicationMaintainer(deployer, nodeRepository, clock, durationFromEnv("operator_change_redeploy_interval").orElse(defaults.operatorChangeRedeployInterval), jobControl); zooKeeperAccessMaintainer = new ZooKeeperAccessMaintainer(nodeRepository, curator, durationFromEnv("zookeeper_access_maintenance_interval").orElse(defaults.zooKeeperAccessMaintenanceInterval), jobControl); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java index 5534c28cc1a..a03b06fda13 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; +import com.yahoo.cloud.config.ConfigserverConfig; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ApplicationName; @@ -70,19 +71,29 @@ public class NodeFailTester { private final Orchestrator orchestrator; private final NodeRepositoryProvisioner provisioner; private final Curator curator; + private final ConfigserverConfig configserverConfig; private NodeFailTester() { + this(new ConfigserverConfig(new ConfigserverConfig.Builder())); + } + + private NodeFailTester(ConfigserverConfig configserverConfig) { clock = new ManualClock(); curator = new MockCurator(); nodeRepository = new NodeRepository(nodeFlavors, curator, clock, zone, new MockNameResolver().mockAnyLookup(), - new DockerImage("docker-registry.domain.tld:8080/dist/vespa")); + new DockerImage("docker-registry.domain.tld:8080/dist/vespa")); provisioner = new NodeRepositoryProvisioner(nodeRepository, nodeFlavors, zone); hostLivenessTracker = new TestHostLivenessTracker(clock); orchestrator = new OrchestratorMock(); + this.configserverConfig = configserverConfig; } - + public static NodeFailTester withTwoApplications() { - NodeFailTester tester = new NodeFailTester(); + return withTwoApplications(new ConfigserverConfig(new ConfigserverConfig.Builder())); + } + + public static NodeFailTester withTwoApplications(ConfigserverConfig configserverConfig) { + NodeFailTester tester = new NodeFailTester(configserverConfig); tester.createReadyNodes(16); tester.createHostNodes(3); @@ -184,7 +195,7 @@ public class NodeFailTester { } public NodeFailer createFailer() { - return new NodeFailer(deployer, hostLivenessTracker, serviceMonitor, nodeRepository, downtimeLimitOneHour, clock, orchestrator, NodeFailer.ThrottlePolicy.hosted, metric, new JobControl(nodeRepository.database())); + return new NodeFailer(deployer, hostLivenessTracker, serviceMonitor, nodeRepository, downtimeLimitOneHour, clock, orchestrator, NodeFailer.ThrottlePolicy.hosted, metric, new JobControl(nodeRepository.database()), configserverConfig); } public void allNodesMakeAConfigRequestExcept(Node ... deadNodeArray) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java index 6d41cfa08e5..63bc04ac671 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; +import com.yahoo.cloud.config.ConfigserverConfig; import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.applicationmodel.ServiceInstance; @@ -194,6 +195,53 @@ public class NodeFailerTest { } @Test + public void docker_host_failed_without_config_requests() { + NodeFailTester tester = NodeFailTester.withTwoApplications(); + + // For a day all nodes work so nothing happens + for (int minutes = 0, interval = 30; minutes < 24 * 60; minutes += interval) { + tester.clock.advance(Duration.ofMinutes(interval)); + tester.allNodesMakeAConfigRequestExcept(); + tester.failer.run(); + assertEquals( 3, tester.nodeRepository.getNodes(NodeType.host, Node.State.ready).size()); + assertEquals( 0, tester.nodeRepository.getNodes(NodeType.host, Node.State.failed).size()); + } + + + // Two ready nodes and a ready docker node die, but only 2 of those are failed out + tester.clock.advance(Duration.ofMinutes(180)); + Node dockerHost = tester.nodeRepository.getNodes(NodeType.host, Node.State.ready).iterator().next(); + tester.allNodesMakeAConfigRequestExcept(dockerHost); + tester.failer.run(); + assertEquals( 2, tester.nodeRepository.getNodes(NodeType.host, Node.State.ready).size()); + assertEquals( 1, tester.nodeRepository.getNodes(NodeType.host, Node.State.failed).size()); + } + + @Test + public void not_failed_without_config_requests_if_node_admin_on_host() { + NodeFailTester tester = NodeFailTester.withTwoApplications( + new ConfigserverConfig(new ConfigserverConfig.Builder().nodeAdminInContainer(false))); + + // For a day all nodes work so nothing happens + for (int minutes = 0, interval = 30; minutes < 24 * 60; minutes += interval) { + tester.clock.advance(Duration.ofMinutes(interval)); + tester.allNodesMakeAConfigRequestExcept(); + tester.failer.run(); + assertEquals( 3, tester.nodeRepository.getNodes(NodeType.host, Node.State.ready).size()); + assertEquals( 0, tester.nodeRepository.getNodes(NodeType.host, Node.State.failed).size()); + } + + + // Two ready nodes and a ready docker node die, but only 2 of those are failed out + tester.clock.advance(Duration.ofMinutes(180)); + Node dockerHost = tester.nodeRepository.getNodes(NodeType.host, Node.State.ready).iterator().next(); + tester.allNodesMakeAConfigRequestExcept(dockerHost); + tester.failer.run(); + assertEquals( 3, tester.nodeRepository.getNodes(NodeType.host, Node.State.ready).size()); + assertEquals( 0, tester.nodeRepository.getNodes(NodeType.host, Node.State.failed).size()); + } + + @Test public void failing_docker_hosts() { NodeFailTester tester = NodeFailTester.withTwoApplicationsOnDocker(7); diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceResource.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceResource.java index c16256df73d..7f0227df4c5 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceResource.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceResource.java @@ -6,6 +6,7 @@ import com.yahoo.container.jaxrs.annotation.Component; import com.yahoo.jrt.slobrok.api.Mirror; import com.yahoo.vespa.applicationmodel.ApplicationInstance; import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; +import com.yahoo.vespa.applicationmodel.ClusterId; import com.yahoo.vespa.applicationmodel.ConfigId; import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.applicationmodel.ServiceStatus; @@ -15,7 +16,7 @@ import com.yahoo.vespa.orchestrator.OrchestratorUtil; import com.yahoo.vespa.orchestrator.restapi.wire.SlobrokEntryResponse; import com.yahoo.vespa.orchestrator.status.HostStatus; import com.yahoo.vespa.orchestrator.status.StatusService; -import com.yahoo.vespa.service.monitor.SlobrokMonitorManager; +import com.yahoo.vespa.service.monitor.SlobrokApi; import javax.inject.Inject; import javax.ws.rs.GET; @@ -48,16 +49,16 @@ public class InstanceResource { public static final String DEFAULT_SLOBROK_PATTERN = "**"; private final StatusService statusService; - private final SlobrokMonitorManager slobrokMonitorManager; + private final SlobrokApi slobrokApi; private final InstanceLookupService instanceLookupService; @Inject public InstanceResource(@Component InstanceLookupService instanceLookupService, @Component StatusService statusService, - @Component SlobrokMonitorManager slobrokMonitorManager) { + @Component SlobrokApi slobrokApi) { this.instanceLookupService = instanceLookupService; this.statusService = statusService; - this.slobrokMonitorManager = slobrokMonitorManager; + this.slobrokApi = slobrokApi; } @GET @@ -96,7 +97,7 @@ public class InstanceResource { pattern = DEFAULT_SLOBROK_PATTERN; } - List<Mirror.Entry> entries = slobrokMonitorManager.lookup(applicationId, pattern); + List<Mirror.Entry> entries = slobrokApi.lookup(applicationId, pattern); return entries.stream() .map(entry -> new SlobrokEntryResponse(entry.getName(), entry.getSpec())) .collect(Collectors.toList()); @@ -107,11 +108,16 @@ public class InstanceResource { @Produces(MediaType.APPLICATION_JSON) public ServiceStatus getServiceStatus( @PathParam("instanceId") String instanceId, + @QueryParam("clusterId") String clusterIdString, @QueryParam("serviceType") String serviceTypeString, @QueryParam("configId") String configIdString) { ApplicationInstanceReference reference = parseInstanceId(instanceId); ApplicationId applicationId = OrchestratorUtil.toApplicationId(reference); + if (clusterIdString == null) { + throwBadRequest("Missing clusterId query parameter"); + } + if (serviceTypeString == null) { throwBadRequest("Missing serviceType query parameter"); } @@ -120,10 +126,11 @@ public class InstanceResource { throwBadRequest("Missing configId query parameter"); } + ClusterId clusterId = new ClusterId(clusterIdString); ServiceType serviceType = new ServiceType(serviceTypeString); ConfigId configId = new ConfigId(configIdString); - return slobrokMonitorManager.getStatus(applicationId, serviceType, configId); + return slobrokApi.getStatus(applicationId, clusterId, serviceType, configId); } static ApplicationInstanceReference parseInstanceId(String instanceIdString) { diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/InstanceResourceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/InstanceResourceTest.java index 42b5b70ab55..d7255327ba6 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/InstanceResourceTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/InstanceResourceTest.java @@ -4,11 +4,12 @@ package com.yahoo.vespa.orchestrator.resources; import com.fasterxml.jackson.databind.ObjectMapper; import com.yahoo.config.provision.ApplicationId; import com.yahoo.jrt.slobrok.api.Mirror; +import com.yahoo.vespa.applicationmodel.ClusterId; import com.yahoo.vespa.applicationmodel.ConfigId; import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.orchestrator.restapi.wire.SlobrokEntryResponse; -import com.yahoo.vespa.service.monitor.SlobrokMonitorManager; +import com.yahoo.vespa.service.monitor.SlobrokApi; import org.junit.Test; import javax.ws.rs.WebApplicationException; @@ -27,12 +28,13 @@ public class InstanceResourceTest { private static final List<Mirror.Entry> ENTRIES = Arrays.asList( new Mirror.Entry("name1", "spec1"), new Mirror.Entry("name2", "spec2")); + private static final ClusterId CLUSTER_ID = new ClusterId("cluster-id"); - private final SlobrokMonitorManager slobrokMonitorManager = mock(SlobrokMonitorManager.class); + private final SlobrokApi slobrokApi = mock(SlobrokApi.class); private final InstanceResource resource = new InstanceResource( null, null, - slobrokMonitorManager); + slobrokApi); @Test public void testGetSlobrokEntries() throws Exception { @@ -49,31 +51,32 @@ public class InstanceResourceTest { ServiceType serviceType = new ServiceType("serviceType"); ConfigId configId = new ConfigId("configId"); ServiceStatus serviceStatus = ServiceStatus.UP; - when(slobrokMonitorManager.getStatus(APPLICATION_ID, serviceType, configId)) + when(slobrokApi.getStatus(APPLICATION_ID, CLUSTER_ID, serviceType, configId)) .thenReturn(serviceStatus); ServiceStatus actualServiceStatus = resource.getServiceStatus( APPLICATION_INSTANCE_REFERENCE, + CLUSTER_ID.s(), serviceType.s(), configId.s()); - verify(slobrokMonitorManager).getStatus(APPLICATION_ID, serviceType, configId); + verify(slobrokApi).getStatus(APPLICATION_ID, CLUSTER_ID, serviceType, configId); assertEquals(serviceStatus, actualServiceStatus); } @Test(expected = WebApplicationException.class) public void testBadRequest() { - resource.getServiceStatus(APPLICATION_INSTANCE_REFERENCE, null, null); + resource.getServiceStatus(APPLICATION_INSTANCE_REFERENCE, CLUSTER_ID.s(), null, null); } private void testGetSlobrokEntriesWith(String pattern, String expectedLookupPattern) throws Exception{ - when(slobrokMonitorManager.lookup(APPLICATION_ID, expectedLookupPattern)) + when(slobrokApi.lookup(APPLICATION_ID, expectedLookupPattern)) .thenReturn(ENTRIES); List<SlobrokEntryResponse> response = resource.getSlobrokEntries( APPLICATION_INSTANCE_REFERENCE, pattern); - verify(slobrokMonitorManager).lookup(APPLICATION_ID, expectedLookupPattern); + verify(slobrokApi).lookup(APPLICATION_ID, expectedLookupPattern); ObjectMapper mapper = new ObjectMapper(); String actualJson = mapper.writeValueAsString(response); diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/ServiceStatusProvider.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/ServiceStatusProvider.java new file mode 100644 index 00000000000..35003313775 --- /dev/null +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/ServiceStatusProvider.java @@ -0,0 +1,19 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.service.monitor;// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.applicationmodel.ClusterId; +import com.yahoo.vespa.applicationmodel.ConfigId; +import com.yahoo.vespa.applicationmodel.ServiceStatus; +import com.yahoo.vespa.applicationmodel.ServiceType; + +/** + * @author hakon + */ +public interface ServiceStatusProvider { + /** Get the {@link ServiceStatus} of a particular service. */ + ServiceStatus getStatus(ApplicationId applicationId, + ClusterId clusterId, + ServiceType serviceType, + ConfigId configId); +} diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/SlobrokMonitorManager.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/SlobrokApi.java index 9cffa1192be..dff605b888d 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/SlobrokMonitorManager.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/SlobrokApi.java @@ -3,23 +3,13 @@ package com.yahoo.vespa.service.monitor; import com.yahoo.config.provision.ApplicationId; import com.yahoo.jrt.slobrok.api.Mirror; -import com.yahoo.vespa.applicationmodel.ConfigId; -import com.yahoo.vespa.applicationmodel.ServiceStatus; -import com.yahoo.vespa.applicationmodel.ServiceType; import java.util.List; -public interface SlobrokMonitorManager { +public interface SlobrokApi extends ServiceStatusProvider { /** * Get all Slobrok entries that has a name matching pattern as described in * Mirror::lookup. */ List<Mirror.Entry> lookup(ApplicationId application, String pattern); - - /** - * Query the ServiceMonitorStatus of a particular service. - */ - ServiceStatus getStatus(ApplicationId applicationId, - ServiceType serviceType, - ConfigId configId); } diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/HealthMonitorManager.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/HealthMonitorManager.java new file mode 100644 index 00000000000..121e1fd5ebf --- /dev/null +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/HealthMonitorManager.java @@ -0,0 +1,39 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.service.monitor.internal; + +import com.google.inject.Inject; +import com.yahoo.config.model.api.ApplicationInfo; +import com.yahoo.config.model.api.SuperModel; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.applicationmodel.ClusterId; +import com.yahoo.vespa.applicationmodel.ConfigId; +import com.yahoo.vespa.applicationmodel.ServiceStatus; +import com.yahoo.vespa.applicationmodel.ServiceType; + +/** + * @author hakon + */ +public class HealthMonitorManager implements MonitorManager { + @Inject + public HealthMonitorManager() {} + + @Override + public void applicationActivated(SuperModel superModel, ApplicationInfo application) { + } + + @Override + public void applicationRemoved(SuperModel superModel, ApplicationId id) { + } + + @Override + public ServiceStatus getStatus(ApplicationId applicationId, ClusterId clusterId, ServiceType serviceType, ConfigId configId) { + // TODO: Do proper health check + if (ZoneApplication.isNodeAdminService(applicationId, clusterId, serviceType)) { + return ServiceStatus.UP; + } + + throw new IllegalArgumentException("Health monitoring not implemented for application " + + applicationId.toShortString() + ", cluster " + clusterId.s() + ", serviceType " + + serviceType); + } +} diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ModelGenerator.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ModelGenerator.java index 961d5701901..ca70b18439b 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ModelGenerator.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ModelGenerator.java @@ -20,7 +20,7 @@ import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.applicationmodel.TenantId; import com.yahoo.vespa.service.monitor.ServiceModel; -import com.yahoo.vespa.service.monitor.SlobrokMonitorManager; +import com.yahoo.vespa.service.monitor.ServiceStatusProvider; import java.util.HashMap; import java.util.HashSet; @@ -44,7 +44,7 @@ public class ModelGenerator { SuperModel superModel, Zone zone, List<String> configServerHosts, - SlobrokMonitorManager slobrokMonitorManager) { + ServiceStatusProvider serviceStatusProvider) { Map<ApplicationInstanceReference, ApplicationInstance> applicationInstances = new HashMap<>(); for (ApplicationInfo applicationInfo : superModel.getAllApplicationInfos()) { @@ -52,7 +52,7 @@ public class ModelGenerator { ApplicationInstance applicationInstance = toApplicationInstance( applicationInfo, zone, - slobrokMonitorManager); + serviceStatusProvider); applicationInstances.put(applicationInstance.reference(), applicationInstance); } @@ -70,7 +70,7 @@ public class ModelGenerator { ApplicationInstance toApplicationInstance( ApplicationInfo applicationInfo, Zone zone, - SlobrokMonitorManager slobrokMonitorManager) { + ServiceStatusProvider serviceStatusProvider) { Map<ServiceClusterKey, Set<ServiceInstance>> groupedServiceInstances = new HashMap<>(); for (HostInfo host : applicationInfo.getModel().getHosts()) { @@ -80,9 +80,10 @@ public class ModelGenerator { ServiceInstance serviceInstance = toServiceInstance( applicationInfo.getApplicationId(), + serviceClusterKey.clusterId(), serviceInfo, hostName, - slobrokMonitorManager); + serviceStatusProvider); if (!groupedServiceInstances.containsKey(serviceClusterKey)) { groupedServiceInstances.put(serviceClusterKey, new HashSet<>()); @@ -114,28 +115,33 @@ public class ModelGenerator { return applicationInstance; } - ServiceClusterKey toServiceClusterKey(ServiceInfo serviceInfo) { - ClusterId clusterId = new ClusterId(serviceInfo.getProperty(CLUSTER_ID_PROPERTY_NAME).orElse("")); + static ClusterId getClusterId(ServiceInfo serviceInfo) { + return new ClusterId(serviceInfo.getProperty(CLUSTER_ID_PROPERTY_NAME).orElse("")); + } + + private ServiceClusterKey toServiceClusterKey(ServiceInfo serviceInfo) { + ClusterId clusterId = getClusterId(serviceInfo); ServiceType serviceType = toServiceType(serviceInfo); return new ServiceClusterKey(clusterId, serviceType); } - ServiceInstance toServiceInstance( + private ServiceInstance toServiceInstance( ApplicationId applicationId, + ClusterId clusterId, ServiceInfo serviceInfo, HostName hostName, - SlobrokMonitorManager slobrokMonitorManager) { + ServiceStatusProvider serviceStatusProvider) { ConfigId configId = new ConfigId(serviceInfo.getConfigId()); - ServiceStatus status = slobrokMonitorManager.getStatus( + ServiceStatus status = serviceStatusProvider.getStatus( applicationId, - toServiceType(serviceInfo), - configId); + clusterId, + toServiceType(serviceInfo), configId); return new ServiceInstance(configId, hostName, status); } - ApplicationInstanceId toApplicationInstanceId(ApplicationInfo applicationInfo, Zone zone) { + private ApplicationInstanceId toApplicationInstanceId(ApplicationInfo applicationInfo, Zone zone) { return new ApplicationInstanceId(String.format("%s:%s:%s:%s", applicationInfo.getApplicationId().application().value(), zone.environment().value(), @@ -143,7 +149,7 @@ public class ModelGenerator { applicationInfo.getApplicationId().instance().value())); } - ServiceType toServiceType(ServiceInfo serviceInfo) { + private ServiceType toServiceType(ServiceInfo serviceInfo) { return new ServiceType(serviceInfo.getServiceType()); } } diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/MonitorManager.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/MonitorManager.java new file mode 100644 index 00000000000..49863672c43 --- /dev/null +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/MonitorManager.java @@ -0,0 +1,11 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.service.monitor.internal;// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +import com.yahoo.config.model.api.SuperModelListener; +import com.yahoo.vespa.service.monitor.ServiceStatusProvider; + +/** + * @author hakon + */ +public interface MonitorManager extends SuperModelListener, ServiceStatusProvider { +} diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ServiceMonitorImpl.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ServiceMonitorImpl.java index 282a0797912..b2b6538fe6c 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ServiceMonitorImpl.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ServiceMonitorImpl.java @@ -15,42 +15,41 @@ import com.yahoo.vespa.service.monitor.ServiceMonitor; import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.logging.Logger; import java.util.stream.Collectors; public class ServiceMonitorImpl implements ServiceMonitor { - private static final Logger logger = Logger.getLogger(ServiceMonitorImpl.class.getName()); - - private final Zone zone; - private final List<String> configServerHosts; private final ServiceModelCache serviceModelCache; @Inject public ServiceMonitorImpl(SuperModelProvider superModelProvider, ConfigserverConfig configserverConfig, SlobrokMonitorManagerImpl slobrokMonitorManager, + HealthMonitorManager healthMonitorManager, Metric metric, Timer timer) { - this.zone = superModelProvider.getZone(); - this.configServerHosts = toConfigServerList(configserverConfig); + Zone zone = superModelProvider.getZone(); + List<String> configServerHosts = toConfigServerList(configserverConfig); ServiceMonitorMetrics metrics = new ServiceMonitorMetrics(metric, timer); - SuperModelListenerImpl superModelListener = new SuperModelListenerImpl( + UnionMonitorManager monitorManager = new UnionMonitorManager( slobrokMonitorManager, + healthMonitorManager, + configserverConfig); + + SuperModelListenerImpl superModelListener = new SuperModelListenerImpl( + monitorManager, metrics, new ModelGenerator(), zone, configServerHosts); superModelListener.start(superModelProvider); - serviceModelCache = new ServiceModelCache( - () -> superModelListener.get(), - timer); + serviceModelCache = new ServiceModelCache(superModelListener, timer); } private List<String> toConfigServerList(ConfigserverConfig configserverConfig) { if (configserverConfig.multitenant()) { return configserverConfig.zookeeperserver().stream() - .map(server -> server.hostname()) + .map(ConfigserverConfig.Zookeeperserver::hostname) .collect(Collectors.toList()); } diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/SlobrokMonitorManagerImpl.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/SlobrokMonitorManagerImpl.java index 801f4b05079..b96364bf95e 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/SlobrokMonitorManagerImpl.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/SlobrokMonitorManagerImpl.java @@ -8,10 +8,11 @@ import com.yahoo.config.model.api.SuperModelListener; import com.yahoo.config.provision.ApplicationId; import com.yahoo.jrt.slobrok.api.Mirror; import com.yahoo.log.LogLevel; +import com.yahoo.vespa.applicationmodel.ClusterId; import com.yahoo.vespa.applicationmodel.ConfigId; import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.applicationmodel.ServiceType; -import com.yahoo.vespa.service.monitor.SlobrokMonitorManager; +import com.yahoo.vespa.service.monitor.SlobrokApi; import java.util.HashMap; import java.util.List; @@ -19,7 +20,7 @@ import java.util.Optional; import java.util.function.Supplier; import java.util.logging.Logger; -public class SlobrokMonitorManagerImpl implements SuperModelListener, SlobrokMonitorManager { +public class SlobrokMonitorManagerImpl implements SuperModelListener, SlobrokApi, MonitorManager { private static final Logger logger = Logger.getLogger(SlobrokMonitorManagerImpl.class.getName()); @@ -30,7 +31,7 @@ public class SlobrokMonitorManagerImpl implements SuperModelListener, SlobrokMon @Inject public SlobrokMonitorManagerImpl() { - this(() -> new SlobrokMonitor()); + this(SlobrokMonitor::new); } SlobrokMonitorManagerImpl(Supplier<SlobrokMonitor> slobrokMonitorFactory) { @@ -74,7 +75,7 @@ public class SlobrokMonitorManagerImpl implements SuperModelListener, SlobrokMon @Override public ServiceStatus getStatus(ApplicationId applicationId, - ServiceType serviceType, + ClusterId clusterId, ServiceType serviceType, ConfigId configId) { Optional<String> slobrokServiceName = findSlobrokServiceName(serviceType, configId); if (slobrokServiceName.isPresent()) { diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/SuperModelListenerImpl.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/SuperModelListenerImpl.java index 82d55cd05d7..5e309d3c18d 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/SuperModelListenerImpl.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/SuperModelListenerImpl.java @@ -24,10 +24,10 @@ public class SuperModelListenerImpl implements SuperModelListener, Supplier<Serv // superModel and slobrokMonitorManager are always updated together // and atomically using this monitor. private final Object monitor = new Object(); - private final SlobrokMonitorManagerImpl slobrokMonitorManager; + private final MonitorManager slobrokMonitorManager; private SuperModel superModel; - SuperModelListenerImpl(SlobrokMonitorManagerImpl slobrokMonitorManager, + SuperModelListenerImpl(MonitorManager slobrokMonitorManager, ServiceMonitorMetrics metrics, ModelGenerator modelGenerator, Zone zone, diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/UnionMonitorManager.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/UnionMonitorManager.java new file mode 100644 index 00000000000..0bb4dea5a94 --- /dev/null +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/UnionMonitorManager.java @@ -0,0 +1,57 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.service.monitor.internal; + +import com.yahoo.cloud.config.ConfigserverConfig; +import com.yahoo.config.model.api.ApplicationInfo; +import com.yahoo.config.model.api.SuperModel; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.applicationmodel.ClusterId; +import com.yahoo.vespa.applicationmodel.ConfigId; +import com.yahoo.vespa.applicationmodel.ServiceStatus; +import com.yahoo.vespa.applicationmodel.ServiceType; + +/** + * @author hakon + */ +public class UnionMonitorManager implements MonitorManager { + private final SlobrokMonitorManagerImpl slobrokMonitorManager; + private final HealthMonitorManager healthMonitorManager; + private final ConfigserverConfig configserverConfig; + + UnionMonitorManager(SlobrokMonitorManagerImpl slobrokMonitorManager, + HealthMonitorManager healthMonitorManager, + ConfigserverConfig configserverConfig) { + this.slobrokMonitorManager = slobrokMonitorManager; + this.healthMonitorManager = healthMonitorManager; + this.configserverConfig = configserverConfig; + } + + @Override + public ServiceStatus getStatus(ApplicationId applicationId, + ClusterId clusterId, + ServiceType serviceType, + ConfigId configId) { + MonitorManager monitorManager = useHealth(applicationId, clusterId, serviceType) ? + healthMonitorManager : + slobrokMonitorManager; + + return monitorManager.getStatus(applicationId, clusterId, serviceType, configId); + } + + @Override + public void applicationActivated(SuperModel superModel, ApplicationInfo application) { + slobrokMonitorManager.applicationActivated(superModel, application); + healthMonitorManager.applicationActivated(superModel, application); + } + + @Override + public void applicationRemoved(SuperModel superModel, ApplicationId id) { + slobrokMonitorManager.applicationRemoved(superModel, id); + healthMonitorManager.applicationRemoved(superModel, id); + } + + private boolean useHealth(ApplicationId applicationId, ClusterId clusterId, ServiceType serviceType) { + return !configserverConfig.nodeAdminInContainer() && + ZoneApplication.isNodeAdminService(applicationId, clusterId, serviceType); + } +} diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ZoneApplication.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ZoneApplication.java new file mode 100644 index 00000000000..f7097e867df --- /dev/null +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ZoneApplication.java @@ -0,0 +1,26 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.service.monitor.internal; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.applicationmodel.ClusterId; +import com.yahoo.vespa.applicationmodel.ServiceType; + +import java.util.Objects; + +/** + * @author hakon + */ +public class ZoneApplication { + private ZoneApplication() {} + + static final ApplicationId ZONE_APPLICATION_ID = + ApplicationId.from("hosted-vespa", "routing", "default"); + + static boolean isNodeAdminService(ApplicationId applicationId, + ClusterId clusterId, + ServiceType serviceType) { + return Objects.equals(applicationId, ZONE_APPLICATION_ID) && + Objects.equals(serviceType, ServiceType.CONTAINER) && + Objects.equals(clusterId, ClusterId.NODE_ADMIN); + } +} diff --git a/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/ModelGeneratorTest.java b/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/ModelGeneratorTest.java index cf07c39950c..1348c04a7e5 100644 --- a/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/ModelGeneratorTest.java +++ b/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/ModelGeneratorTest.java @@ -44,7 +44,7 @@ public class ModelGeneratorTest { .collect(Collectors.toList()); SlobrokMonitorManagerImpl slobrokMonitorManager = mock(SlobrokMonitorManagerImpl.class); - when(slobrokMonitorManager.getStatus(any(), any(), any())) + when(slobrokMonitorManager.getStatus(any(), any(), any(), any())) .thenReturn(ServiceStatus.UP); ServiceModel serviceModel = @@ -88,7 +88,7 @@ public class ModelGeneratorTest { List<String> configServerHosts = Collections.emptyList(); SlobrokMonitorManagerImpl slobrokMonitorManager = mock(SlobrokMonitorManagerImpl.class); - when(slobrokMonitorManager.getStatus(any(), any(), any())) + when(slobrokMonitorManager.getStatus(any(), any(), any(), any())) .thenReturn(ServiceStatus.UP); ServiceModel serviceModel = diff --git a/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/SlobrokMonitorManagerImplImplTest.java b/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/SlobrokMonitorManagerImplTest.java index 79f927f6161..ab50b3192e3 100644 --- a/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/SlobrokMonitorManagerImplImplTest.java +++ b/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/SlobrokMonitorManagerImplTest.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.service.monitor.internal; import com.yahoo.config.model.api.ApplicationInfo; import com.yahoo.config.model.api.SuperModel; +import com.yahoo.vespa.applicationmodel.ClusterId; import com.yahoo.vespa.applicationmodel.ConfigId; import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.applicationmodel.ServiceType; @@ -19,7 +20,7 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -public class SlobrokMonitorManagerImplImplTest { +public class SlobrokMonitorManagerImplTest { // IntelliJ complains if parametrized type is specified, Maven complains if not specified. @SuppressWarnings("unchecked") private final Supplier<SlobrokMonitor> slobrokMonitorFactory = mock(Supplier.class); @@ -29,6 +30,7 @@ public class SlobrokMonitorManagerImplImplTest { private final SlobrokMonitor slobrokMonitor = mock(SlobrokMonitor.class); private final SuperModel superModel = mock(SuperModel.class); private final ApplicationInfo application = mock(ApplicationInfo.class); + private final ClusterId clusterId = new ClusterId("cluster-id"); @Before public void setup() { @@ -70,8 +72,8 @@ public class SlobrokMonitorManagerImplImplTest { private ServiceStatus getStatus(String serviceType) { return slobrokMonitorManager.getStatus( application.getApplicationId(), - new ServiceType(serviceType), - new ConfigId("config.id")); + clusterId, + new ServiceType(serviceType), new ConfigId("config.id")); } @Test diff --git a/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/UnionMonitorManagerTest.java b/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/UnionMonitorManagerTest.java new file mode 100644 index 00000000000..2597ebe65d3 --- /dev/null +++ b/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/UnionMonitorManagerTest.java @@ -0,0 +1,93 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.service.monitor.internal; + +import com.yahoo.cloud.config.ConfigserverConfig; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.applicationmodel.ClusterId; +import com.yahoo.vespa.applicationmodel.ConfigId; +import com.yahoo.vespa.applicationmodel.ServiceType; +import org.junit.Test; + +import static com.yahoo.vespa.applicationmodel.ClusterId.NODE_ADMIN; +import static com.yahoo.vespa.applicationmodel.ServiceType.CONTAINER; +import static com.yahoo.vespa.service.monitor.internal.ZoneApplication.ZONE_APPLICATION_ID; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +public class UnionMonitorManagerTest { + @Test + public void nodeAdminInContainer() { + testWith( + true, + ZONE_APPLICATION_ID, + NODE_ADMIN, + CONTAINER, + 1, + 0); + } + + @Test + public void nodeAdminOutsideContainer() { + boolean inContainer = false; + + // When nodeAdminInContainer is set, then only the node admin cluster should use health + testWith( + inContainer, + ZONE_APPLICATION_ID, + NODE_ADMIN, + CONTAINER, + 0, + 1); + + testWith( + inContainer, + ApplicationId.fromSerializedForm("a:b:default"), + NODE_ADMIN, + CONTAINER, + 1, + 0); + + testWith( + inContainer, + ZONE_APPLICATION_ID, + new ClusterId("foo"), + CONTAINER, + 1, + 0); + + testWith( + inContainer, + ZONE_APPLICATION_ID, + NODE_ADMIN, + new ServiceType("foo"), + 1, + 0); + } + + private void testWith(boolean nodeAdminInContainer, + ApplicationId applicationId, + ClusterId clusterId, + ServiceType serviceType, + int expectedSlobrokCalls, + int expectedHealthCalls) { + SlobrokMonitorManagerImpl slobrokMonitorManager = mock(SlobrokMonitorManagerImpl.class); + HealthMonitorManager healthMonitorManager = mock(HealthMonitorManager.class); + + ConfigserverConfig.Builder builder = new ConfigserverConfig.Builder(); + builder.nodeAdminInContainer(nodeAdminInContainer); + ConfigserverConfig config = new ConfigserverConfig(builder); + + + UnionMonitorManager manager = new UnionMonitorManager( + slobrokMonitorManager, + healthMonitorManager, + config); + + manager.getStatus(applicationId, clusterId, serviceType, new ConfigId("config-id")); + + verify(slobrokMonitorManager, times(expectedSlobrokCalls)).getStatus(any(), any(), any(), any()); + verify(healthMonitorManager, times(expectedHealthCalls)).getStatus(any(), any(), any(), any()); + } +}
\ No newline at end of file diff --git a/storage/src/tests/distributor/bucketdbupdatertest.cpp b/storage/src/tests/distributor/bucketdbupdatertest.cpp index 7831328460d..559afffc795 100644 --- a/storage/src/tests/distributor/bucketdbupdatertest.cpp +++ b/storage/src/tests/distributor/bucketdbupdatertest.cpp @@ -182,12 +182,18 @@ protected: bool bucketExistsThatHasNode(int bucketCount, uint16_t node) const; - ClusterInformation::CSP createClusterInfo(const std::string& clusterState) { + ClusterInformation::CSP createClusterInfo(const std::string& clusterStateString) { + lib::ClusterState baselineClusterState(clusterStateString); + lib::ClusterStateBundle clusterStateBundle(baselineClusterState); ClusterInformation::CSP clusterInfo( new SimpleClusterInformation( getBucketDBUpdater().getDistributorComponent().getIndex(), - lib::ClusterState(clusterState), + clusterStateBundle, "ui")); + auto &repo = getBucketSpaceRepo(); + for (auto &elem : repo) { + elem.second->setClusterState(clusterStateBundle.getDerivedClusterState(elem.first)); + } return clusterInfo; } diff --git a/storage/src/tests/distributor/distributortestutil.cpp b/storage/src/tests/distributor/distributortestutil.cpp index fbf5a14c052..8310266c9cb 100644 --- a/storage/src/tests/distributor/distributortestutil.cpp +++ b/storage/src/tests/distributor/distributortestutil.cpp @@ -240,7 +240,7 @@ DistributorTestUtil::removeFromBucketDB(const document::BucketId& id) void DistributorTestUtil::addIdealNodes(const document::BucketId& id) { - addIdealNodes(getExternalOperationHandler().getClusterState(), id); + addIdealNodes(*getExternalOperationHandler().getClusterStateBundle().getBaselineClusterState(), id); } void @@ -389,7 +389,7 @@ DistributorTestUtil::getBucketSpaces() const void DistributorTestUtil::enableDistributorClusterState(vespalib::stringref state) { - _distributor->enableClusterState(lib::ClusterState(state)); + _distributor->enableClusterStateBundle(lib::ClusterStateBundle(lib::ClusterState(state))); } } diff --git a/storage/src/tests/distributor/idealstatemanagertest.cpp b/storage/src/tests/distributor/idealstatemanagertest.cpp index 7103a89229d..7401e083900 100644 --- a/storage/src/tests/distributor/idealstatemanagertest.cpp +++ b/storage/src/tests/distributor/idealstatemanagertest.cpp @@ -49,7 +49,7 @@ public: void testBlockCheckForAllOperationsToSpecificBucket(); void setSystemState(const lib::ClusterState& systemState) { - _distributor->enableClusterState(systemState); + _distributor->enableClusterStateBundle(lib::ClusterStateBundle(systemState)); } CPPUNIT_TEST_SUITE(IdealStateManagerTest); diff --git a/storage/src/tests/distributor/operationtargetresolvertest.cpp b/storage/src/tests/distributor/operationtargetresolvertest.cpp index 1fea6e47656..fe5373a936f 100644 --- a/storage/src/tests/distributor/operationtargetresolvertest.cpp +++ b/storage/src/tests/distributor/operationtargetresolvertest.cpp @@ -153,7 +153,7 @@ OperationTargetResolverTest::getInstances(const BucketId& id, auto &bucketSpaceRepo(getExternalOperationHandler().getBucketSpaceRepo()); auto &distributorBucketSpace(bucketSpaceRepo.get(makeBucketSpace())); idealNodeCalc.setDistribution(distributorBucketSpace.getDistribution()); - idealNodeCalc.setClusterState(getExternalOperationHandler().getClusterState()); + idealNodeCalc.setClusterState(distributorBucketSpace.getClusterState()); OperationTargetResolverImpl resolver( distributorBucketSpace.getBucketDatabase(), idealNodeCalc, 16, distributorBucketSpace.getDistribution().getRedundancy(), @@ -190,7 +190,7 @@ OperationTargetResolverTest::testMultipleNodes() lib::IdealNodeCalculatorImpl idealNodeCalc; idealNodeCalc.setDistribution(distributorBucketSpace.getDistribution()); - idealNodeCalc.setClusterState(getExternalOperationHandler().getClusterState()); + idealNodeCalc.setClusterState(distributorBucketSpace.getClusterState()); lib::IdealNodeList idealNodes( idealNodeCalc.getIdealStorageNodes(BucketId(16, i))); uint16_t expectedNode = idealNodes[0].getIndex(); diff --git a/storage/src/tests/distributor/statecheckerstest.cpp b/storage/src/tests/distributor/statecheckerstest.cpp index e49cd811dc3..c265a0972af 100644 --- a/storage/src/tests/distributor/statecheckerstest.cpp +++ b/storage/src/tests/distributor/statecheckerstest.cpp @@ -93,7 +93,7 @@ struct StateCheckersTest : public CppUnit::TestFixture, void statsUpdatedWhenMergingDueToOutOfSyncCopies(); void enableClusterState(const lib::ClusterState& systemState) { - _distributor->enableClusterState(systemState); + _distributor->enableClusterStateBundle(lib::ClusterStateBundle(systemState)); } void insertJoinableBuckets(); @@ -105,8 +105,7 @@ struct StateCheckersTest : public CppUnit::TestFixture, std::vector<uint16_t> idealNodes( distributorBucketSpace .getDistribution().getIdealStorageNodes( - getIdealStateManager().getDistributorComponent() - .getClusterState(), + distributorBucketSpace.getClusterState(), bucket, "ui")); CPPUNIT_ASSERT_EQUAL(expected, idealNodes); @@ -256,8 +255,7 @@ struct StateCheckersTest : public CppUnit::TestFixture, document::BucketId bid(17, 0); addNodesToBucketDB(bid, params._bucketInfo); setRedundancy(params._redundancy); - _distributor->enableClusterState( - lib::ClusterState(params._clusterState)); + enableDistributorClusterState(params._clusterState); NodeMaintenanceStatsTracker statsTracker; StateChecker::Context c( getExternalOperationHandler(), getDistributorBucketSpace(), statsTracker, makeDocumentBucket(bid)); @@ -996,7 +994,7 @@ StateCheckersTest::testDeleteExtraCopies() std::vector<uint16_t> idealNodes( distributorBucketSpace .getDistribution().getIdealStorageNodes( - getIdealStateManager().getDistributorComponent().getClusterState(), + distributorBucketSpace.getClusterState(), document::BucketId(17, 0), "ui")); std::vector<uint16_t> wanted; diff --git a/storage/src/tests/distributor/visitoroperationtest.cpp b/storage/src/tests/distributor/visitoroperationtest.cpp index b6afcd4f3ab..af580480563 100644 --- a/storage/src/tests/distributor/visitoroperationtest.cpp +++ b/storage/src/tests/distributor/visitoroperationtest.cpp @@ -1095,7 +1095,7 @@ void VisitorOperationTest::testVisitIdealNode() { ClusterState state("distributor:1 storage:3"); - _distributor->enableClusterState(state); + _distributor->enableClusterStateBundle(lib::ClusterStateBundle(state)); // Create buckets in bucketdb for (int i=0; i<32; i++ ) { diff --git a/storage/src/vespa/storage/distributor/bucketdbupdater.cpp b/storage/src/vespa/storage/distributor/bucketdbupdater.cpp index cc1181e0d58..84332851340 100644 --- a/storage/src/vespa/storage/distributor/bucketdbupdater.cpp +++ b/storage/src/vespa/storage/distributor/bucketdbupdater.cpp @@ -63,7 +63,7 @@ BucketOwnership BucketDBUpdater::checkOwnershipInPendingState(const document::Bucket& b) const { if (hasPendingClusterState()) { - const lib::ClusterState& state(_pendingClusterState->getNewClusterState()); + const lib::ClusterState& state(*_pendingClusterState->getNewClusterStateBundle().getDerivedClusterState(b.getBucketSpace())); if (!_distributorComponent.ownsBucketInState(state, b)) { return BucketOwnership::createNotOwnedInState(state); } @@ -77,7 +77,7 @@ BucketDBUpdater::sendRequestBucketInfo( const document::Bucket& bucket, const std::shared_ptr<MergeReplyGuard>& mergeReplyGuard) { - if (!_distributorComponent.storageNodeIsUp(node)) { + if (!_distributorComponent.storageNodeIsUp(bucket.getBucketSpace(), node)) { return; } @@ -112,17 +112,18 @@ BucketDBUpdater::recheckBucketInfo(uint32_t nodeIdx, void BucketDBUpdater::removeSuperfluousBuckets( - const lib::ClusterState& newState) + const lib::ClusterStateBundle& newState) { for (auto &elem : _distributorComponent.getBucketSpaceRepo()) { const auto &newDistribution(elem.second->getDistribution()); + const auto &oldClusterState(elem.second->getClusterState()); auto &bucketDb(elem.second->getBucketDatabase()); // Remove all buckets not belonging to this distributor, or // being on storage nodes that are no longer up. NodeRemover proc( - _distributorComponent.getClusterState(), - newState, + oldClusterState, + *newState.getDerivedClusterState(elem.first), _distributorComponent.getBucketIdFactory(), _distributorComponent.getIndex(), newDistribution, @@ -158,11 +159,11 @@ BucketDBUpdater::storageDistributionChanged() { ensureTransitionTimerStarted(); - removeSuperfluousBuckets(_distributorComponent.getClusterState()); + removeSuperfluousBuckets(_distributorComponent.getClusterStateBundle()); ClusterInformation::CSP clusterInfo(new SimpleClusterInformation( _distributorComponent.getIndex(), - _distributorComponent.getClusterState(), + _distributorComponent.getClusterStateBundle(), _distributorComponent.getDistributor().getStorageNodeUpStates())); _pendingClusterState = PendingClusterState::createForDistributionChange( _distributorComponent.getClock(), @@ -192,21 +193,21 @@ BucketDBUpdater::onSetSystemState( "Received new cluster state %s", cmd->getSystemState().toString().c_str()); - lib::ClusterState oldState = _distributorComponent.getClusterState(); - const lib::ClusterState& state = cmd->getSystemState(); + const lib::ClusterStateBundle oldState = _distributorComponent.getClusterStateBundle(); + const lib::ClusterStateBundle& state = cmd->getClusterStateBundle(); if (state == oldState) { return false; } ensureTransitionTimerStarted(); - removeSuperfluousBuckets(cmd->getSystemState()); + removeSuperfluousBuckets(cmd->getClusterStateBundle()); replyToPreviousPendingClusterStateIfAny(); ClusterInformation::CSP clusterInfo( new SimpleClusterInformation( _distributorComponent.getIndex(), - _distributorComponent.getClusterState(), + _distributorComponent.getClusterStateBundle(), _distributorComponent.getDistributor() .getStorageNodeUpStates())); _pendingClusterState = PendingClusterState::createForClusterStateChange( @@ -422,7 +423,7 @@ BucketDBUpdater::processSingleBucketInfoReply( BucketRequest req = iter->second; _sentMessages.erase(iter); - if (!_distributorComponent.storageNodeIsUp(req.targetNode)) { + if (!_distributorComponent.storageNodeIsUp(req.bucket.getBucketSpace(), req.targetNode)) { // Ignore replies from nodes that are down. return true; } @@ -488,7 +489,7 @@ BucketDBUpdater::processCompletedPendingClusterState() _pendingClusterState->mergeIntoBucketDatabases(); if (_pendingClusterState->getCommand().get()) { - enableCurrentClusterStateInDistributor(); + enableCurrentClusterStateBundleInDistributor(); _distributorComponent.getDistributor().getMessageSender().sendDown( _pendingClusterState->getCommand()); addCurrentStateToClusterStateHistory(); @@ -503,16 +504,16 @@ BucketDBUpdater::processCompletedPendingClusterState() } void -BucketDBUpdater::enableCurrentClusterStateInDistributor() +BucketDBUpdater::enableCurrentClusterStateBundleInDistributor() { - const lib::ClusterState& state( - _pendingClusterState->getCommand()->getSystemState()); + const lib::ClusterStateBundle& state( + _pendingClusterState->getCommand()->getClusterStateBundle()); LOG(debug, "BucketDBUpdater finished processing state %s", - state.toString().c_str()); + state.getBaselineClusterState()->toString().c_str()); - _distributorComponent.getDistributor().enableClusterState(state); + _distributorComponent.getDistributor().enableClusterStateBundle(state); } void @@ -563,7 +564,7 @@ BucketDBUpdater::reportXmlStatus(vespalib::xml::XmlOutputStream& xos, using namespace vespalib::xml; xos << XmlTag("bucketdb") << XmlTag("systemstate_active") - << XmlContent(_distributorComponent.getClusterState().toString()) + << XmlContent(_distributorComponent.getClusterStateBundle().getBaselineClusterState()->toString()) << XmlEndTag(); if (_pendingClusterState) { xos << *_pendingClusterState; diff --git a/storage/src/vespa/storage/distributor/bucketdbupdater.h b/storage/src/vespa/storage/distributor/bucketdbupdater.h index 19e2e259778..a85ee6fe4f7 100644 --- a/storage/src/vespa/storage/distributor/bucketdbupdater.h +++ b/storage/src/vespa/storage/distributor/bucketdbupdater.h @@ -152,11 +152,11 @@ private: void updateState(const lib::ClusterState& oldState, const lib::ClusterState& newState); - void removeSuperfluousBuckets(const lib::ClusterState& newState); + void removeSuperfluousBuckets(const lib::ClusterStateBundle& newState); void replyToPreviousPendingClusterStateIfAny(); - void enableCurrentClusterStateInDistributor(); + void enableCurrentClusterStateBundleInDistributor(); void addCurrentStateToClusterStateHistory(); void enqueueRecheckUntilPendingStateEnabled(uint16_t node, const document::Bucket&); void sendAllQueuedBucketRechecks(); diff --git a/storage/src/vespa/storage/distributor/clusterinformation.cpp b/storage/src/vespa/storage/distributor/clusterinformation.cpp index cd09e4f46d4..96e94c92819 100644 --- a/storage/src/vespa/storage/distributor/clusterinformation.cpp +++ b/storage/src/vespa/storage/distributor/clusterinformation.cpp @@ -2,6 +2,7 @@ #include "clusterinformation.h" #include <vespa/vdslib/distribution/distribution.h> +#include <vespa/vdslib/state/cluster_state_bundle.h> #include <vespa/vdslib/state/clusterstate.h> namespace storage::distributor { @@ -9,7 +10,7 @@ namespace storage::distributor { uint16_t ClusterInformation::getStorageNodeCount() const { - return getClusterState().getNodeCount(lib::NodeType::STORAGE); + return getClusterStateBundle().getBaselineClusterState()->getNodeCount(lib::NodeType::STORAGE); } } diff --git a/storage/src/vespa/storage/distributor/clusterinformation.h b/storage/src/vespa/storage/distributor/clusterinformation.h index 25f303d0f52..49abb5e8e75 100644 --- a/storage/src/vespa/storage/distributor/clusterinformation.h +++ b/storage/src/vespa/storage/distributor/clusterinformation.h @@ -10,8 +10,7 @@ namespace storage { namespace lib { -class Distribution; -class ClusterState; +class ClusterStateBundle; } @@ -26,7 +25,7 @@ public: virtual uint16_t getDistributorIndex() const = 0; - virtual const lib::ClusterState& getClusterState() const = 0; + virtual const lib::ClusterStateBundle& getClusterStateBundle() const = 0; virtual const char* getStorageUpStates() const = 0; diff --git a/storage/src/vespa/storage/distributor/distributor.cpp b/storage/src/vespa/storage/distributor/distributor.cpp index 39658912a2e..86a8ac46cbb 100644 --- a/storage/src/vespa/storage/distributor/distributor.cpp +++ b/storage/src/vespa/storage/distributor/distributor.cpp @@ -63,6 +63,7 @@ Distributor::Distributor(DistributorComponentRegister& compReg, : StorageLink("distributor"), DistributorInterface(), framework::StatusReporter("distributor", "Distributor"), + _clusterStateBundle(lib::ClusterState()), _compReg(compReg), _component(compReg, "distributor"), _bucketSpaceRepo(std::make_unique<DistributorBucketSpaceRepo>()), @@ -107,6 +108,7 @@ Distributor::Distributor(DistributorComponentRegister& compReg, _bucketDBStatusDelegate.registerStatusPage(); hostInfoReporterRegistrar.registerReporter(&_hostInfoReporter); propagateDefaultDistribution(_component.getDistribution()); + propagateClusterStates(); }; Distributor::~Distributor() @@ -331,16 +333,24 @@ Distributor::handleMessage(const std::shared_ptr<api::StorageMessage>& msg) return false; } +const lib::ClusterStateBundle& +Distributor::getClusterStateBundle() const +{ + return _clusterStateBundle; +} + void -Distributor::enableClusterState(const lib::ClusterState& state) +Distributor::enableClusterStateBundle(const lib::ClusterStateBundle& state) { - lib::ClusterState oldState = _clusterState; - _clusterState = state; + lib::ClusterStateBundle oldState = _clusterStateBundle; + _clusterStateBundle = state; + propagateClusterStates(); lib::Node myNode(lib::NodeType::DISTRIBUTOR, _component.getIndex()); + const auto &baselineState = *_clusterStateBundle.getBaselineClusterState(); if (!_doneInitializing && - getClusterState().getNodeState(myNode).getState() == lib::State::UP) + baselineState.getNodeState(myNode).getState() == lib::State::UP) { scanAllBuckets(); _doneInitializing = true; @@ -350,8 +360,8 @@ Distributor::enableClusterState(const lib::ClusterState& state) } // Clear all active messages on nodes that are down. - for (uint16_t i = 0; i < state.getNodeCount(lib::NodeType::STORAGE); ++i) { - if (!state.getNodeState(lib::Node(lib::NodeType::STORAGE, i)).getState() + for (uint16_t i = 0; i < baselineState.getNodeCount(lib::NodeType::STORAGE); ++i) { + if (!baselineState.getNodeState(lib::Node(lib::NodeType::STORAGE, i)).getState() .oneOf(getStorageNodeUpStates())) { std::vector<uint64_t> msgIds( @@ -533,6 +543,14 @@ Distributor::propagateDefaultDistribution( } void +Distributor::propagateClusterStates() +{ + for (auto &iter : *_bucketSpaceRepo) { + iter.second->setClusterState(_clusterStateBundle.getDerivedClusterState(iter.first)); + } +} + +void Distributor::signalWorkWasDone() { _tickResult = framework::ThreadWaitInfo::MORE_WORK_ENQUEUED; diff --git a/storage/src/vespa/storage/distributor/distributor.h b/storage/src/vespa/storage/distributor/distributor.h index b4b235838c5..e28c6dd6578 100644 --- a/storage/src/vespa/storage/distributor/distributor.h +++ b/storage/src/vespa/storage/distributor/distributor.h @@ -78,7 +78,7 @@ public: * Enables a new cluster state. Called after the bucket db updater has * retrieved all bucket info related to the change. */ - void enableClusterState(const lib::ClusterState& clusterState) override; + void enableClusterStateBundle(const lib::ClusterStateBundle& clusterStateBundle) override; /** * Invoked when a pending cluster state for a distribution (config) @@ -114,9 +114,7 @@ public: */ void checkBucketForSplit(document::BucketSpace bucketSpace, const BucketDatabase::Entry& e, uint8_t priority) override; - const lib::ClusterState& getClusterState() const override { - return _clusterState; - } + const lib::ClusterStateBundle& getClusterStateBundle() const override; /** * @return Returns the states in which the distributors consider @@ -233,8 +231,9 @@ private: void enableNextDistribution(); void propagateDefaultDistribution(std::shared_ptr<const lib::Distribution>); + void propagateClusterStates(); - lib::ClusterState _clusterState; + lib::ClusterStateBundle _clusterStateBundle; DistributorComponentRegister& _compReg; storage::DistributorComponent _component; diff --git a/storage/src/vespa/storage/distributor/distributor_bucket_space.cpp b/storage/src/vespa/storage/distributor/distributor_bucket_space.cpp index 68fe9f441d7..dcf7792860e 100644 --- a/storage/src/vespa/storage/distributor/distributor_bucket_space.cpp +++ b/storage/src/vespa/storage/distributor/distributor_bucket_space.cpp @@ -1,18 +1,30 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "distributor_bucket_space.h" +#include <vespa/vdslib/state/clusterstate.h> +#include <vespa/vdslib/distribution/distribution.h> -namespace storage { -namespace distributor { +namespace storage::distributor { DistributorBucketSpace::DistributorBucketSpace() : _bucketDatabase(), + _clusterState(), _distribution() { } -DistributorBucketSpace::~DistributorBucketSpace() { +DistributorBucketSpace::~DistributorBucketSpace() = default; + +void +DistributorBucketSpace::setClusterState(std::shared_ptr<const lib::ClusterState> clusterState) +{ + _clusterState = std::move(clusterState); } + +void +DistributorBucketSpace::setDistribution(std::shared_ptr<const lib::Distribution> distribution) { + _distribution = std::move(distribution); } + } diff --git a/storage/src/vespa/storage/distributor/distributor_bucket_space.h b/storage/src/vespa/storage/distributor/distributor_bucket_space.h index 30893e8cfb1..eca50d4263e 100644 --- a/storage/src/vespa/storage/distributor/distributor_bucket_space.h +++ b/storage/src/vespa/storage/distributor/distributor_bucket_space.h @@ -2,12 +2,12 @@ #pragma once #include <vespa/storage/bucketdb/mapbucketdatabase.h> -#include <vespa/vdslib/distribution/distribution.h> #include <memory> namespace storage { namespace lib { +class ClusterState; class Distribution; } @@ -26,6 +26,7 @@ namespace distributor { */ class DistributorBucketSpace { MapBucketDatabase _bucketDatabase; + std::shared_ptr<const lib::ClusterState> _clusterState; std::shared_ptr<const lib::Distribution> _distribution; public: DistributorBucketSpace(); @@ -43,9 +44,11 @@ public: return _bucketDatabase; } - void setDistribution(std::shared_ptr<const lib::Distribution> distribution) { - _distribution = std::move(distribution); - } + void setClusterState(std::shared_ptr<const lib::ClusterState> clusterState); + + const lib::ClusterState &getClusterState() const noexcept { return *_clusterState; } + + void setDistribution(std::shared_ptr<const lib::Distribution> distribution); // Precondition: setDistribution has been called at least once prior. const lib::Distribution& getDistribution() const noexcept { diff --git a/storage/src/vespa/storage/distributor/distributorcomponent.cpp b/storage/src/vespa/storage/distributor/distributorcomponent.cpp index ac6a2957052..1d2465fb41a 100644 --- a/storage/src/vespa/storage/distributor/distributorcomponent.cpp +++ b/storage/src/vespa/storage/distributor/distributorcomponent.cpp @@ -3,6 +3,7 @@ #include <vespa/storage/common/bucketoperationlogger.h> #include <vespa/storageapi/messageapi/storagereply.h> #include <vespa/vdslib/distribution/distribution.h> +#include <vespa/vdslib/state/cluster_state_bundle.h> #include "distributor_bucket_space_repo.h" #include "distributor_bucket_space.h" @@ -40,10 +41,10 @@ DistributorComponent::sendUp(const api::StorageMessage::SP& msg) _distributor.getMessageSender().sendUp(msg); } -const lib::ClusterState& -DistributorComponent::getClusterState() const +const lib::ClusterStateBundle& +DistributorComponent::getClusterStateBundle() const { - return _distributor.getClusterState(); + return _distributor.getClusterStateBundle(); }; std::vector<uint16_t> @@ -51,7 +52,7 @@ DistributorComponent::getIdealNodes(const document::Bucket &bucket) const { auto &bucketSpace(_bucketSpaceRepo.get(bucket.getBucketSpace())); return bucketSpace.getDistribution().getIdealStorageNodes( - getClusterState(), + bucketSpace.getClusterState(), bucket.getBucketId(), _distributor.getStorageNodeUpStates()); } @@ -89,7 +90,7 @@ DistributorComponent::checkOwnershipInPendingAndCurrentState( { auto &bucketSpace(_bucketSpaceRepo.get(bucket.getBucketSpace())); return checkOwnershipInPendingAndGivenState( - bucketSpace.getDistribution(), getClusterState(), bucket); + bucketSpace.getDistribution(), bucketSpace.getClusterState(), bucket); } bool @@ -126,7 +127,7 @@ bool DistributorComponent::ownsBucketInCurrentState(const document::Bucket &bucket) const { auto &bucketSpace(_bucketSpaceRepo.get(bucket.getBucketSpace())); - return ownsBucketInState(bucketSpace.getDistribution(), getClusterState(), bucket); + return ownsBucketInState(bucketSpace.getDistribution(), bucketSpace.getClusterState(), bucket); } api::StorageMessageAddress @@ -260,7 +261,7 @@ DistributorComponent::updateBucketDatabase( // Ensure that we're not trying to bring any zombie copies into the // bucket database (i.e. copies on nodes that are actually down). std::vector<uint16_t> downNodes( - enumerateDownNodes(getClusterState(), bucket, changedNodes)); + enumerateDownNodes(bucketSpace.getClusterState(), bucket, changedNodes)); // Optimize for common case where we don't have to create a new // bucket copy vector if (downNodes.empty()) { @@ -305,9 +306,9 @@ DistributorComponent::getBucketId(const document::DocumentId& docId) const } bool -DistributorComponent::storageNodeIsUp(uint32_t nodeIndex) const +DistributorComponent::storageNodeIsUp(document::BucketSpace bucketSpace, uint32_t nodeIndex) const { - const lib::NodeState& ns = getClusterState().getNodeState( + const lib::NodeState& ns = getClusterStateBundle().getDerivedClusterState(bucketSpace)->getNodeState( lib::Node(lib::NodeType::STORAGE, nodeIndex)); return ns.getState().oneOf(_distributor.getStorageNodeUpStates()); diff --git a/storage/src/vespa/storage/distributor/distributorcomponent.h b/storage/src/vespa/storage/distributor/distributorcomponent.h index 33e86d423e7..184ac768afb 100644 --- a/storage/src/vespa/storage/distributor/distributorcomponent.h +++ b/storage/src/vespa/storage/distributor/distributorcomponent.h @@ -68,10 +68,10 @@ public: bool ownsBucketInCurrentState(const document::Bucket &bucket) const; /** - * Returns a reference to the current system state. Valid until the next - * time the distributor main thread processes its message queue. + * Returns a reference to the current cluster state bundle. Valid until the + * next time the distributor main thread processes its message queue. */ - const lib::ClusterState& getClusterState() const; + const lib::ClusterStateBundle& getClusterStateBundle() const; /** * Returns the ideal nodes for the given bucket. @@ -86,7 +86,7 @@ public: /** * Returns true if the given storage node is in an "up state". */ - bool storageNodeIsUp(uint32_t nodeIndex) const; + bool storageNodeIsUp(document::BucketSpace bucketSpace, uint32_t nodeIndex) const; /** * Verifies that the given command has been received at the diff --git a/storage/src/vespa/storage/distributor/distributorinterface.h b/storage/src/vespa/storage/distributor/distributorinterface.h index 3445397c17d..17c300fa0a9 100644 --- a/storage/src/vespa/storage/distributor/distributorinterface.h +++ b/storage/src/vespa/storage/distributor/distributorinterface.h @@ -8,6 +8,7 @@ #include <vespa/document/bucket/bucket.h> namespace storage::api { class MergeBucketReply; } +namespace storage::lib { class ClusterStateBundle; } namespace storage { class DistributorConfiguration; class DistributorMetricSet; @@ -21,7 +22,7 @@ class DistributorInterface : public DistributorMessageSender public: virtual PendingMessageTracker& getPendingMessageTracker() = 0; virtual DistributorMetricSet& getMetrics() = 0; - virtual void enableClusterState(const lib::ClusterState& state) = 0; + virtual void enableClusterStateBundle(const lib::ClusterStateBundle& state) = 0; virtual BucketOwnership checkOwnershipInPendingState(const document::Bucket &bucket) const = 0; virtual void notifyDistributionChangeEnabled() = 0; @@ -43,9 +44,9 @@ public: virtual void checkBucketForSplit(document::BucketSpace bucketSpace, const BucketDatabase::Entry& e, uint8_t pri) = 0; /** - * @return Returns the current cluster state. + * @return Returns the current cluster state bundle. */ - virtual const lib::ClusterState& getClusterState() const = 0; + virtual const lib::ClusterStateBundle& getClusterStateBundle() const = 0; /** * Returns true if the node is currently initializing. diff --git a/storage/src/vespa/storage/distributor/idealstatemanager.cpp b/storage/src/vespa/storage/distributor/idealstatemanager.cpp index 4018bb88583..773014391fd 100644 --- a/storage/src/vespa/storage/distributor/idealstatemanager.cpp +++ b/storage/src/vespa/storage/distributor/idealstatemanager.cpp @@ -68,9 +68,10 @@ bool IdealStateManager::iAmUp() const { Node node(NodeType::DISTRIBUTOR, _distributorComponent.getIndex()); - const lib::State &nodeState = _distributorComponent.getClusterState() - .getNodeState(node).getState(); - const lib::State &clusterState = _distributorComponent.getClusterState().getClusterState(); + // Assume that derived cluster states agree on distributor node being up + const auto &state = *_distributorComponent.getClusterStateBundle().getBaselineClusterState(); + const lib::State &nodeState = state.getNodeState(node).getState(); + const lib::State &clusterState = state.getClusterState(); return (nodeState == lib::State::UP && clusterState == lib::State::UP); } @@ -278,7 +279,7 @@ void IdealStateManager::dump_bucket_space_db_status(document::BucketSpace bucket void IdealStateManager::getBucketStatus(std::ostream& out) const { LOG(debug, "Dumping bucket database valid at cluster state version %u", - _distributorComponent.getDistributor().getClusterState().getVersion()); + _distributorComponent.getDistributor().getClusterStateBundle().getVersion()); for (auto& space : _bucketSpaceRepo) { dump_bucket_space_db_status(space.first, out); diff --git a/storage/src/vespa/storage/distributor/operations/external/multioperationoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/multioperationoperation.cpp index 5d93d3e3a5a..1ea077fd1c1 100644 --- a/storage/src/vespa/storage/distributor/operations/external/multioperationoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/multioperationoperation.cpp @@ -40,7 +40,7 @@ MultiOperationOperation::sendToBucket( std::vector<MessageTracker::ToSend> createBucketBatch; if (PutOperation::checkCreateBucket(_bucketSpace.getDistribution(), - _manager.getClusterState(), + _bucketSpace.getClusterState(), e, targetNodes, createBucketBatch, @@ -114,12 +114,12 @@ struct BucketOperationList { void MultiOperationOperation::onStart(DistributorMessageSender& sender) { - lib::ClusterState systemState = _manager.getClusterState(); + lib::ClusterState systemState = _bucketSpace.getClusterState(); // Don't do anything if all nodes are down. bool up = false; for (uint16_t i = 0; i < systemState.getNodeCount(lib::NodeType::STORAGE); i++) { - if (_manager.storageNodeIsUp(i)) { + if (_manager.storageNodeIsUp(_msg->getBucket().getBucketSpace(), i)) { up = true; break; } diff --git a/storage/src/vespa/storage/distributor/operations/external/putoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/putoperation.cpp index 659a7f1d435..de0d1559c1f 100644 --- a/storage/src/vespa/storage/distributor/operations/external/putoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/putoperation.cpp @@ -196,7 +196,7 @@ PutOperation::insertDatabaseEntryAndScheduleCreateBucket( _bucketSpace.getBucketDatabase().get(lastBucket)); std::vector<uint16_t> idealState( _bucketSpace.getDistribution().getIdealStorageNodes( - _manager.getClusterState(), lastBucket, "ui")); + _bucketSpace.getClusterState(), lastBucket, "ui")); active = ActiveCopy::calculate(idealState, _bucketSpace.getDistribution(), entry); LOG(debug, "Active copies for bucket %s: %s", @@ -261,7 +261,7 @@ PutOperation::onStart(DistributorMessageSender& sender) _msg->getDocumentId().toString().c_str(), bid.toString().c_str()); - lib::ClusterState systemState = _manager.getClusterState(); + lib::ClusterState systemState = _bucketSpace.getClusterState(); // Don't do anything if all nodes are down. bool up = false; @@ -278,7 +278,7 @@ PutOperation::onStart(DistributorMessageSender& sender) lib::IdealNodeCalculatorImpl idealNodeCalculator; idealNodeCalculator.setDistribution(_bucketSpace.getDistribution()); - idealNodeCalculator.setClusterState(_manager.getClusterState()); + idealNodeCalculator.setClusterState(_bucketSpace.getClusterState()); OperationTargetResolverImpl targetResolver( _bucketSpace.getBucketDatabase(), idealNodeCalculator, diff --git a/storage/src/vespa/storage/distributor/operations/external/removelocationoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/removelocationoperation.cpp index 2584244023b..febba2cf16d 100644 --- a/storage/src/vespa/storage/distributor/operations/external/removelocationoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/removelocationoperation.cpp @@ -96,7 +96,7 @@ RemoveLocationOperation::onStart(DistributorMessageSender& sender) "Remove location %s failed since no available nodes found. " "System state is %s", _msg->toString().c_str(), - _manager.getClusterState().toString().c_str()); + _bucketSpace.getClusterState().toString().c_str()); _tracker.fail(sender, api::ReturnCode(api::ReturnCode::OK)); } else { diff --git a/storage/src/vespa/storage/distributor/operations/external/removeoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/removeoperation.cpp index 4b9a7b3f173..0ad3d282cc1 100644 --- a/storage/src/vespa/storage/distributor/operations/external/removeoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/removeoperation.cpp @@ -70,7 +70,7 @@ RemoveOperation::onStart(DistributorMessageSender& sender) "Remove document %s failed since no available nodes found. " "System state is %s", _msg->getDocumentId().toString().c_str(), - _manager.getClusterState().toString().c_str()); + _bucketSpace.getClusterState().toString().c_str()); _tracker.fail(sender, api::ReturnCode(api::ReturnCode::OK)); } else { diff --git a/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp index d622c42b321..568bff81a1b 100644 --- a/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/updateoperation.cpp @@ -34,7 +34,7 @@ UpdateOperation::UpdateOperation(DistributorComponent& manager, bool UpdateOperation::anyStorageNodesAvailable() const { - const auto& clusterState(_manager.getClusterState()); + const auto& clusterState(_bucketSpace.getClusterState()); const auto storageNodeCount( clusterState.getNodeCount(lib::NodeType::STORAGE)); diff --git a/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp b/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp index 9f92d313f1f..c7ba3d8f4cd 100644 --- a/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp @@ -232,7 +232,7 @@ private: void VisitorOperation::verifyDistributorsAreAvailable() { - const lib::ClusterState& clusterState = _owner.getClusterState(); + const lib::ClusterState& clusterState = _bucketSpace.getClusterState(); if (clusterState.getNodeCount(lib::NodeType::DISTRIBUTOR) == 0) { vespalib::string err(vespalib::make_string( "No distributors available when processing visitor '%s'", @@ -246,7 +246,7 @@ void VisitorOperation::verifyVisitorDistributionBitCount( const document::BucketId& bid) { - const lib::ClusterState& clusterState = _owner.getClusterState(); + const lib::ClusterState& clusterState = _bucketSpace.getClusterState(); if (_msg->getDocumentSelection().length() == 0 && bid.getUsedBits() != clusterState.getDistributionBitCount()) { @@ -786,7 +786,7 @@ VisitorOperation::startNewVisitors(DistributorMessageSender& sender) void VisitorOperation::initializeActiveNodes() { - const lib::ClusterState& clusterState(_owner.getClusterState()); + const lib::ClusterState& clusterState(_bucketSpace.getClusterState()); uint32_t storageNodeCount = clusterState.getNodeCount(lib::NodeType::STORAGE); if (storageNodeCount > _activeNodes.size()) { diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/mergeoperation.cpp b/storage/src/vespa/storage/distributor/operations/idealstate/mergeoperation.cpp index 32ea695bd94..170ad8a9ac5 100644 --- a/storage/src/vespa/storage/distributor/operations/idealstate/mergeoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/idealstate/mergeoperation.cpp @@ -113,7 +113,7 @@ MergeOperation::onStart(DistributorMessageSender& sender) return; } - const lib::ClusterState& clusterState(_manager->getDistributorComponent().getClusterState()); + const lib::ClusterState& clusterState(_bucketSpace->getClusterState()); std::vector<std::unique_ptr<BucketCopy> > newCopies; std::vector<MergeMetaData> nodes; diff --git a/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp index ed9c8bc222b..2071558628e 100644 --- a/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp +++ b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp @@ -29,7 +29,7 @@ PendingBucketSpaceDbTransition::PendingBucketSpaceDbTransition(const PendingClus _missingEntries(), _clusterInfo(std::move(clusterInfo)), _outdatedNodes(newClusterState.getNodeCount(NodeType::STORAGE)), - _prevClusterState(_clusterInfo->getClusterState()), + _prevClusterState(distributorBucketSpace.getClusterState()), _newClusterState(newClusterState), _creationTimestamp(creationTimestamp), _pendingClusterState(pendingClusterState), diff --git a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp index 3c96bc55161..a08445ca3d2 100644 --- a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp +++ b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp @@ -32,8 +32,8 @@ PendingClusterState::PendingClusterState( api::Timestamp creationTimestamp) : _cmd(newStateCmd), _requestedNodes(newStateCmd->getSystemState().getNodeCount(lib::NodeType::STORAGE)), - _prevClusterState(clusterInfo->getClusterState()), - _newClusterState(newStateCmd->getSystemState()), + _prevClusterStateBundle(clusterInfo->getClusterStateBundle()), + _newClusterStateBundle(newStateCmd->getClusterStateBundle()), _clock(clock), _clusterInfo(clusterInfo), _creationTimestamp(creationTimestamp), @@ -53,8 +53,8 @@ PendingClusterState::PendingClusterState( DistributorBucketSpaceRepo &bucketSpaceRepo, api::Timestamp creationTimestamp) : _requestedNodes(clusterInfo->getStorageNodeCount()), - _prevClusterState(clusterInfo->getClusterState()), - _newClusterState(clusterInfo->getClusterState()), + _prevClusterStateBundle(clusterInfo->getClusterStateBundle()), + _newClusterStateBundle(clusterInfo->getClusterStateBundle()), _clock(clock), _clusterInfo(clusterInfo), _creationTimestamp(creationTimestamp), @@ -79,7 +79,7 @@ PendingClusterState::initializeBucketSpaceTransitions(bool distributionChanged, auto pendingTransition = std::make_unique<PendingBucketSpaceDbTransition> (*this, *elem.second, distributionChanged, outdatedNodes, - _clusterInfo, _newClusterState, _creationTimestamp); + _clusterInfo, *_newClusterStateBundle.getDerivedClusterState(elem.first), _creationTimestamp); if (pendingTransition->getBucketOwnershipTransfer()) { _bucketOwnershipTransfer = true; } @@ -99,15 +99,15 @@ PendingClusterState::logConstructionInformation() const "New PendingClusterState constructed with previous cluster " "state '%s', new cluster state '%s', distribution config " "hash: '%s'", - _prevClusterState.toString().c_str(), - _newClusterState.toString().c_str(), + getPrevClusterStateBundleString().c_str(), + getNewClusterStateBundleString().c_str(), distribution.getNodeGraph().getDistributionConfigHash().c_str()); } bool -PendingClusterState::storageNodeUpInNewState(uint16_t node) const +PendingClusterState::storageNodeUpInNewState(document::BucketSpace bucketSpace, uint16_t node) const { - return _newClusterState.getNodeState(Node(NodeType::STORAGE, node)) + return _newClusterStateBundle.getDerivedClusterState(bucketSpace)->getNodeState(Node(NodeType::STORAGE, node)) .getState().oneOf(_clusterInfo->getStorageUpStates()); } @@ -124,7 +124,7 @@ PendingClusterState::getOutdatedNodesMap() const uint16_t PendingClusterState::newStateStorageNodeCount() const { - return _newClusterState.getNodeCount(lib::NodeType::STORAGE); + return _newClusterStateBundle.getBaselineClusterState()->getNodeCount(lib::NodeType::STORAGE); } bool @@ -144,15 +144,15 @@ PendingClusterState::shouldRequestBucketInfo() const bool PendingClusterState::clusterIsDown() const { - return _newClusterState.getClusterState() == lib::State::DOWN; + return _newClusterStateBundle.getBaselineClusterState()->getClusterState() == lib::State::DOWN; } bool PendingClusterState::iAmDown() const { const lib::NodeState& myState( - _newClusterState.getNodeState(Node(NodeType::DISTRIBUTOR, - _sender.getDistributorIndex()))); + _newClusterStateBundle.getBaselineClusterState()->getNodeState(Node(NodeType::DISTRIBUTOR, + _sender.getDistributorIndex()))); return myState.getState() == lib::State::DOWN; } @@ -161,8 +161,8 @@ PendingClusterState::requestNodes() { LOG(debug, "New system state: Old state was %s, new state is %s", - _prevClusterState.toString().c_str(), - _newClusterState.toString().c_str()); + getPrevClusterStateBundleString().c_str(), + getNewClusterStateBundleString().c_str()); requestBucketInfoFromStorageNodesWithChangedState(); } @@ -173,7 +173,7 @@ PendingClusterState::requestBucketInfoFromStorageNodesWithChangedState() for (auto &elem : _pendingTransitions) { const OutdatedNodes &outdatedNodes(elem.second->getOutdatedNodes()); for (uint16_t idx : outdatedNodes) { - if (storageNodeUpInNewState(idx)) { + if (storageNodeUpInNewState(elem.first, idx)) { requestNode(BucketSpaceAndNode(elem.first, idx)); } } @@ -191,14 +191,14 @@ PendingClusterState::requestNode(BucketSpaceAndNode bucketSpaceAndNode) "and distribution hash '%s'", bucketSpaceAndNode.bucketSpace.getId(), bucketSpaceAndNode.node, - _newClusterState.toString().c_str(), + getNewClusterStateBundleString().c_str(), distributionHash.c_str()); std::shared_ptr<api::RequestBucketInfoCommand> cmd( new api::RequestBucketInfoCommand( bucketSpaceAndNode.bucketSpace, _sender.getDistributorIndex(), - _newClusterState, + *_newClusterStateBundle.getDerivedClusterState(bucketSpaceAndNode.bucketSpace), distributionHash)); cmd->setPriority(api::StorageMessage::HIGH); @@ -294,7 +294,7 @@ PendingClusterState::printXml(vespalib::XmlOutputStream& xos) const { using namespace vespalib::xml; xos << XmlTag("systemstate_pending") - << XmlAttribute("state", _newClusterState); + << XmlAttribute("state", *_newClusterStateBundle.getBaselineClusterState()); for (auto &elem : _sentMessages) { xos << XmlTag("pending") << XmlAttribute("node", elem.second.node) @@ -306,8 +306,8 @@ PendingClusterState::printXml(vespalib::XmlOutputStream& xos) const PendingClusterState::Summary PendingClusterState::getSummary() const { - return Summary(_prevClusterState.toString(), - _newClusterState.toString(), + return Summary(getPrevClusterStateBundleString(), + getNewClusterStateBundleString(), (_clock.getTimeInMicros().getTime() - _creationTimestamp)); } diff --git a/storage/src/vespa/storage/distributor/pendingclusterstate.h b/storage/src/vespa/storage/distributor/pendingclusterstate.h index 2d75c795745..b96ba8cbbd7 100644 --- a/storage/src/vespa/storage/distributor/pendingclusterstate.h +++ b/storage/src/vespa/storage/distributor/pendingclusterstate.h @@ -8,6 +8,7 @@ #include <vespa/storageapi/message/state.h> #include <vespa/storageframework/generic/clock/clock.h> #include <vespa/vdslib/distribution/distribution.h> +#include <vespa/vdslib/state/cluster_state_bundle.h> #include <vespa/vespalib/util/xmlserializable.h> #include "outdated_nodes_map.h" #include <unordered_map> @@ -107,11 +108,8 @@ public: return _cmd; } - const lib::ClusterState& getNewClusterState() const { - return _newClusterState; - } - const lib::ClusterState& getPrevClusterState() const { - return _prevClusterState; + const lib::ClusterStateBundle& getNewClusterStateBundle() const { + return _newClusterStateBundle; } /** @@ -184,7 +182,13 @@ private: bool clusterIsDown() const; bool iAmDown() const; - bool storageNodeUpInNewState(uint16_t node) const; + bool storageNodeUpInNewState(document::BucketSpace bucketSpace, uint16_t node) const; + std::string getNewClusterStateBundleString() const { + return _newClusterStateBundle.getBaselineClusterState()->toString(); + } + std::string getPrevClusterStateBundleString() const { + return _prevClusterStateBundle.getBaselineClusterState()->toString(); + } std::shared_ptr<api::SetSystemStateCommand> _cmd; @@ -192,8 +196,8 @@ private: std::vector<bool> _requestedNodes; std::deque<std::pair<framework::MilliSecTime, BucketSpaceAndNode> > _delayedRequests; - lib::ClusterState _prevClusterState; - lib::ClusterState _newClusterState; + lib::ClusterStateBundle _prevClusterStateBundle; + lib::ClusterStateBundle _newClusterStateBundle; const framework::Clock& _clock; ClusterInformation::CSP _clusterInfo; diff --git a/storage/src/vespa/storage/distributor/simpleclusterinformation.h b/storage/src/vespa/storage/distributor/simpleclusterinformation.h index 2946abf620c..1247d425e50 100644 --- a/storage/src/vespa/storage/distributor/simpleclusterinformation.h +++ b/storage/src/vespa/storage/distributor/simpleclusterinformation.h @@ -11,10 +11,10 @@ class SimpleClusterInformation : public ClusterInformation { public: SimpleClusterInformation(uint16_t myIndex, - const lib::ClusterState& clusterState, + const lib::ClusterStateBundle& clusterStateBundle, const char* storageUpStates) : _myIndex(myIndex), - _clusterState(clusterState), + _clusterStateBundle(clusterStateBundle), _storageUpStates(storageUpStates) {} @@ -22,8 +22,8 @@ public: return _myIndex; } - const lib::ClusterState& getClusterState() const override { - return _clusterState; + const lib::ClusterStateBundle& getClusterStateBundle() const override { + return _clusterStateBundle; } const char* getStorageUpStates() const override { @@ -32,7 +32,7 @@ public: private: uint16_t _myIndex; - lib::ClusterState _clusterState; + lib::ClusterStateBundle _clusterStateBundle; const char* _storageUpStates; }; diff --git a/storage/src/vespa/storage/distributor/statechecker.cpp b/storage/src/vespa/storage/distributor/statechecker.cpp index f959e5a80fb..12dbd7f3c52 100644 --- a/storage/src/vespa/storage/distributor/statechecker.cpp +++ b/storage/src/vespa/storage/distributor/statechecker.cpp @@ -65,7 +65,7 @@ StateChecker::Context::Context(const DistributorComponent& c, const document::Bucket &bucket_) : bucket(bucket_), siblingBucket(c.getSibling(bucket.getBucketId())), - systemState(c.getClusterState()), + systemState(distributorBucketSpace.getClusterState()), distributorConfig(c.getDistributor().getConfig()), distribution(distributorBucketSpace.getDistribution()), gcTimeCalculator(c.getDistributor().getBucketIdHasher(), |