diff options
author | Jon Marius Venstad <jonmv@users.noreply.github.com> | 2018-02-27 09:56:35 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-02-27 09:56:35 +0100 |
commit | c90f477b4bb2ee7371c22f8f495eefe2e163f5c4 (patch) | |
tree | 7d50599e781b53f4a8581ea8d1fc531b7ee17c02 | |
parent | 73d21caf21f0ef72f4ed02c9527010867c0a7afd (diff) | |
parent | e6e2aeb2cb91ea389ccc4bb4c3eefad41e28f8b7 (diff) |
Merge branch 'master' into jvenstad/argh-2
53 files changed, 648 insertions, 274 deletions
diff --git a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ClusterId.java b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ClusterId.java index da5ea7f975d..7648432dc11 100644 --- a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ClusterId.java +++ b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ClusterId.java @@ -9,6 +9,10 @@ import java.util.Objects; * @author bjorncs */ public class ClusterId { + // Common cluster IDs + public static final ClusterId ADMIN = new ClusterId("admin"); + public static final ClusterId NODE_ADMIN = new ClusterId("node-admin"); + public static final ClusterId ROUTING = new ClusterId("routing"); private final String id; diff --git a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceType.java b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceType.java index 784ff9d1e38..0054264d42f 100644 --- a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceType.java +++ b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceType.java @@ -9,6 +9,9 @@ import java.util.Objects; * @author bjorncs */ public class ServiceType { + // Common service types. + public static final ServiceType CONTAINER = new ServiceType("container"); + public static final ServiceType SLOBROK = new ServiceType("slobrok"); private final String id; diff --git a/configserver/src/main/resources/configserver-app/services.xml b/configserver/src/main/resources/configserver-app/services.xml index a2ef382a5a7..a129d7288ce 100644 --- a/configserver/src/main/resources/configserver-app/services.xml +++ b/configserver/src/main/resources/configserver-app/services.xml @@ -53,6 +53,7 @@ <preprocess:include file='hosted-vespa/scoreboard.xml' required='false' /> <preprocess:include file='controller/container.xml' required='false' /> <component id="com.yahoo.vespa.service.monitor.internal.SlobrokMonitorManagerImpl" bundle="service-monitor" /> + <component id="com.yahoo.vespa.service.monitor.internal.HealthMonitorManager" bundle="service-monitor" /> <component id="com.yahoo.vespa.service.monitor.internal.ServiceMonitorImpl" bundle="service-monitor" /> <component id="com.yahoo.vespa.orchestrator.ServiceMonitorInstanceLookupService" bundle="orchestrator" /> <component id="com.yahoo.vespa.orchestrator.status.ZookeeperStatusService" bundle="orchestrator" /> diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java index 0ec00f61311..303f5d5484b 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java @@ -35,10 +35,12 @@ import com.yahoo.vespa.serviceview.bindings.ApplicationView; import java.net.URI; import java.time.Clock; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.function.Predicate; import java.util.logging.Logger; /** @@ -208,10 +210,21 @@ public class Controller extends AbstractComponent { " to " + printableVersion(newStatus.systemVersion())); } curator.writeVersionStatus(newStatus); + // Removes confidence overrides for versions that no longer exist in the system + removeConfidenceOverride(version -> newStatus.versions().stream() + .noneMatch(vespaVersion -> vespaVersion.versionNumber() + .equals(version))); } /** Returns the latest known version status. Calling this is free but the status may be slightly out of date. */ public VersionStatus versionStatus() { return curator.readVersionStatus(); } + + /** Remove confidence override for versions matching given filter */ + public void removeConfidenceOverride(Predicate<Version> filter) { + Map<Version, VespaVersion.Confidence> overrides = new LinkedHashMap<>(curator().readConfidenceOverrides()); + overrides.keySet().removeIf(filter); + curator.writeConfidenceOverrides(overrides); + } /** Returns the current system version: The controller should drive towards running all applications on this version */ public Version systemVersion() { @@ -244,7 +257,7 @@ public class Controller extends AbstractComponent { return nodeRepositoryClient; } - private String printableVersion(Optional<VespaVersion> vespaVersion) { + private static String printableVersion(Optional<VespaVersion> vespaVersion) { return vespaVersion.map(v -> v.versionNumber().toFullString()).orElse("Unknown"); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java index 75f348904dd..8c661e7db9d 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java @@ -9,11 +9,14 @@ import com.yahoo.vespa.hosted.controller.application.ApplicationList; import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; import com.yahoo.vespa.hosted.controller.versions.VespaVersion; +import com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence; import com.yahoo.yolean.Exceptions; import java.time.Duration; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.logging.Level; import java.util.logging.Logger; @@ -42,12 +45,12 @@ public class Upgrader extends Maintainer { public void maintain() { // Determine target versions for each upgrade policy Optional<Version> canaryTarget = controller().versionStatus().systemVersion().map(VespaVersion::versionNumber); - Optional<Version> defaultTarget = newestVersionWithConfidence(VespaVersion.Confidence.normal); - Optional<Version> conservativeTarget = newestVersionWithConfidence(VespaVersion.Confidence.high); + Optional<Version> defaultTarget = newestVersionWithConfidence(Confidence.normal); + Optional<Version> conservativeTarget = newestVersionWithConfidence(Confidence.high); // Cancel upgrades to broken targets (let other ongoing upgrades complete to avoid starvation for (VespaVersion version : controller().versionStatus().versions()) { - if (version.confidence() == VespaVersion.Confidence.broken) + if (version.confidence() == Confidence.broken) cancelUpgradesOf(applications().without(UpgradePolicy.canary).upgradingTo(version.versionNumber()), version.versionNumber() + " is broken"); } @@ -67,7 +70,7 @@ public class Upgrader extends Maintainer { conservativeTarget.ifPresent(target -> upgrade(applications().with(UpgradePolicy.conservative), target)); } - private Optional<Version> newestVersionWithConfidence(VespaVersion.Confidence confidence) { + private Optional<Version> newestVersionWithConfidence(Confidence confidence) { return reversed(controller().versionStatus().versions()).stream() .filter(v -> v.confidence().equalOrHigherThan(confidence)) .findFirst() @@ -124,18 +127,20 @@ public class Upgrader extends Maintainer { curator.writeUpgradesPerMinute(n); } - /** - * Returns whether to ignore confidence calculations when upgrading - */ - public boolean ignoreConfidence() { - return curator.readIgnoreConfidence(); + /** Override confidence for given version. This will cause the computed confidence to be ignored */ + public void overrideConfidence(Version version, Confidence confidence) { + Map<Version, Confidence> overrides = new LinkedHashMap<>(curator.readConfidenceOverrides()); + overrides.put(version, confidence); + curator.writeConfidenceOverrides(overrides); } - /** - * Controls whether to ignore confidence calculations or not - */ - public void ignoreConfidence(boolean value) { - curator.writeIgnoreConfidence(value); + /** Returns all confidence overrides */ + public Map<Version, Confidence> confidenceOverrides() { + return curator.readConfidenceOverrides(); } + /** Remove confidence override for given version */ + public void removeConfidenceOverride(Version version) { + controller().removeConfidenceOverride(v -> v.equals(version)); + } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ConfidenceOverrideSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ConfidenceOverrideSerializer.java new file mode 100644 index 00000000000..c56d8b3849c --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ConfidenceOverrideSerializer.java @@ -0,0 +1,42 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.controller.persistence; + + +import com.yahoo.component.Version; +import com.yahoo.slime.Cursor; +import com.yahoo.slime.ObjectTraverser; +import com.yahoo.slime.Slime; +import com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence; + +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * Serializes overrides of version confidence. + * + * @author mpolden + */ +public class ConfidenceOverrideSerializer { + + private final static String overridesField = "overrides"; + + public Slime toSlime(Map<Version, Confidence> overrides) { + Slime slime = new Slime(); + Cursor root = slime.setObject(); + Cursor object = root.setObject(overridesField); + overrides.forEach((version, confidence) -> object.setString(version.toString(), confidence.name())); + return slime; + } + + public Map<Version, Confidence> fromSlime(Slime slime) { + Cursor root = slime.get(); + Cursor overridesObject = root.field(overridesField); + Map<Version, Confidence> overrides = new LinkedHashMap<>(); + overridesObject.traverse((ObjectTraverser) (name, value) -> { + overrides.put(Version.fromString(name), Confidence.valueOf(value.asString())); + }); + return Collections.unmodifiableMap(overrides); + } + +} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java index caf2af5114c..bef33e739be 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.controller.persistence; import com.google.inject.Inject; +import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.path.Path; import com.yahoo.transaction.NestedTransaction; @@ -11,6 +12,7 @@ import com.yahoo.vespa.curator.Lock; import com.yahoo.vespa.hosted.controller.api.identifiers.TenantId; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs; import com.yahoo.vespa.hosted.controller.versions.VersionStatus; +import com.yahoo.vespa.hosted.controller.versions.VespaVersion; import java.io.IOException; import java.io.UncheckedIOException; @@ -21,6 +23,7 @@ import java.util.Collections; import java.util.Deque; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -157,19 +160,7 @@ public class CuratorDb { } curator.set(upgradesPerMinutePath(), ByteBuffer.allocate(Double.BYTES).putDouble(n).array()); } - - public boolean readIgnoreConfidence() { - Optional<byte[]> value = curator.getData(ignoreConfidencePath()); - if ( ! value.isPresent() || value.get().length == 0) { - return false; // Default if value has never been written - } - return ByteBuffer.wrap(value.get()).getInt() == 1; - } - - public void writeIgnoreConfidence(boolean value) { - curator.set(ignoreConfidencePath(), ByteBuffer.allocate(Integer.BYTES).putInt(value ? 1 : 0).array()); - } - + public void writeVersionStatus(VersionStatus status) { VersionStatusSerializer serializer = new VersionStatusSerializer(); try { @@ -188,30 +179,57 @@ public class CuratorDb { return serializer.fromSlime(SlimeUtils.jsonToSlime(data.get())); } + public void writeConfidenceOverrides(Map<Version, VespaVersion.Confidence> overrides) { + ConfidenceOverrideSerializer serializer = new ConfidenceOverrideSerializer(); + try { + curator.set(confidenceOverridesPath(), SlimeUtils.toJsonBytes(serializer.toSlime(overrides))); + } catch (IOException e) { + throw new UncheckedIOException("Failed to serialize confidence overrides", e); + } + } + + public Map<Version, VespaVersion.Confidence> readConfidenceOverrides() { + ConfidenceOverrideSerializer serializer = new ConfidenceOverrideSerializer(); + Optional<byte[]> data = curator.getData(confidenceOverridesPath()); + if (!data.isPresent() || data.get().length == 0) { + return Collections.emptyMap(); + } + return serializer.fromSlime(SlimeUtils.jsonToSlime(data.get())); + } + + // The following methods are called by internal code + + @SuppressWarnings("unused") public Optional<byte[]> readProvisionState(String provisionId) { return curator.getData(provisionStatePath(provisionId)); } + @SuppressWarnings("unused") public void writeProvisionState(String provisionId, byte[] data) { curator.set(provisionStatePath(provisionId), data); } + @SuppressWarnings("unused") public List<String> readProvisionStateIds() { return curator.getChildren(provisionStatePath()); } + @SuppressWarnings("unused") public Optional<byte[]> readVespaServerPool() { return curator.getData(vespaServerPoolPath()); } + @SuppressWarnings("unused") public void writeVespaServerPool(byte[] data) { curator.set(vespaServerPoolPath(), data); } + @SuppressWarnings("unused") public Optional<byte[]> readOpenStackServerPool() { return curator.getData(openStackServerPoolPath()); } + @SuppressWarnings("unused") public void writeOpenStackServerPool(byte[] data) { curator.set(openStackServerPoolPath(), data); } @@ -242,37 +260,39 @@ public class CuratorDb { return lockPath; } - private Path inactiveJobsPath() { + private static Path inactiveJobsPath() { return root.append("inactiveJobs"); } - private Path jobQueuePath(DeploymentJobs.JobType jobType) { + private static Path jobQueuePath(DeploymentJobs.JobType jobType) { return root.append("jobQueues").append(jobType.name()); } - private Path upgradesPerMinutePath() { + private static Path upgradesPerMinutePath() { return root.append("upgrader").append("upgradesPerMinute"); } - private Path ignoreConfidencePath() { - return root.append("upgrader").append("ignoreConfidence"); + private static Path confidenceOverridesPath() { + return root.append("upgrader").append("confidenceOverrides"); } - private Path versionStatusPath() { return root.append("versionStatus"); } + private static Path versionStatusPath() { + return root.append("versionStatus"); + } - private Path provisionStatePath() { + private static Path provisionStatePath() { return root.append("provisioning").append("states"); } - private Path provisionStatePath(String provisionId) { + private static Path provisionStatePath(String provisionId) { return provisionStatePath().append(provisionId); } - private Path vespaServerPoolPath() { + private static Path vespaServerPoolPath() { return root.append("vespaServerPool"); } - private Path openStackServerPoolPath() { + private static Path openStackServerPoolPath() { return root.append("openStackServerPool"); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializer.java index 6b60b49e1ef..18648d4a488 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializer.java @@ -31,7 +31,7 @@ public class VersionStatusSerializer { // VespaVersion fields private static final String releaseCommitField = "releaseCommit"; - private static final String releasedAtField = "releasedAt"; + private static final String committedAtField = "releasedAt"; private static final String isCurrentSystemVersionField = "isCurrentSystemVersion"; private static final String deploymentStatisticsField = "deploymentStatistics"; private static final String confidenceField = "confidence"; @@ -61,7 +61,7 @@ public class VersionStatusSerializer { private void vespaVersionToSlime(VespaVersion version, Cursor object) { object.setString(releaseCommitField, version.releaseCommit()); - object.setLong(releasedAtField, version.releasedAt().toEpochMilli()); + object.setLong(committedAtField, version.committedAt().toEpochMilli()); object.setBool(isCurrentSystemVersionField, version.isCurrentSystemVersion()); deploymentStatisticsToSlime(version.statistics(), object.setObject(deploymentStatisticsField)); object.setString(confidenceField, version.confidence().name()); @@ -92,7 +92,7 @@ public class VersionStatusSerializer { private VespaVersion vespaVersionFromSlime(Inspector object) { return new VespaVersion(deploymentStatisticsFromSlime(object.field(deploymentStatisticsField)), object.field(releaseCommitField).asString(), - Instant.ofEpochMilli(object.field(releasedAtField).asLong()), + Instant.ofEpochMilli(object.field(committedAtField).asLong()), object.field(isCurrentSystemVersionField).asBool(), configServersFromSlime(object.field(configServersField)), VespaVersion.Confidence.valueOf(object.field(confidenceField).asString()) diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java index a9eaaf4048c..6fefb7099f1 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java @@ -1,12 +1,12 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.restapi.controller; +import com.yahoo.component.Version; import com.yahoo.container.jdisc.HttpRequest; import com.yahoo.container.jdisc.HttpResponse; import com.yahoo.container.jdisc.LoggingRequestHandler; import com.yahoo.io.IOUtils; import com.yahoo.slime.Inspector; -import com.yahoo.text.Utf8; import com.yahoo.vespa.config.SlimeUtils; import com.yahoo.vespa.hosted.controller.maintenance.ControllerMaintenance; import com.yahoo.vespa.hosted.controller.maintenance.Upgrader; @@ -14,11 +14,13 @@ import com.yahoo.vespa.hosted.controller.restapi.ErrorResponse; import com.yahoo.vespa.hosted.controller.restapi.MessageResponse; import com.yahoo.vespa.hosted.controller.restapi.Path; import com.yahoo.vespa.hosted.controller.restapi.ResourceResponse; +import com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence; import com.yahoo.yolean.Exceptions; import java.io.IOException; import java.io.InputStream; import java.io.UncheckedIOException; +import java.util.Scanner; import java.util.logging.Level; /** @@ -67,15 +69,15 @@ public class ControllerApiHandler extends LoggingRequestHandler { private HttpResponse post(HttpRequest request) { Path path = new Path(request.getUri().getPath()); - if (path.matches("/controller/v1/maintenance/inactive/{jobName}")) - return setActive(path.get("jobName"), false); + if (path.matches("/controller/v1/maintenance/inactive/{jobName}")) return setActive(path.get("jobName"), false); + if (path.matches("/controller/v1/jobs/upgrader/confidence/{version}")) return overrideConfidence(request, path.get("version")); return notFound(path); } private HttpResponse delete(HttpRequest request) { Path path = new Path(request.getUri().getPath()); - if (path.matches("/controller/v1/maintenance/inactive/{jobName}")) - return setActive(path.get("jobName"), true); + if (path.matches("/controller/v1/maintenance/inactive/{jobName}")) return setActive(path.get("jobName"), true); + if (path.matches("/controller/v1/jobs/upgrader/confidence/{version}")) return removeConfidenceOverride(path.get("version")); return notFound(path); } @@ -100,24 +102,41 @@ public class ControllerApiHandler extends LoggingRequestHandler { private HttpResponse configureUpgrader(HttpRequest request) { String upgradesPerMinuteField = "upgradesPerMinute"; - String ignoreConfidenceField = "ignoreConfidence"; + String confidenceOverrideField = "confidenceOverride"; byte[] jsonBytes = toJsonBytes(request.getData()); Inspector inspect = SlimeUtils.jsonToSlime(jsonBytes).get(); Upgrader upgrader = maintenance.upgrader(); + if (inspect.field(upgradesPerMinuteField).valid()) { upgrader.setUpgradesPerMinute(inspect.field(upgradesPerMinuteField).asDouble()); - } else if (inspect.field(ignoreConfidenceField).valid()) { - upgrader.ignoreConfidence(inspect.field(ignoreConfidenceField).asBool()); } else { - return ErrorResponse.badRequest("Unable to configure upgrader with data in request: '" + - Utf8.toString(jsonBytes) + "'"); + return ErrorResponse.badRequest("No such modifiable field(s)"); } return new UpgraderResponse(maintenance.upgrader()); } - private byte[] toJsonBytes(InputStream jsonStream) { + private HttpResponse removeConfidenceOverride(String version) { + maintenance.upgrader().removeConfidenceOverride(Version.fromString(version)); + return new UpgraderResponse(maintenance.upgrader()); + } + + private HttpResponse overrideConfidence(HttpRequest request, String version) { + Confidence confidence = Confidence.valueOf(asString(request.getData())); + maintenance.upgrader().overrideConfidence(Version.fromString(version), confidence); + return new UpgraderResponse(maintenance.upgrader()); + } + + private static String asString(InputStream in) { + Scanner scanner = new Scanner(in).useDelimiter("\\A"); + if (scanner.hasNext()) { + return scanner.next(); + } + return ""; + } + + private static byte[] toJsonBytes(InputStream jsonStream) { try { return IOUtils.readBytes(jsonStream, 1000 * 1000); } catch (IOException e) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/UpgraderResponse.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/UpgraderResponse.java index 0e6e0030ecf..beb6c98e447 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/UpgraderResponse.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/UpgraderResponse.java @@ -27,7 +27,11 @@ public class UpgraderResponse extends HttpResponse { Slime slime = new Slime(); Cursor root = slime.setObject(); root.setDouble("upgradesPerMinute", upgrader.upgradesPerMinute()); - root.setBool("ignoreConfidence", upgrader.ignoreConfidence()); + Cursor array = root.setArray("confidenceOverrides"); + upgrader.confidenceOverrides().forEach((version, confidence) -> { + Cursor object = array.addObject(); + object.setString(version.toString(), confidence.name()); + }); new JsonFormat(true).encode(outputStream, slime); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java index 8338d341a2b..f9d3901765f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java @@ -84,7 +84,7 @@ public class DeploymentApiHandler extends LoggingRequestHandler { versionObject.setString("version", version.versionNumber().toString()); versionObject.setString("confidence", version.confidence().name()); versionObject.setString("commit", version.releaseCommit()); - versionObject.setLong("date", version.releasedAt().toEpochMilli()); + versionObject.setLong("date", version.committedAt().toEpochMilli()); versionObject.setBool("controllerVersion", version.isSelfVersion()); versionObject.setBool("systemVersion", version.isCurrentSystemVersion()); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiHandler.java index 50b810afeed..3a8d74d2c2e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiHandler.java @@ -111,7 +111,7 @@ public class ScrewdriverApiHandler extends LoggingRequestHandler { Cursor cursor = slime.setObject(); cursor.setString("version", version.versionNumber().toString()); cursor.setString("sha", version.releaseCommit()); - cursor.setLong("date", version.releasedAt().toEpochMilli()); + cursor.setLong("date", version.committedAt().toEpochMilli()); return new SlimeJsonResponse(slime); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VersionStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VersionStatus.java index 9bc7b7a22d8..d628489bc29 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VersionStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VersionStatus.java @@ -177,18 +177,19 @@ public class VersionStatus { Collection<String> configServerHostnames, Controller controller) { GitSha gitSha = controller.gitHub().getCommit(VESPA_REPO_OWNER, VESPA_REPO, statistics.version().toFullString()); - Instant releasedAt = Instant.ofEpochMilli(gitSha.commit.author.date.getTime()); // commitedAt ... - VespaVersion.Confidence confidence; - // Always compute confidence for system version - if (isSystemVersion) { - confidence = VespaVersion.confidenceFrom(statistics, controller); - } else { - // Keep existing confidence for non-system versions if already computed - confidence = confidenceFor(statistics.version(), controller) - .orElse(VespaVersion.confidenceFrom(statistics, controller)); + Instant committedAt = Instant.ofEpochMilli(gitSha.commit.author.date.getTime()); + VespaVersion.Confidence confidence = controller.curator().readConfidenceOverrides().get(statistics.version()); + // Compute confidence if there's no override + if (confidence == null) { + if (isSystemVersion) { // Always compute confidence for system version + confidence = VespaVersion.confidenceFrom(statistics, controller); + } else { // Keep existing confidence for non-system versions if already computed + confidence = confidenceFor(statistics.version(), controller) + .orElse(VespaVersion.confidenceFrom(statistics, controller)); + } } return new VespaVersion(statistics, - gitSha.sha, releasedAt, + gitSha.sha, committedAt, isSystemVersion, configServerHostnames, confidence diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java index ea89a70543c..1aa94507b61 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java @@ -6,7 +6,6 @@ import com.yahoo.component.Version; import com.yahoo.component.Vtag; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.application.ApplicationList; -import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; import java.time.Instant; import java.util.Collection; @@ -25,18 +24,18 @@ import static com.yahoo.config.application.api.DeploymentSpec.UpgradePolicy; public class VespaVersion implements Comparable<VespaVersion> { private final String releaseCommit; - private final Instant releasedAt; + private final Instant committedAt; private final boolean isCurrentSystemVersion; private final DeploymentStatistics statistics; private final ImmutableSet<String> configServerHostnames; private final Confidence confidence; - public VespaVersion(DeploymentStatistics statistics, String releaseCommit, Instant releasedAt, + public VespaVersion(DeploymentStatistics statistics, String releaseCommit, Instant committedAt, boolean isCurrentSystemVersion, Collection<String> configServerHostnames, Confidence confidence) { this.statistics = statistics; this.releaseCommit = releaseCommit; - this.releasedAt = releasedAt; + this.committedAt = committedAt; this.isCurrentSystemVersion = isCurrentSystemVersion; this.configServerHostnames = ImmutableSet.copyOf(configServerHostnames); this.confidence = confidence; @@ -57,7 +56,7 @@ public class VespaVersion implements Comparable<VespaVersion> { return Confidence.broken; // 'broken' if 4 non-canary was broken by this, and that is at least 10% of all - if (nonCanaryApplicationsBroken(statistics.version(), failingOnThis, productionOnThis, controller.curator())) + if (nonCanaryApplicationsBroken(statistics.version(), failingOnThis, productionOnThis)) return Confidence.broken; // 'low' unless all canary applications are upgraded @@ -79,7 +78,7 @@ public class VespaVersion implements Comparable<VespaVersion> { public String releaseCommit() { return releaseCommit; } /** Returns the time of the release commit */ - public Instant releasedAt() { return releasedAt; } + public Instant committedAt() { return committedAt; } /** Statistics about deployment of this version */ public DeploymentStatistics statistics() { return statistics; } @@ -143,12 +142,11 @@ public class VespaVersion implements Comparable<VespaVersion> { private static boolean nonCanaryApplicationsBroken(Version version, ApplicationList failingOnThis, - ApplicationList productionOnThis, - CuratorDb curator) { + ApplicationList productionOnThis) { ApplicationList failingNonCanaries = failingOnThis.without(UpgradePolicy.canary).startedFailingOn(version); ApplicationList productionNonCanaries = productionOnThis.without(UpgradePolicy.canary); - if (productionNonCanaries.size() + failingNonCanaries.size() == 0 || curator.readIgnoreConfidence()) return false; + if (productionNonCanaries.size() + failingNonCanaries.size() == 0) return false; // 'broken' if 4 non-canary was broken by this, and that is at least 10% of all int brokenByThisVersion = failingNonCanaries.size(); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java index 98afd6b61a8..22730cd2fb2 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java @@ -309,7 +309,7 @@ public class ControllerTest { for (int i = 0; i < versions.size(); i++) { VespaVersion c = versions.get(i); if (c.isCurrentSystemVersion()) - versions.set(i, new VespaVersion(c.statistics(), c.releaseCommit(), c.releasedAt(), + versions.set(i, new VespaVersion(c.statistics(), c.releaseCommit(), c.committedAt(), false, c.configServerHostnames(), c.confidence())); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializerTest.java index c668bde0d40..05b671baea0 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializerTest.java @@ -44,7 +44,7 @@ public class VersionStatusSerializerTest { VespaVersion a = status.versions().get(i); VespaVersion b = deserialized.versions().get(i); assertEquals(a.releaseCommit(), b.releaseCommit()); - assertEquals(a.releasedAt(), b.releasedAt()); + assertEquals(a.committedAt(), b.committedAt()); assertEquals(a.isCurrentSystemVersion(), b.isCurrentSystemVersion()); assertEquals(a.statistics(), b.statistics()); assertEquals(a.configServerHostnames(), b.configServerHostnames()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiTest.java index c7128bb4cfc..e5f3af8c06a 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiTest.java @@ -47,32 +47,43 @@ public class ControllerApiTest extends ControllerContainerTest { // Get current configuration tester.assertResponse(authenticatedRequest("http://localhost:8080/controller/v1/jobs/upgrader", new byte[0], Request.Method.GET), - "{\"upgradesPerMinute\":0.5,\"ignoreConfidence\":false}", + "{\"upgradesPerMinute\":0.5,\"confidenceOverrides\":[]}", 200); // Set invalid configuration - ; tester.assertResponse( hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader", "{\"upgradesPerMinute\":-1}", Request.Method.PATCH), "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Upgrades per minute must be >= 0\"}", 400); - // Unrecognized field + // Ignores unrecognized field tester.assertResponse( - hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader","{\"foo\":bar}", Request.Method.PATCH), - "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Unable to configure upgrader with data in request: '{\\\"foo\\\":bar}'\"}", + hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader","{\"foo\":\"bar\"}", Request.Method.PATCH), + "{\"error-code\":\"BAD_REQUEST\",\"message\":\"No such modifiable field(s)\"}", 400); - // Patch configuration + // Set upgrades per minute tester.assertResponse( hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader", "{\"upgradesPerMinute\":42.0}", Request.Method.PATCH), - "{\"upgradesPerMinute\":42.0,\"ignoreConfidence\":false}", + "{\"upgradesPerMinute\":42.0,\"confidenceOverrides\":[]}", 200); - // Patch configuration + // Override confidence tester.assertResponse( - hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader", "{\"ignoreConfidence\":true}", Request.Method.PATCH), - "{\"upgradesPerMinute\":42.0,\"ignoreConfidence\":true}", + hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader/confidence/6.42", "broken", Request.Method.POST), + "{\"upgradesPerMinute\":42.0,\"confidenceOverrides\":[{\"6.42\":\"broken\"}]}", + 200); + + // Override confidence for another version + tester.assertResponse( + hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader/confidence/6.43", "broken", Request.Method.POST), + "{\"upgradesPerMinute\":42.0,\"confidenceOverrides\":[{\"6.42\":\"broken\"},{\"6.43\":\"broken\"}]}", + 200); + + // Remove first override + tester.assertResponse( + hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader/confidence/6.42", "", Request.Method.DELETE), + "{\"upgradesPerMinute\":42.0,\"confidenceOverrides\":[{\"6.43\":\"broken\"}]}", 200); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java index a56ab028233..4de3b9abd5b 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java @@ -83,7 +83,7 @@ public class DeploymentApiTest extends ControllerContainerTest { if ( ! version.configServerHostnames().isEmpty()) version = new VespaVersion(version.statistics(), version.releaseCommit(), - version.releasedAt(), + version.committedAt(), version.isCurrentSystemVersion(), ImmutableSet.of("config1.test", "config2.test"), VespaVersion.confidenceFrom(version.statistics(), controller) diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java index 868ea50128d..27e26e3267a 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java @@ -243,44 +243,31 @@ public class VersionStatusTest { } @Test - public void testIgnoreConfidence() { + public void testConfidenceOverride() { DeploymentTester tester = new DeploymentTester(); - Version version0 = new Version("5.0"); tester.upgradeSystem(version0); - // Setup applications - all running on version0 - Application canary0 = tester.createAndDeploy("canary0", 1, "canary"); - Application canary1 = tester.createAndDeploy("canary1", 2, "canary"); - Application default0 = tester.createAndDeploy("default0", 3, "default"); - Application default1 = tester.createAndDeploy("default1", 4, "default"); - Application default2 = tester.createAndDeploy("default2", 5, "default"); - Application default3 = tester.createAndDeploy("default3", 6, "default"); - Application default4 = tester.createAndDeploy("default4", 7, "default"); + // Create and deploy application on current version + Application app = tester.createAndDeploy("app", 1, "canary"); + tester.updateVersionStatus(); + assertEquals(Confidence.high, confidence(tester.controller(), version0)); - // New version is released - Version version1 = new Version("5.1"); - tester.upgradeSystem(version1); + // Override confidence + tester.upgrader().overrideConfidence(version0, Confidence.broken); + tester.updateVersionStatus(); + assertEquals(Confidence.broken, confidence(tester.controller(), version0)); - // All canaries upgrade successfully, 1 default apps ok, 3 default apps fail - tester.completeUpgrade(canary0, version1, "canary"); - tester.completeUpgrade(canary1, version1, "canary"); + // New version is released and application upgrades + Version version1 = new Version("5.1"); tester.upgradeSystem(version1); - tester.completeUpgrade(default0, version1, "default"); - tester.completeUpgradeWithError(default1, version1, "default", stagingTest); - tester.completeUpgradeWithError(default2, version1, "default", stagingTest); - tester.completeUpgradeWithError(default3, version1, "default", stagingTest); - tester.completeUpgradeWithError(default4, version1, "default", stagingTest); + tester.completeUpgrade(app, version1, "canary"); tester.updateVersionStatus(); - assertEquals("Canaries have upgraded, 1 of 4 default apps failing: Broken", - Confidence.broken, confidence(tester.controller(), version1)); + assertEquals(Confidence.high, confidence(tester.controller(), version1)); - // Same as above, but ignore confidence calculations, will force normal confidence - tester.controllerTester().curator().writeIgnoreConfidence(true); - tester.updateVersionStatus(); - assertEquals("Canaries have upgraded, 1 of 4 default apps failing, but confidence ignored: Low", - Confidence.normal, confidence(tester.controller(), version1)); - tester.controllerTester().curator().writeIgnoreConfidence(false); + // Stale override was removed + assertFalse("Stale override removed", tester.controller().curator().readConfidenceOverrides() + .keySet().contains(version0)); } private Confidence confidence(Controller controller, Version version) { diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceResource.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceResource.java index c16256df73d..7f0227df4c5 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceResource.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/resources/InstanceResource.java @@ -6,6 +6,7 @@ import com.yahoo.container.jaxrs.annotation.Component; import com.yahoo.jrt.slobrok.api.Mirror; import com.yahoo.vespa.applicationmodel.ApplicationInstance; import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference; +import com.yahoo.vespa.applicationmodel.ClusterId; import com.yahoo.vespa.applicationmodel.ConfigId; import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.applicationmodel.ServiceStatus; @@ -15,7 +16,7 @@ import com.yahoo.vespa.orchestrator.OrchestratorUtil; import com.yahoo.vespa.orchestrator.restapi.wire.SlobrokEntryResponse; import com.yahoo.vespa.orchestrator.status.HostStatus; import com.yahoo.vespa.orchestrator.status.StatusService; -import com.yahoo.vespa.service.monitor.SlobrokMonitorManager; +import com.yahoo.vespa.service.monitor.SlobrokApi; import javax.inject.Inject; import javax.ws.rs.GET; @@ -48,16 +49,16 @@ public class InstanceResource { public static final String DEFAULT_SLOBROK_PATTERN = "**"; private final StatusService statusService; - private final SlobrokMonitorManager slobrokMonitorManager; + private final SlobrokApi slobrokApi; private final InstanceLookupService instanceLookupService; @Inject public InstanceResource(@Component InstanceLookupService instanceLookupService, @Component StatusService statusService, - @Component SlobrokMonitorManager slobrokMonitorManager) { + @Component SlobrokApi slobrokApi) { this.instanceLookupService = instanceLookupService; this.statusService = statusService; - this.slobrokMonitorManager = slobrokMonitorManager; + this.slobrokApi = slobrokApi; } @GET @@ -96,7 +97,7 @@ public class InstanceResource { pattern = DEFAULT_SLOBROK_PATTERN; } - List<Mirror.Entry> entries = slobrokMonitorManager.lookup(applicationId, pattern); + List<Mirror.Entry> entries = slobrokApi.lookup(applicationId, pattern); return entries.stream() .map(entry -> new SlobrokEntryResponse(entry.getName(), entry.getSpec())) .collect(Collectors.toList()); @@ -107,11 +108,16 @@ public class InstanceResource { @Produces(MediaType.APPLICATION_JSON) public ServiceStatus getServiceStatus( @PathParam("instanceId") String instanceId, + @QueryParam("clusterId") String clusterIdString, @QueryParam("serviceType") String serviceTypeString, @QueryParam("configId") String configIdString) { ApplicationInstanceReference reference = parseInstanceId(instanceId); ApplicationId applicationId = OrchestratorUtil.toApplicationId(reference); + if (clusterIdString == null) { + throwBadRequest("Missing clusterId query parameter"); + } + if (serviceTypeString == null) { throwBadRequest("Missing serviceType query parameter"); } @@ -120,10 +126,11 @@ public class InstanceResource { throwBadRequest("Missing configId query parameter"); } + ClusterId clusterId = new ClusterId(clusterIdString); ServiceType serviceType = new ServiceType(serviceTypeString); ConfigId configId = new ConfigId(configIdString); - return slobrokMonitorManager.getStatus(applicationId, serviceType, configId); + return slobrokApi.getStatus(applicationId, clusterId, serviceType, configId); } static ApplicationInstanceReference parseInstanceId(String instanceIdString) { diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/InstanceResourceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/InstanceResourceTest.java index 42b5b70ab55..d7255327ba6 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/InstanceResourceTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/InstanceResourceTest.java @@ -4,11 +4,12 @@ package com.yahoo.vespa.orchestrator.resources; import com.fasterxml.jackson.databind.ObjectMapper; import com.yahoo.config.provision.ApplicationId; import com.yahoo.jrt.slobrok.api.Mirror; +import com.yahoo.vespa.applicationmodel.ClusterId; import com.yahoo.vespa.applicationmodel.ConfigId; import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.orchestrator.restapi.wire.SlobrokEntryResponse; -import com.yahoo.vespa.service.monitor.SlobrokMonitorManager; +import com.yahoo.vespa.service.monitor.SlobrokApi; import org.junit.Test; import javax.ws.rs.WebApplicationException; @@ -27,12 +28,13 @@ public class InstanceResourceTest { private static final List<Mirror.Entry> ENTRIES = Arrays.asList( new Mirror.Entry("name1", "spec1"), new Mirror.Entry("name2", "spec2")); + private static final ClusterId CLUSTER_ID = new ClusterId("cluster-id"); - private final SlobrokMonitorManager slobrokMonitorManager = mock(SlobrokMonitorManager.class); + private final SlobrokApi slobrokApi = mock(SlobrokApi.class); private final InstanceResource resource = new InstanceResource( null, null, - slobrokMonitorManager); + slobrokApi); @Test public void testGetSlobrokEntries() throws Exception { @@ -49,31 +51,32 @@ public class InstanceResourceTest { ServiceType serviceType = new ServiceType("serviceType"); ConfigId configId = new ConfigId("configId"); ServiceStatus serviceStatus = ServiceStatus.UP; - when(slobrokMonitorManager.getStatus(APPLICATION_ID, serviceType, configId)) + when(slobrokApi.getStatus(APPLICATION_ID, CLUSTER_ID, serviceType, configId)) .thenReturn(serviceStatus); ServiceStatus actualServiceStatus = resource.getServiceStatus( APPLICATION_INSTANCE_REFERENCE, + CLUSTER_ID.s(), serviceType.s(), configId.s()); - verify(slobrokMonitorManager).getStatus(APPLICATION_ID, serviceType, configId); + verify(slobrokApi).getStatus(APPLICATION_ID, CLUSTER_ID, serviceType, configId); assertEquals(serviceStatus, actualServiceStatus); } @Test(expected = WebApplicationException.class) public void testBadRequest() { - resource.getServiceStatus(APPLICATION_INSTANCE_REFERENCE, null, null); + resource.getServiceStatus(APPLICATION_INSTANCE_REFERENCE, CLUSTER_ID.s(), null, null); } private void testGetSlobrokEntriesWith(String pattern, String expectedLookupPattern) throws Exception{ - when(slobrokMonitorManager.lookup(APPLICATION_ID, expectedLookupPattern)) + when(slobrokApi.lookup(APPLICATION_ID, expectedLookupPattern)) .thenReturn(ENTRIES); List<SlobrokEntryResponse> response = resource.getSlobrokEntries( APPLICATION_INSTANCE_REFERENCE, pattern); - verify(slobrokMonitorManager).lookup(APPLICATION_ID, expectedLookupPattern); + verify(slobrokApi).lookup(APPLICATION_ID, expectedLookupPattern); ObjectMapper mapper = new ObjectMapper(); String actualJson = mapper.writeValueAsString(response); diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/ServiceStatusProvider.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/ServiceStatusProvider.java new file mode 100644 index 00000000000..35003313775 --- /dev/null +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/ServiceStatusProvider.java @@ -0,0 +1,19 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.service.monitor;// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.applicationmodel.ClusterId; +import com.yahoo.vespa.applicationmodel.ConfigId; +import com.yahoo.vespa.applicationmodel.ServiceStatus; +import com.yahoo.vespa.applicationmodel.ServiceType; + +/** + * @author hakon + */ +public interface ServiceStatusProvider { + /** Get the {@link ServiceStatus} of a particular service. */ + ServiceStatus getStatus(ApplicationId applicationId, + ClusterId clusterId, + ServiceType serviceType, + ConfigId configId); +} diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/SlobrokMonitorManager.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/SlobrokApi.java index 9cffa1192be..dff605b888d 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/SlobrokMonitorManager.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/SlobrokApi.java @@ -3,23 +3,13 @@ package com.yahoo.vespa.service.monitor; import com.yahoo.config.provision.ApplicationId; import com.yahoo.jrt.slobrok.api.Mirror; -import com.yahoo.vespa.applicationmodel.ConfigId; -import com.yahoo.vespa.applicationmodel.ServiceStatus; -import com.yahoo.vespa.applicationmodel.ServiceType; import java.util.List; -public interface SlobrokMonitorManager { +public interface SlobrokApi extends ServiceStatusProvider { /** * Get all Slobrok entries that has a name matching pattern as described in * Mirror::lookup. */ List<Mirror.Entry> lookup(ApplicationId application, String pattern); - - /** - * Query the ServiceMonitorStatus of a particular service. - */ - ServiceStatus getStatus(ApplicationId applicationId, - ServiceType serviceType, - ConfigId configId); } diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/HealthMonitorManager.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/HealthMonitorManager.java new file mode 100644 index 00000000000..121e1fd5ebf --- /dev/null +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/HealthMonitorManager.java @@ -0,0 +1,39 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.service.monitor.internal; + +import com.google.inject.Inject; +import com.yahoo.config.model.api.ApplicationInfo; +import com.yahoo.config.model.api.SuperModel; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.applicationmodel.ClusterId; +import com.yahoo.vespa.applicationmodel.ConfigId; +import com.yahoo.vespa.applicationmodel.ServiceStatus; +import com.yahoo.vespa.applicationmodel.ServiceType; + +/** + * @author hakon + */ +public class HealthMonitorManager implements MonitorManager { + @Inject + public HealthMonitorManager() {} + + @Override + public void applicationActivated(SuperModel superModel, ApplicationInfo application) { + } + + @Override + public void applicationRemoved(SuperModel superModel, ApplicationId id) { + } + + @Override + public ServiceStatus getStatus(ApplicationId applicationId, ClusterId clusterId, ServiceType serviceType, ConfigId configId) { + // TODO: Do proper health check + if (ZoneApplication.isNodeAdminService(applicationId, clusterId, serviceType)) { + return ServiceStatus.UP; + } + + throw new IllegalArgumentException("Health monitoring not implemented for application " + + applicationId.toShortString() + ", cluster " + clusterId.s() + ", serviceType " + + serviceType); + } +} diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ModelGenerator.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ModelGenerator.java index 961d5701901..ca70b18439b 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ModelGenerator.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ModelGenerator.java @@ -20,7 +20,7 @@ import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.applicationmodel.TenantId; import com.yahoo.vespa.service.monitor.ServiceModel; -import com.yahoo.vespa.service.monitor.SlobrokMonitorManager; +import com.yahoo.vespa.service.monitor.ServiceStatusProvider; import java.util.HashMap; import java.util.HashSet; @@ -44,7 +44,7 @@ public class ModelGenerator { SuperModel superModel, Zone zone, List<String> configServerHosts, - SlobrokMonitorManager slobrokMonitorManager) { + ServiceStatusProvider serviceStatusProvider) { Map<ApplicationInstanceReference, ApplicationInstance> applicationInstances = new HashMap<>(); for (ApplicationInfo applicationInfo : superModel.getAllApplicationInfos()) { @@ -52,7 +52,7 @@ public class ModelGenerator { ApplicationInstance applicationInstance = toApplicationInstance( applicationInfo, zone, - slobrokMonitorManager); + serviceStatusProvider); applicationInstances.put(applicationInstance.reference(), applicationInstance); } @@ -70,7 +70,7 @@ public class ModelGenerator { ApplicationInstance toApplicationInstance( ApplicationInfo applicationInfo, Zone zone, - SlobrokMonitorManager slobrokMonitorManager) { + ServiceStatusProvider serviceStatusProvider) { Map<ServiceClusterKey, Set<ServiceInstance>> groupedServiceInstances = new HashMap<>(); for (HostInfo host : applicationInfo.getModel().getHosts()) { @@ -80,9 +80,10 @@ public class ModelGenerator { ServiceInstance serviceInstance = toServiceInstance( applicationInfo.getApplicationId(), + serviceClusterKey.clusterId(), serviceInfo, hostName, - slobrokMonitorManager); + serviceStatusProvider); if (!groupedServiceInstances.containsKey(serviceClusterKey)) { groupedServiceInstances.put(serviceClusterKey, new HashSet<>()); @@ -114,28 +115,33 @@ public class ModelGenerator { return applicationInstance; } - ServiceClusterKey toServiceClusterKey(ServiceInfo serviceInfo) { - ClusterId clusterId = new ClusterId(serviceInfo.getProperty(CLUSTER_ID_PROPERTY_NAME).orElse("")); + static ClusterId getClusterId(ServiceInfo serviceInfo) { + return new ClusterId(serviceInfo.getProperty(CLUSTER_ID_PROPERTY_NAME).orElse("")); + } + + private ServiceClusterKey toServiceClusterKey(ServiceInfo serviceInfo) { + ClusterId clusterId = getClusterId(serviceInfo); ServiceType serviceType = toServiceType(serviceInfo); return new ServiceClusterKey(clusterId, serviceType); } - ServiceInstance toServiceInstance( + private ServiceInstance toServiceInstance( ApplicationId applicationId, + ClusterId clusterId, ServiceInfo serviceInfo, HostName hostName, - SlobrokMonitorManager slobrokMonitorManager) { + ServiceStatusProvider serviceStatusProvider) { ConfigId configId = new ConfigId(serviceInfo.getConfigId()); - ServiceStatus status = slobrokMonitorManager.getStatus( + ServiceStatus status = serviceStatusProvider.getStatus( applicationId, - toServiceType(serviceInfo), - configId); + clusterId, + toServiceType(serviceInfo), configId); return new ServiceInstance(configId, hostName, status); } - ApplicationInstanceId toApplicationInstanceId(ApplicationInfo applicationInfo, Zone zone) { + private ApplicationInstanceId toApplicationInstanceId(ApplicationInfo applicationInfo, Zone zone) { return new ApplicationInstanceId(String.format("%s:%s:%s:%s", applicationInfo.getApplicationId().application().value(), zone.environment().value(), @@ -143,7 +149,7 @@ public class ModelGenerator { applicationInfo.getApplicationId().instance().value())); } - ServiceType toServiceType(ServiceInfo serviceInfo) { + private ServiceType toServiceType(ServiceInfo serviceInfo) { return new ServiceType(serviceInfo.getServiceType()); } } diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/MonitorManager.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/MonitorManager.java new file mode 100644 index 00000000000..49863672c43 --- /dev/null +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/MonitorManager.java @@ -0,0 +1,11 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.service.monitor.internal;// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +import com.yahoo.config.model.api.SuperModelListener; +import com.yahoo.vespa.service.monitor.ServiceStatusProvider; + +/** + * @author hakon + */ +public interface MonitorManager extends SuperModelListener, ServiceStatusProvider { +} diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ServiceMonitorImpl.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ServiceMonitorImpl.java index 282a0797912..b2b6538fe6c 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ServiceMonitorImpl.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ServiceMonitorImpl.java @@ -15,42 +15,41 @@ import com.yahoo.vespa.service.monitor.ServiceMonitor; import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.logging.Logger; import java.util.stream.Collectors; public class ServiceMonitorImpl implements ServiceMonitor { - private static final Logger logger = Logger.getLogger(ServiceMonitorImpl.class.getName()); - - private final Zone zone; - private final List<String> configServerHosts; private final ServiceModelCache serviceModelCache; @Inject public ServiceMonitorImpl(SuperModelProvider superModelProvider, ConfigserverConfig configserverConfig, SlobrokMonitorManagerImpl slobrokMonitorManager, + HealthMonitorManager healthMonitorManager, Metric metric, Timer timer) { - this.zone = superModelProvider.getZone(); - this.configServerHosts = toConfigServerList(configserverConfig); + Zone zone = superModelProvider.getZone(); + List<String> configServerHosts = toConfigServerList(configserverConfig); ServiceMonitorMetrics metrics = new ServiceMonitorMetrics(metric, timer); - SuperModelListenerImpl superModelListener = new SuperModelListenerImpl( + UnionMonitorManager monitorManager = new UnionMonitorManager( slobrokMonitorManager, + healthMonitorManager, + configserverConfig); + + SuperModelListenerImpl superModelListener = new SuperModelListenerImpl( + monitorManager, metrics, new ModelGenerator(), zone, configServerHosts); superModelListener.start(superModelProvider); - serviceModelCache = new ServiceModelCache( - () -> superModelListener.get(), - timer); + serviceModelCache = new ServiceModelCache(superModelListener, timer); } private List<String> toConfigServerList(ConfigserverConfig configserverConfig) { if (configserverConfig.multitenant()) { return configserverConfig.zookeeperserver().stream() - .map(server -> server.hostname()) + .map(ConfigserverConfig.Zookeeperserver::hostname) .collect(Collectors.toList()); } diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/SlobrokMonitorManagerImpl.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/SlobrokMonitorManagerImpl.java index 801f4b05079..b96364bf95e 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/SlobrokMonitorManagerImpl.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/SlobrokMonitorManagerImpl.java @@ -8,10 +8,11 @@ import com.yahoo.config.model.api.SuperModelListener; import com.yahoo.config.provision.ApplicationId; import com.yahoo.jrt.slobrok.api.Mirror; import com.yahoo.log.LogLevel; +import com.yahoo.vespa.applicationmodel.ClusterId; import com.yahoo.vespa.applicationmodel.ConfigId; import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.applicationmodel.ServiceType; -import com.yahoo.vespa.service.monitor.SlobrokMonitorManager; +import com.yahoo.vespa.service.monitor.SlobrokApi; import java.util.HashMap; import java.util.List; @@ -19,7 +20,7 @@ import java.util.Optional; import java.util.function.Supplier; import java.util.logging.Logger; -public class SlobrokMonitorManagerImpl implements SuperModelListener, SlobrokMonitorManager { +public class SlobrokMonitorManagerImpl implements SuperModelListener, SlobrokApi, MonitorManager { private static final Logger logger = Logger.getLogger(SlobrokMonitorManagerImpl.class.getName()); @@ -30,7 +31,7 @@ public class SlobrokMonitorManagerImpl implements SuperModelListener, SlobrokMon @Inject public SlobrokMonitorManagerImpl() { - this(() -> new SlobrokMonitor()); + this(SlobrokMonitor::new); } SlobrokMonitorManagerImpl(Supplier<SlobrokMonitor> slobrokMonitorFactory) { @@ -74,7 +75,7 @@ public class SlobrokMonitorManagerImpl implements SuperModelListener, SlobrokMon @Override public ServiceStatus getStatus(ApplicationId applicationId, - ServiceType serviceType, + ClusterId clusterId, ServiceType serviceType, ConfigId configId) { Optional<String> slobrokServiceName = findSlobrokServiceName(serviceType, configId); if (slobrokServiceName.isPresent()) { diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/SuperModelListenerImpl.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/SuperModelListenerImpl.java index 82d55cd05d7..5e309d3c18d 100644 --- a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/SuperModelListenerImpl.java +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/SuperModelListenerImpl.java @@ -24,10 +24,10 @@ public class SuperModelListenerImpl implements SuperModelListener, Supplier<Serv // superModel and slobrokMonitorManager are always updated together // and atomically using this monitor. private final Object monitor = new Object(); - private final SlobrokMonitorManagerImpl slobrokMonitorManager; + private final MonitorManager slobrokMonitorManager; private SuperModel superModel; - SuperModelListenerImpl(SlobrokMonitorManagerImpl slobrokMonitorManager, + SuperModelListenerImpl(MonitorManager slobrokMonitorManager, ServiceMonitorMetrics metrics, ModelGenerator modelGenerator, Zone zone, diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/UnionMonitorManager.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/UnionMonitorManager.java new file mode 100644 index 00000000000..0bb4dea5a94 --- /dev/null +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/UnionMonitorManager.java @@ -0,0 +1,57 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.service.monitor.internal; + +import com.yahoo.cloud.config.ConfigserverConfig; +import com.yahoo.config.model.api.ApplicationInfo; +import com.yahoo.config.model.api.SuperModel; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.applicationmodel.ClusterId; +import com.yahoo.vespa.applicationmodel.ConfigId; +import com.yahoo.vespa.applicationmodel.ServiceStatus; +import com.yahoo.vespa.applicationmodel.ServiceType; + +/** + * @author hakon + */ +public class UnionMonitorManager implements MonitorManager { + private final SlobrokMonitorManagerImpl slobrokMonitorManager; + private final HealthMonitorManager healthMonitorManager; + private final ConfigserverConfig configserverConfig; + + UnionMonitorManager(SlobrokMonitorManagerImpl slobrokMonitorManager, + HealthMonitorManager healthMonitorManager, + ConfigserverConfig configserverConfig) { + this.slobrokMonitorManager = slobrokMonitorManager; + this.healthMonitorManager = healthMonitorManager; + this.configserverConfig = configserverConfig; + } + + @Override + public ServiceStatus getStatus(ApplicationId applicationId, + ClusterId clusterId, + ServiceType serviceType, + ConfigId configId) { + MonitorManager monitorManager = useHealth(applicationId, clusterId, serviceType) ? + healthMonitorManager : + slobrokMonitorManager; + + return monitorManager.getStatus(applicationId, clusterId, serviceType, configId); + } + + @Override + public void applicationActivated(SuperModel superModel, ApplicationInfo application) { + slobrokMonitorManager.applicationActivated(superModel, application); + healthMonitorManager.applicationActivated(superModel, application); + } + + @Override + public void applicationRemoved(SuperModel superModel, ApplicationId id) { + slobrokMonitorManager.applicationRemoved(superModel, id); + healthMonitorManager.applicationRemoved(superModel, id); + } + + private boolean useHealth(ApplicationId applicationId, ClusterId clusterId, ServiceType serviceType) { + return !configserverConfig.nodeAdminInContainer() && + ZoneApplication.isNodeAdminService(applicationId, clusterId, serviceType); + } +} diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ZoneApplication.java b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ZoneApplication.java new file mode 100644 index 00000000000..f7097e867df --- /dev/null +++ b/service-monitor/src/main/java/com/yahoo/vespa/service/monitor/internal/ZoneApplication.java @@ -0,0 +1,26 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.service.monitor.internal; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.applicationmodel.ClusterId; +import com.yahoo.vespa.applicationmodel.ServiceType; + +import java.util.Objects; + +/** + * @author hakon + */ +public class ZoneApplication { + private ZoneApplication() {} + + static final ApplicationId ZONE_APPLICATION_ID = + ApplicationId.from("hosted-vespa", "routing", "default"); + + static boolean isNodeAdminService(ApplicationId applicationId, + ClusterId clusterId, + ServiceType serviceType) { + return Objects.equals(applicationId, ZONE_APPLICATION_ID) && + Objects.equals(serviceType, ServiceType.CONTAINER) && + Objects.equals(clusterId, ClusterId.NODE_ADMIN); + } +} diff --git a/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/ModelGeneratorTest.java b/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/ModelGeneratorTest.java index cf07c39950c..1348c04a7e5 100644 --- a/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/ModelGeneratorTest.java +++ b/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/ModelGeneratorTest.java @@ -44,7 +44,7 @@ public class ModelGeneratorTest { .collect(Collectors.toList()); SlobrokMonitorManagerImpl slobrokMonitorManager = mock(SlobrokMonitorManagerImpl.class); - when(slobrokMonitorManager.getStatus(any(), any(), any())) + when(slobrokMonitorManager.getStatus(any(), any(), any(), any())) .thenReturn(ServiceStatus.UP); ServiceModel serviceModel = @@ -88,7 +88,7 @@ public class ModelGeneratorTest { List<String> configServerHosts = Collections.emptyList(); SlobrokMonitorManagerImpl slobrokMonitorManager = mock(SlobrokMonitorManagerImpl.class); - when(slobrokMonitorManager.getStatus(any(), any(), any())) + when(slobrokMonitorManager.getStatus(any(), any(), any(), any())) .thenReturn(ServiceStatus.UP); ServiceModel serviceModel = diff --git a/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/SlobrokMonitorManagerImplImplTest.java b/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/SlobrokMonitorManagerImplTest.java index 79f927f6161..ab50b3192e3 100644 --- a/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/SlobrokMonitorManagerImplImplTest.java +++ b/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/SlobrokMonitorManagerImplTest.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.service.monitor.internal; import com.yahoo.config.model.api.ApplicationInfo; import com.yahoo.config.model.api.SuperModel; +import com.yahoo.vespa.applicationmodel.ClusterId; import com.yahoo.vespa.applicationmodel.ConfigId; import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.applicationmodel.ServiceType; @@ -19,7 +20,7 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -public class SlobrokMonitorManagerImplImplTest { +public class SlobrokMonitorManagerImplTest { // IntelliJ complains if parametrized type is specified, Maven complains if not specified. @SuppressWarnings("unchecked") private final Supplier<SlobrokMonitor> slobrokMonitorFactory = mock(Supplier.class); @@ -29,6 +30,7 @@ public class SlobrokMonitorManagerImplImplTest { private final SlobrokMonitor slobrokMonitor = mock(SlobrokMonitor.class); private final SuperModel superModel = mock(SuperModel.class); private final ApplicationInfo application = mock(ApplicationInfo.class); + private final ClusterId clusterId = new ClusterId("cluster-id"); @Before public void setup() { @@ -70,8 +72,8 @@ public class SlobrokMonitorManagerImplImplTest { private ServiceStatus getStatus(String serviceType) { return slobrokMonitorManager.getStatus( application.getApplicationId(), - new ServiceType(serviceType), - new ConfigId("config.id")); + clusterId, + new ServiceType(serviceType), new ConfigId("config.id")); } @Test diff --git a/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/UnionMonitorManagerTest.java b/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/UnionMonitorManagerTest.java new file mode 100644 index 00000000000..2597ebe65d3 --- /dev/null +++ b/service-monitor/src/test/java/com/yahoo/vespa/service/monitor/internal/UnionMonitorManagerTest.java @@ -0,0 +1,93 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.service.monitor.internal; + +import com.yahoo.cloud.config.ConfigserverConfig; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.applicationmodel.ClusterId; +import com.yahoo.vespa.applicationmodel.ConfigId; +import com.yahoo.vespa.applicationmodel.ServiceType; +import org.junit.Test; + +import static com.yahoo.vespa.applicationmodel.ClusterId.NODE_ADMIN; +import static com.yahoo.vespa.applicationmodel.ServiceType.CONTAINER; +import static com.yahoo.vespa.service.monitor.internal.ZoneApplication.ZONE_APPLICATION_ID; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +public class UnionMonitorManagerTest { + @Test + public void nodeAdminInContainer() { + testWith( + true, + ZONE_APPLICATION_ID, + NODE_ADMIN, + CONTAINER, + 1, + 0); + } + + @Test + public void nodeAdminOutsideContainer() { + boolean inContainer = false; + + // When nodeAdminInContainer is set, then only the node admin cluster should use health + testWith( + inContainer, + ZONE_APPLICATION_ID, + NODE_ADMIN, + CONTAINER, + 0, + 1); + + testWith( + inContainer, + ApplicationId.fromSerializedForm("a:b:default"), + NODE_ADMIN, + CONTAINER, + 1, + 0); + + testWith( + inContainer, + ZONE_APPLICATION_ID, + new ClusterId("foo"), + CONTAINER, + 1, + 0); + + testWith( + inContainer, + ZONE_APPLICATION_ID, + NODE_ADMIN, + new ServiceType("foo"), + 1, + 0); + } + + private void testWith(boolean nodeAdminInContainer, + ApplicationId applicationId, + ClusterId clusterId, + ServiceType serviceType, + int expectedSlobrokCalls, + int expectedHealthCalls) { + SlobrokMonitorManagerImpl slobrokMonitorManager = mock(SlobrokMonitorManagerImpl.class); + HealthMonitorManager healthMonitorManager = mock(HealthMonitorManager.class); + + ConfigserverConfig.Builder builder = new ConfigserverConfig.Builder(); + builder.nodeAdminInContainer(nodeAdminInContainer); + ConfigserverConfig config = new ConfigserverConfig(builder); + + + UnionMonitorManager manager = new UnionMonitorManager( + slobrokMonitorManager, + healthMonitorManager, + config); + + manager.getStatus(applicationId, clusterId, serviceType, new ConfigId("config-id")); + + verify(slobrokMonitorManager, times(expectedSlobrokCalls)).getStatus(any(), any(), any(), any()); + verify(healthMonitorManager, times(expectedHealthCalls)).getStatus(any(), any(), any(), any()); + } +}
\ No newline at end of file diff --git a/storage/src/tests/distributor/bucketdbupdatertest.cpp b/storage/src/tests/distributor/bucketdbupdatertest.cpp index 08262e30fe6..559afffc795 100644 --- a/storage/src/tests/distributor/bucketdbupdatertest.cpp +++ b/storage/src/tests/distributor/bucketdbupdatertest.cpp @@ -183,15 +183,16 @@ protected: bool bucketExistsThatHasNode(int bucketCount, uint16_t node) const; ClusterInformation::CSP createClusterInfo(const std::string& clusterStateString) { - auto clusterState = std::make_shared<lib::ClusterState>(clusterStateString); + lib::ClusterState baselineClusterState(clusterStateString); + lib::ClusterStateBundle clusterStateBundle(baselineClusterState); ClusterInformation::CSP clusterInfo( new SimpleClusterInformation( getBucketDBUpdater().getDistributorComponent().getIndex(), - *clusterState, + clusterStateBundle, "ui")); auto &repo = getBucketSpaceRepo(); for (auto &elem : repo) { - elem.second->setClusterState(clusterState); + elem.second->setClusterState(clusterStateBundle.getDerivedClusterState(elem.first)); } return clusterInfo; } diff --git a/storage/src/tests/distributor/distributortestutil.cpp b/storage/src/tests/distributor/distributortestutil.cpp index fbf5a14c052..8310266c9cb 100644 --- a/storage/src/tests/distributor/distributortestutil.cpp +++ b/storage/src/tests/distributor/distributortestutil.cpp @@ -240,7 +240,7 @@ DistributorTestUtil::removeFromBucketDB(const document::BucketId& id) void DistributorTestUtil::addIdealNodes(const document::BucketId& id) { - addIdealNodes(getExternalOperationHandler().getClusterState(), id); + addIdealNodes(*getExternalOperationHandler().getClusterStateBundle().getBaselineClusterState(), id); } void @@ -389,7 +389,7 @@ DistributorTestUtil::getBucketSpaces() const void DistributorTestUtil::enableDistributorClusterState(vespalib::stringref state) { - _distributor->enableClusterState(lib::ClusterState(state)); + _distributor->enableClusterStateBundle(lib::ClusterStateBundle(lib::ClusterState(state))); } } diff --git a/storage/src/tests/distributor/idealstatemanagertest.cpp b/storage/src/tests/distributor/idealstatemanagertest.cpp index 7103a89229d..7401e083900 100644 --- a/storage/src/tests/distributor/idealstatemanagertest.cpp +++ b/storage/src/tests/distributor/idealstatemanagertest.cpp @@ -49,7 +49,7 @@ public: void testBlockCheckForAllOperationsToSpecificBucket(); void setSystemState(const lib::ClusterState& systemState) { - _distributor->enableClusterState(systemState); + _distributor->enableClusterStateBundle(lib::ClusterStateBundle(systemState)); } CPPUNIT_TEST_SUITE(IdealStateManagerTest); diff --git a/storage/src/tests/distributor/statecheckerstest.cpp b/storage/src/tests/distributor/statecheckerstest.cpp index b339d1b4601..c265a0972af 100644 --- a/storage/src/tests/distributor/statecheckerstest.cpp +++ b/storage/src/tests/distributor/statecheckerstest.cpp @@ -93,7 +93,7 @@ struct StateCheckersTest : public CppUnit::TestFixture, void statsUpdatedWhenMergingDueToOutOfSyncCopies(); void enableClusterState(const lib::ClusterState& systemState) { - _distributor->enableClusterState(systemState); + _distributor->enableClusterStateBundle(lib::ClusterStateBundle(systemState)); } void insertJoinableBuckets(); diff --git a/storage/src/tests/distributor/visitoroperationtest.cpp b/storage/src/tests/distributor/visitoroperationtest.cpp index b6afcd4f3ab..af580480563 100644 --- a/storage/src/tests/distributor/visitoroperationtest.cpp +++ b/storage/src/tests/distributor/visitoroperationtest.cpp @@ -1095,7 +1095,7 @@ void VisitorOperationTest::testVisitIdealNode() { ClusterState state("distributor:1 storage:3"); - _distributor->enableClusterState(state); + _distributor->enableClusterStateBundle(lib::ClusterStateBundle(state)); // Create buckets in bucketdb for (int i=0; i<32; i++ ) { diff --git a/storage/src/vespa/storage/distributor/bucketdbupdater.cpp b/storage/src/vespa/storage/distributor/bucketdbupdater.cpp index 36b48a545f1..84332851340 100644 --- a/storage/src/vespa/storage/distributor/bucketdbupdater.cpp +++ b/storage/src/vespa/storage/distributor/bucketdbupdater.cpp @@ -63,7 +63,7 @@ BucketOwnership BucketDBUpdater::checkOwnershipInPendingState(const document::Bucket& b) const { if (hasPendingClusterState()) { - const lib::ClusterState& state(_pendingClusterState->getNewClusterState()); + const lib::ClusterState& state(*_pendingClusterState->getNewClusterStateBundle().getDerivedClusterState(b.getBucketSpace())); if (!_distributorComponent.ownsBucketInState(state, b)) { return BucketOwnership::createNotOwnedInState(state); } @@ -77,7 +77,7 @@ BucketDBUpdater::sendRequestBucketInfo( const document::Bucket& bucket, const std::shared_ptr<MergeReplyGuard>& mergeReplyGuard) { - if (!_distributorComponent.storageNodeIsUp(node)) { + if (!_distributorComponent.storageNodeIsUp(bucket.getBucketSpace(), node)) { return; } @@ -112,7 +112,7 @@ BucketDBUpdater::recheckBucketInfo(uint32_t nodeIdx, void BucketDBUpdater::removeSuperfluousBuckets( - const lib::ClusterState& newState) + const lib::ClusterStateBundle& newState) { for (auto &elem : _distributorComponent.getBucketSpaceRepo()) { const auto &newDistribution(elem.second->getDistribution()); @@ -123,7 +123,7 @@ BucketDBUpdater::removeSuperfluousBuckets( // being on storage nodes that are no longer up. NodeRemover proc( oldClusterState, - newState, + *newState.getDerivedClusterState(elem.first), _distributorComponent.getBucketIdFactory(), _distributorComponent.getIndex(), newDistribution, @@ -159,11 +159,11 @@ BucketDBUpdater::storageDistributionChanged() { ensureTransitionTimerStarted(); - removeSuperfluousBuckets(_distributorComponent.getClusterState()); + removeSuperfluousBuckets(_distributorComponent.getClusterStateBundle()); ClusterInformation::CSP clusterInfo(new SimpleClusterInformation( _distributorComponent.getIndex(), - _distributorComponent.getClusterState(), + _distributorComponent.getClusterStateBundle(), _distributorComponent.getDistributor().getStorageNodeUpStates())); _pendingClusterState = PendingClusterState::createForDistributionChange( _distributorComponent.getClock(), @@ -193,21 +193,21 @@ BucketDBUpdater::onSetSystemState( "Received new cluster state %s", cmd->getSystemState().toString().c_str()); - lib::ClusterState oldState = _distributorComponent.getClusterState(); - const lib::ClusterState& state = cmd->getSystemState(); + const lib::ClusterStateBundle oldState = _distributorComponent.getClusterStateBundle(); + const lib::ClusterStateBundle& state = cmd->getClusterStateBundle(); if (state == oldState) { return false; } ensureTransitionTimerStarted(); - removeSuperfluousBuckets(cmd->getSystemState()); + removeSuperfluousBuckets(cmd->getClusterStateBundle()); replyToPreviousPendingClusterStateIfAny(); ClusterInformation::CSP clusterInfo( new SimpleClusterInformation( _distributorComponent.getIndex(), - _distributorComponent.getClusterState(), + _distributorComponent.getClusterStateBundle(), _distributorComponent.getDistributor() .getStorageNodeUpStates())); _pendingClusterState = PendingClusterState::createForClusterStateChange( @@ -423,7 +423,7 @@ BucketDBUpdater::processSingleBucketInfoReply( BucketRequest req = iter->second; _sentMessages.erase(iter); - if (!_distributorComponent.storageNodeIsUp(req.targetNode)) { + if (!_distributorComponent.storageNodeIsUp(req.bucket.getBucketSpace(), req.targetNode)) { // Ignore replies from nodes that are down. return true; } @@ -489,7 +489,7 @@ BucketDBUpdater::processCompletedPendingClusterState() _pendingClusterState->mergeIntoBucketDatabases(); if (_pendingClusterState->getCommand().get()) { - enableCurrentClusterStateInDistributor(); + enableCurrentClusterStateBundleInDistributor(); _distributorComponent.getDistributor().getMessageSender().sendDown( _pendingClusterState->getCommand()); addCurrentStateToClusterStateHistory(); @@ -504,16 +504,16 @@ BucketDBUpdater::processCompletedPendingClusterState() } void -BucketDBUpdater::enableCurrentClusterStateInDistributor() +BucketDBUpdater::enableCurrentClusterStateBundleInDistributor() { - const lib::ClusterState& state( - _pendingClusterState->getCommand()->getSystemState()); + const lib::ClusterStateBundle& state( + _pendingClusterState->getCommand()->getClusterStateBundle()); LOG(debug, "BucketDBUpdater finished processing state %s", - state.toString().c_str()); + state.getBaselineClusterState()->toString().c_str()); - _distributorComponent.getDistributor().enableClusterState(state); + _distributorComponent.getDistributor().enableClusterStateBundle(state); } void @@ -564,7 +564,7 @@ BucketDBUpdater::reportXmlStatus(vespalib::xml::XmlOutputStream& xos, using namespace vespalib::xml; xos << XmlTag("bucketdb") << XmlTag("systemstate_active") - << XmlContent(_distributorComponent.getClusterState().toString()) + << XmlContent(_distributorComponent.getClusterStateBundle().getBaselineClusterState()->toString()) << XmlEndTag(); if (_pendingClusterState) { xos << *_pendingClusterState; diff --git a/storage/src/vespa/storage/distributor/bucketdbupdater.h b/storage/src/vespa/storage/distributor/bucketdbupdater.h index 19e2e259778..a85ee6fe4f7 100644 --- a/storage/src/vespa/storage/distributor/bucketdbupdater.h +++ b/storage/src/vespa/storage/distributor/bucketdbupdater.h @@ -152,11 +152,11 @@ private: void updateState(const lib::ClusterState& oldState, const lib::ClusterState& newState); - void removeSuperfluousBuckets(const lib::ClusterState& newState); + void removeSuperfluousBuckets(const lib::ClusterStateBundle& newState); void replyToPreviousPendingClusterStateIfAny(); - void enableCurrentClusterStateInDistributor(); + void enableCurrentClusterStateBundleInDistributor(); void addCurrentStateToClusterStateHistory(); void enqueueRecheckUntilPendingStateEnabled(uint16_t node, const document::Bucket&); void sendAllQueuedBucketRechecks(); diff --git a/storage/src/vespa/storage/distributor/clusterinformation.cpp b/storage/src/vespa/storage/distributor/clusterinformation.cpp index cd09e4f46d4..96e94c92819 100644 --- a/storage/src/vespa/storage/distributor/clusterinformation.cpp +++ b/storage/src/vespa/storage/distributor/clusterinformation.cpp @@ -2,6 +2,7 @@ #include "clusterinformation.h" #include <vespa/vdslib/distribution/distribution.h> +#include <vespa/vdslib/state/cluster_state_bundle.h> #include <vespa/vdslib/state/clusterstate.h> namespace storage::distributor { @@ -9,7 +10,7 @@ namespace storage::distributor { uint16_t ClusterInformation::getStorageNodeCount() const { - return getClusterState().getNodeCount(lib::NodeType::STORAGE); + return getClusterStateBundle().getBaselineClusterState()->getNodeCount(lib::NodeType::STORAGE); } } diff --git a/storage/src/vespa/storage/distributor/clusterinformation.h b/storage/src/vespa/storage/distributor/clusterinformation.h index 25f303d0f52..49abb5e8e75 100644 --- a/storage/src/vespa/storage/distributor/clusterinformation.h +++ b/storage/src/vespa/storage/distributor/clusterinformation.h @@ -10,8 +10,7 @@ namespace storage { namespace lib { -class Distribution; -class ClusterState; +class ClusterStateBundle; } @@ -26,7 +25,7 @@ public: virtual uint16_t getDistributorIndex() const = 0; - virtual const lib::ClusterState& getClusterState() const = 0; + virtual const lib::ClusterStateBundle& getClusterStateBundle() const = 0; virtual const char* getStorageUpStates() const = 0; diff --git a/storage/src/vespa/storage/distributor/distributor.cpp b/storage/src/vespa/storage/distributor/distributor.cpp index fa6d63da32e..86a8ac46cbb 100644 --- a/storage/src/vespa/storage/distributor/distributor.cpp +++ b/storage/src/vespa/storage/distributor/distributor.cpp @@ -63,6 +63,7 @@ Distributor::Distributor(DistributorComponentRegister& compReg, : StorageLink("distributor"), DistributorInterface(), framework::StatusReporter("distributor", "Distributor"), + _clusterStateBundle(lib::ClusterState()), _compReg(compReg), _component(compReg, "distributor"), _bucketSpaceRepo(std::make_unique<DistributorBucketSpaceRepo>()), @@ -332,17 +333,24 @@ Distributor::handleMessage(const std::shared_ptr<api::StorageMessage>& msg) return false; } +const lib::ClusterStateBundle& +Distributor::getClusterStateBundle() const +{ + return _clusterStateBundle; +} + void -Distributor::enableClusterState(const lib::ClusterState& state) +Distributor::enableClusterStateBundle(const lib::ClusterStateBundle& state) { - lib::ClusterState oldState = _clusterState; - _clusterState = state; + lib::ClusterStateBundle oldState = _clusterStateBundle; + _clusterStateBundle = state; propagateClusterStates(); lib::Node myNode(lib::NodeType::DISTRIBUTOR, _component.getIndex()); + const auto &baselineState = *_clusterStateBundle.getBaselineClusterState(); if (!_doneInitializing && - getClusterState().getNodeState(myNode).getState() == lib::State::UP) + baselineState.getNodeState(myNode).getState() == lib::State::UP) { scanAllBuckets(); _doneInitializing = true; @@ -352,8 +360,8 @@ Distributor::enableClusterState(const lib::ClusterState& state) } // Clear all active messages on nodes that are down. - for (uint16_t i = 0; i < state.getNodeCount(lib::NodeType::STORAGE); ++i) { - if (!state.getNodeState(lib::Node(lib::NodeType::STORAGE, i)).getState() + for (uint16_t i = 0; i < baselineState.getNodeCount(lib::NodeType::STORAGE); ++i) { + if (!baselineState.getNodeState(lib::Node(lib::NodeType::STORAGE, i)).getState() .oneOf(getStorageNodeUpStates())) { std::vector<uint64_t> msgIds( @@ -537,9 +545,8 @@ Distributor::propagateDefaultDistribution( void Distributor::propagateClusterStates() { - auto clusterState = std::make_shared<lib::ClusterState>(_clusterState); for (auto &iter : *_bucketSpaceRepo) { - iter.second->setClusterState(clusterState); + iter.second->setClusterState(_clusterStateBundle.getDerivedClusterState(iter.first)); } } diff --git a/storage/src/vespa/storage/distributor/distributor.h b/storage/src/vespa/storage/distributor/distributor.h index 9ec21d6ab05..e28c6dd6578 100644 --- a/storage/src/vespa/storage/distributor/distributor.h +++ b/storage/src/vespa/storage/distributor/distributor.h @@ -78,7 +78,7 @@ public: * Enables a new cluster state. Called after the bucket db updater has * retrieved all bucket info related to the change. */ - void enableClusterState(const lib::ClusterState& clusterState) override; + void enableClusterStateBundle(const lib::ClusterStateBundle& clusterStateBundle) override; /** * Invoked when a pending cluster state for a distribution (config) @@ -114,9 +114,7 @@ public: */ void checkBucketForSplit(document::BucketSpace bucketSpace, const BucketDatabase::Entry& e, uint8_t priority) override; - const lib::ClusterState& getClusterState() const override { - return _clusterState; - } + const lib::ClusterStateBundle& getClusterStateBundle() const override; /** * @return Returns the states in which the distributors consider @@ -235,7 +233,7 @@ private: void propagateDefaultDistribution(std::shared_ptr<const lib::Distribution>); void propagateClusterStates(); - lib::ClusterState _clusterState; + lib::ClusterStateBundle _clusterStateBundle; DistributorComponentRegister& _compReg; storage::DistributorComponent _component; diff --git a/storage/src/vespa/storage/distributor/distributorcomponent.cpp b/storage/src/vespa/storage/distributor/distributorcomponent.cpp index 8fa412ea38b..1d2465fb41a 100644 --- a/storage/src/vespa/storage/distributor/distributorcomponent.cpp +++ b/storage/src/vespa/storage/distributor/distributorcomponent.cpp @@ -3,6 +3,7 @@ #include <vespa/storage/common/bucketoperationlogger.h> #include <vespa/storageapi/messageapi/storagereply.h> #include <vespa/vdslib/distribution/distribution.h> +#include <vespa/vdslib/state/cluster_state_bundle.h> #include "distributor_bucket_space_repo.h" #include "distributor_bucket_space.h" @@ -40,10 +41,10 @@ DistributorComponent::sendUp(const api::StorageMessage::SP& msg) _distributor.getMessageSender().sendUp(msg); } -const lib::ClusterState& -DistributorComponent::getClusterState() const +const lib::ClusterStateBundle& +DistributorComponent::getClusterStateBundle() const { - return _distributor.getClusterState(); + return _distributor.getClusterStateBundle(); }; std::vector<uint16_t> @@ -305,9 +306,9 @@ DistributorComponent::getBucketId(const document::DocumentId& docId) const } bool -DistributorComponent::storageNodeIsUp(uint32_t nodeIndex) const +DistributorComponent::storageNodeIsUp(document::BucketSpace bucketSpace, uint32_t nodeIndex) const { - const lib::NodeState& ns = getClusterState().getNodeState( + const lib::NodeState& ns = getClusterStateBundle().getDerivedClusterState(bucketSpace)->getNodeState( lib::Node(lib::NodeType::STORAGE, nodeIndex)); return ns.getState().oneOf(_distributor.getStorageNodeUpStates()); diff --git a/storage/src/vespa/storage/distributor/distributorcomponent.h b/storage/src/vespa/storage/distributor/distributorcomponent.h index 33e86d423e7..184ac768afb 100644 --- a/storage/src/vespa/storage/distributor/distributorcomponent.h +++ b/storage/src/vespa/storage/distributor/distributorcomponent.h @@ -68,10 +68,10 @@ public: bool ownsBucketInCurrentState(const document::Bucket &bucket) const; /** - * Returns a reference to the current system state. Valid until the next - * time the distributor main thread processes its message queue. + * Returns a reference to the current cluster state bundle. Valid until the + * next time the distributor main thread processes its message queue. */ - const lib::ClusterState& getClusterState() const; + const lib::ClusterStateBundle& getClusterStateBundle() const; /** * Returns the ideal nodes for the given bucket. @@ -86,7 +86,7 @@ public: /** * Returns true if the given storage node is in an "up state". */ - bool storageNodeIsUp(uint32_t nodeIndex) const; + bool storageNodeIsUp(document::BucketSpace bucketSpace, uint32_t nodeIndex) const; /** * Verifies that the given command has been received at the diff --git a/storage/src/vespa/storage/distributor/distributorinterface.h b/storage/src/vespa/storage/distributor/distributorinterface.h index 3445397c17d..17c300fa0a9 100644 --- a/storage/src/vespa/storage/distributor/distributorinterface.h +++ b/storage/src/vespa/storage/distributor/distributorinterface.h @@ -8,6 +8,7 @@ #include <vespa/document/bucket/bucket.h> namespace storage::api { class MergeBucketReply; } +namespace storage::lib { class ClusterStateBundle; } namespace storage { class DistributorConfiguration; class DistributorMetricSet; @@ -21,7 +22,7 @@ class DistributorInterface : public DistributorMessageSender public: virtual PendingMessageTracker& getPendingMessageTracker() = 0; virtual DistributorMetricSet& getMetrics() = 0; - virtual void enableClusterState(const lib::ClusterState& state) = 0; + virtual void enableClusterStateBundle(const lib::ClusterStateBundle& state) = 0; virtual BucketOwnership checkOwnershipInPendingState(const document::Bucket &bucket) const = 0; virtual void notifyDistributionChangeEnabled() = 0; @@ -43,9 +44,9 @@ public: virtual void checkBucketForSplit(document::BucketSpace bucketSpace, const BucketDatabase::Entry& e, uint8_t pri) = 0; /** - * @return Returns the current cluster state. + * @return Returns the current cluster state bundle. */ - virtual const lib::ClusterState& getClusterState() const = 0; + virtual const lib::ClusterStateBundle& getClusterStateBundle() const = 0; /** * Returns true if the node is currently initializing. diff --git a/storage/src/vespa/storage/distributor/idealstatemanager.cpp b/storage/src/vespa/storage/distributor/idealstatemanager.cpp index 4018bb88583..773014391fd 100644 --- a/storage/src/vespa/storage/distributor/idealstatemanager.cpp +++ b/storage/src/vespa/storage/distributor/idealstatemanager.cpp @@ -68,9 +68,10 @@ bool IdealStateManager::iAmUp() const { Node node(NodeType::DISTRIBUTOR, _distributorComponent.getIndex()); - const lib::State &nodeState = _distributorComponent.getClusterState() - .getNodeState(node).getState(); - const lib::State &clusterState = _distributorComponent.getClusterState().getClusterState(); + // Assume that derived cluster states agree on distributor node being up + const auto &state = *_distributorComponent.getClusterStateBundle().getBaselineClusterState(); + const lib::State &nodeState = state.getNodeState(node).getState(); + const lib::State &clusterState = state.getClusterState(); return (nodeState == lib::State::UP && clusterState == lib::State::UP); } @@ -278,7 +279,7 @@ void IdealStateManager::dump_bucket_space_db_status(document::BucketSpace bucket void IdealStateManager::getBucketStatus(std::ostream& out) const { LOG(debug, "Dumping bucket database valid at cluster state version %u", - _distributorComponent.getDistributor().getClusterState().getVersion()); + _distributorComponent.getDistributor().getClusterStateBundle().getVersion()); for (auto& space : _bucketSpaceRepo) { dump_bucket_space_db_status(space.first, out); diff --git a/storage/src/vespa/storage/distributor/operations/external/multioperationoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/multioperationoperation.cpp index 23e1081f9ae..1ea077fd1c1 100644 --- a/storage/src/vespa/storage/distributor/operations/external/multioperationoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/multioperationoperation.cpp @@ -119,7 +119,7 @@ MultiOperationOperation::onStart(DistributorMessageSender& sender) // Don't do anything if all nodes are down. bool up = false; for (uint16_t i = 0; i < systemState.getNodeCount(lib::NodeType::STORAGE); i++) { - if (_manager.storageNodeIsUp(i)) { + if (_manager.storageNodeIsUp(_msg->getBucket().getBucketSpace(), i)) { up = true; break; } diff --git a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp index 3c96bc55161..a08445ca3d2 100644 --- a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp +++ b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp @@ -32,8 +32,8 @@ PendingClusterState::PendingClusterState( api::Timestamp creationTimestamp) : _cmd(newStateCmd), _requestedNodes(newStateCmd->getSystemState().getNodeCount(lib::NodeType::STORAGE)), - _prevClusterState(clusterInfo->getClusterState()), - _newClusterState(newStateCmd->getSystemState()), + _prevClusterStateBundle(clusterInfo->getClusterStateBundle()), + _newClusterStateBundle(newStateCmd->getClusterStateBundle()), _clock(clock), _clusterInfo(clusterInfo), _creationTimestamp(creationTimestamp), @@ -53,8 +53,8 @@ PendingClusterState::PendingClusterState( DistributorBucketSpaceRepo &bucketSpaceRepo, api::Timestamp creationTimestamp) : _requestedNodes(clusterInfo->getStorageNodeCount()), - _prevClusterState(clusterInfo->getClusterState()), - _newClusterState(clusterInfo->getClusterState()), + _prevClusterStateBundle(clusterInfo->getClusterStateBundle()), + _newClusterStateBundle(clusterInfo->getClusterStateBundle()), _clock(clock), _clusterInfo(clusterInfo), _creationTimestamp(creationTimestamp), @@ -79,7 +79,7 @@ PendingClusterState::initializeBucketSpaceTransitions(bool distributionChanged, auto pendingTransition = std::make_unique<PendingBucketSpaceDbTransition> (*this, *elem.second, distributionChanged, outdatedNodes, - _clusterInfo, _newClusterState, _creationTimestamp); + _clusterInfo, *_newClusterStateBundle.getDerivedClusterState(elem.first), _creationTimestamp); if (pendingTransition->getBucketOwnershipTransfer()) { _bucketOwnershipTransfer = true; } @@ -99,15 +99,15 @@ PendingClusterState::logConstructionInformation() const "New PendingClusterState constructed with previous cluster " "state '%s', new cluster state '%s', distribution config " "hash: '%s'", - _prevClusterState.toString().c_str(), - _newClusterState.toString().c_str(), + getPrevClusterStateBundleString().c_str(), + getNewClusterStateBundleString().c_str(), distribution.getNodeGraph().getDistributionConfigHash().c_str()); } bool -PendingClusterState::storageNodeUpInNewState(uint16_t node) const +PendingClusterState::storageNodeUpInNewState(document::BucketSpace bucketSpace, uint16_t node) const { - return _newClusterState.getNodeState(Node(NodeType::STORAGE, node)) + return _newClusterStateBundle.getDerivedClusterState(bucketSpace)->getNodeState(Node(NodeType::STORAGE, node)) .getState().oneOf(_clusterInfo->getStorageUpStates()); } @@ -124,7 +124,7 @@ PendingClusterState::getOutdatedNodesMap() const uint16_t PendingClusterState::newStateStorageNodeCount() const { - return _newClusterState.getNodeCount(lib::NodeType::STORAGE); + return _newClusterStateBundle.getBaselineClusterState()->getNodeCount(lib::NodeType::STORAGE); } bool @@ -144,15 +144,15 @@ PendingClusterState::shouldRequestBucketInfo() const bool PendingClusterState::clusterIsDown() const { - return _newClusterState.getClusterState() == lib::State::DOWN; + return _newClusterStateBundle.getBaselineClusterState()->getClusterState() == lib::State::DOWN; } bool PendingClusterState::iAmDown() const { const lib::NodeState& myState( - _newClusterState.getNodeState(Node(NodeType::DISTRIBUTOR, - _sender.getDistributorIndex()))); + _newClusterStateBundle.getBaselineClusterState()->getNodeState(Node(NodeType::DISTRIBUTOR, + _sender.getDistributorIndex()))); return myState.getState() == lib::State::DOWN; } @@ -161,8 +161,8 @@ PendingClusterState::requestNodes() { LOG(debug, "New system state: Old state was %s, new state is %s", - _prevClusterState.toString().c_str(), - _newClusterState.toString().c_str()); + getPrevClusterStateBundleString().c_str(), + getNewClusterStateBundleString().c_str()); requestBucketInfoFromStorageNodesWithChangedState(); } @@ -173,7 +173,7 @@ PendingClusterState::requestBucketInfoFromStorageNodesWithChangedState() for (auto &elem : _pendingTransitions) { const OutdatedNodes &outdatedNodes(elem.second->getOutdatedNodes()); for (uint16_t idx : outdatedNodes) { - if (storageNodeUpInNewState(idx)) { + if (storageNodeUpInNewState(elem.first, idx)) { requestNode(BucketSpaceAndNode(elem.first, idx)); } } @@ -191,14 +191,14 @@ PendingClusterState::requestNode(BucketSpaceAndNode bucketSpaceAndNode) "and distribution hash '%s'", bucketSpaceAndNode.bucketSpace.getId(), bucketSpaceAndNode.node, - _newClusterState.toString().c_str(), + getNewClusterStateBundleString().c_str(), distributionHash.c_str()); std::shared_ptr<api::RequestBucketInfoCommand> cmd( new api::RequestBucketInfoCommand( bucketSpaceAndNode.bucketSpace, _sender.getDistributorIndex(), - _newClusterState, + *_newClusterStateBundle.getDerivedClusterState(bucketSpaceAndNode.bucketSpace), distributionHash)); cmd->setPriority(api::StorageMessage::HIGH); @@ -294,7 +294,7 @@ PendingClusterState::printXml(vespalib::XmlOutputStream& xos) const { using namespace vespalib::xml; xos << XmlTag("systemstate_pending") - << XmlAttribute("state", _newClusterState); + << XmlAttribute("state", *_newClusterStateBundle.getBaselineClusterState()); for (auto &elem : _sentMessages) { xos << XmlTag("pending") << XmlAttribute("node", elem.second.node) @@ -306,8 +306,8 @@ PendingClusterState::printXml(vespalib::XmlOutputStream& xos) const PendingClusterState::Summary PendingClusterState::getSummary() const { - return Summary(_prevClusterState.toString(), - _newClusterState.toString(), + return Summary(getPrevClusterStateBundleString(), + getNewClusterStateBundleString(), (_clock.getTimeInMicros().getTime() - _creationTimestamp)); } diff --git a/storage/src/vespa/storage/distributor/pendingclusterstate.h b/storage/src/vespa/storage/distributor/pendingclusterstate.h index 2d75c795745..b96ba8cbbd7 100644 --- a/storage/src/vespa/storage/distributor/pendingclusterstate.h +++ b/storage/src/vespa/storage/distributor/pendingclusterstate.h @@ -8,6 +8,7 @@ #include <vespa/storageapi/message/state.h> #include <vespa/storageframework/generic/clock/clock.h> #include <vespa/vdslib/distribution/distribution.h> +#include <vespa/vdslib/state/cluster_state_bundle.h> #include <vespa/vespalib/util/xmlserializable.h> #include "outdated_nodes_map.h" #include <unordered_map> @@ -107,11 +108,8 @@ public: return _cmd; } - const lib::ClusterState& getNewClusterState() const { - return _newClusterState; - } - const lib::ClusterState& getPrevClusterState() const { - return _prevClusterState; + const lib::ClusterStateBundle& getNewClusterStateBundle() const { + return _newClusterStateBundle; } /** @@ -184,7 +182,13 @@ private: bool clusterIsDown() const; bool iAmDown() const; - bool storageNodeUpInNewState(uint16_t node) const; + bool storageNodeUpInNewState(document::BucketSpace bucketSpace, uint16_t node) const; + std::string getNewClusterStateBundleString() const { + return _newClusterStateBundle.getBaselineClusterState()->toString(); + } + std::string getPrevClusterStateBundleString() const { + return _prevClusterStateBundle.getBaselineClusterState()->toString(); + } std::shared_ptr<api::SetSystemStateCommand> _cmd; @@ -192,8 +196,8 @@ private: std::vector<bool> _requestedNodes; std::deque<std::pair<framework::MilliSecTime, BucketSpaceAndNode> > _delayedRequests; - lib::ClusterState _prevClusterState; - lib::ClusterState _newClusterState; + lib::ClusterStateBundle _prevClusterStateBundle; + lib::ClusterStateBundle _newClusterStateBundle; const framework::Clock& _clock; ClusterInformation::CSP _clusterInfo; diff --git a/storage/src/vespa/storage/distributor/simpleclusterinformation.h b/storage/src/vespa/storage/distributor/simpleclusterinformation.h index 2946abf620c..1247d425e50 100644 --- a/storage/src/vespa/storage/distributor/simpleclusterinformation.h +++ b/storage/src/vespa/storage/distributor/simpleclusterinformation.h @@ -11,10 +11,10 @@ class SimpleClusterInformation : public ClusterInformation { public: SimpleClusterInformation(uint16_t myIndex, - const lib::ClusterState& clusterState, + const lib::ClusterStateBundle& clusterStateBundle, const char* storageUpStates) : _myIndex(myIndex), - _clusterState(clusterState), + _clusterStateBundle(clusterStateBundle), _storageUpStates(storageUpStates) {} @@ -22,8 +22,8 @@ public: return _myIndex; } - const lib::ClusterState& getClusterState() const override { - return _clusterState; + const lib::ClusterStateBundle& getClusterStateBundle() const override { + return _clusterStateBundle; } const char* getStorageUpStates() const override { @@ -32,7 +32,7 @@ public: private: uint16_t _myIndex; - lib::ClusterState _clusterState; + lib::ClusterStateBundle _clusterStateBundle; const char* _storageUpStates; }; |