diff options
9 files changed, 219 insertions, 36 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java index 0ec00f61311..303f5d5484b 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java @@ -35,10 +35,12 @@ import com.yahoo.vespa.serviceview.bindings.ApplicationView; import java.net.URI; import java.time.Clock; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.function.Predicate; import java.util.logging.Logger; /** @@ -208,10 +210,21 @@ public class Controller extends AbstractComponent { " to " + printableVersion(newStatus.systemVersion())); } curator.writeVersionStatus(newStatus); + // Removes confidence overrides for versions that no longer exist in the system + removeConfidenceOverride(version -> newStatus.versions().stream() + .noneMatch(vespaVersion -> vespaVersion.versionNumber() + .equals(version))); } /** Returns the latest known version status. Calling this is free but the status may be slightly out of date. */ public VersionStatus versionStatus() { return curator.readVersionStatus(); } + + /** Remove confidence override for versions matching given filter */ + public void removeConfidenceOverride(Predicate<Version> filter) { + Map<Version, VespaVersion.Confidence> overrides = new LinkedHashMap<>(curator().readConfidenceOverrides()); + overrides.keySet().removeIf(filter); + curator.writeConfidenceOverrides(overrides); + } /** Returns the current system version: The controller should drive towards running all applications on this version */ public Version systemVersion() { @@ -244,7 +257,7 @@ public class Controller extends AbstractComponent { return nodeRepositoryClient; } - private String printableVersion(Optional<VespaVersion> vespaVersion) { + private static String printableVersion(Optional<VespaVersion> vespaVersion) { return vespaVersion.map(v -> v.versionNumber().toFullString()).orElse("Unknown"); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java index e492ec46081..8c661e7db9d 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java @@ -9,11 +9,14 @@ import com.yahoo.vespa.hosted.controller.application.ApplicationList; import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; import com.yahoo.vespa.hosted.controller.versions.VespaVersion; +import com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence; import com.yahoo.yolean.Exceptions; import java.time.Duration; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.logging.Level; import java.util.logging.Logger; @@ -42,12 +45,12 @@ public class Upgrader extends Maintainer { public void maintain() { // Determine target versions for each upgrade policy Optional<Version> canaryTarget = controller().versionStatus().systemVersion().map(VespaVersion::versionNumber); - Optional<Version> defaultTarget = newestVersionWithConfidence(VespaVersion.Confidence.normal); - Optional<Version> conservativeTarget = newestVersionWithConfidence(VespaVersion.Confidence.high); + Optional<Version> defaultTarget = newestVersionWithConfidence(Confidence.normal); + Optional<Version> conservativeTarget = newestVersionWithConfidence(Confidence.high); // Cancel upgrades to broken targets (let other ongoing upgrades complete to avoid starvation for (VespaVersion version : controller().versionStatus().versions()) { - if (version.confidence() == VespaVersion.Confidence.broken) + if (version.confidence() == Confidence.broken) cancelUpgradesOf(applications().without(UpgradePolicy.canary).upgradingTo(version.versionNumber()), version.versionNumber() + " is broken"); } @@ -67,7 +70,7 @@ public class Upgrader extends Maintainer { conservativeTarget.ifPresent(target -> upgrade(applications().with(UpgradePolicy.conservative), target)); } - private Optional<Version> newestVersionWithConfidence(VespaVersion.Confidence confidence) { + private Optional<Version> newestVersionWithConfidence(Confidence confidence) { return reversed(controller().versionStatus().versions()).stream() .filter(v -> v.confidence().equalOrHigherThan(confidence)) .findFirst() @@ -124,4 +127,20 @@ public class Upgrader extends Maintainer { curator.writeUpgradesPerMinute(n); } + /** Override confidence for given version. This will cause the computed confidence to be ignored */ + public void overrideConfidence(Version version, Confidence confidence) { + Map<Version, Confidence> overrides = new LinkedHashMap<>(curator.readConfidenceOverrides()); + overrides.put(version, confidence); + curator.writeConfidenceOverrides(overrides); + } + + /** Returns all confidence overrides */ + public Map<Version, Confidence> confidenceOverrides() { + return curator.readConfidenceOverrides(); + } + + /** Remove confidence override for given version */ + public void removeConfidenceOverride(Version version) { + controller().removeConfidenceOverride(v -> v.equals(version)); + } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ConfidenceOverrideSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ConfidenceOverrideSerializer.java new file mode 100644 index 00000000000..c56d8b3849c --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ConfidenceOverrideSerializer.java @@ -0,0 +1,42 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.controller.persistence; + + +import com.yahoo.component.Version; +import com.yahoo.slime.Cursor; +import com.yahoo.slime.ObjectTraverser; +import com.yahoo.slime.Slime; +import com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence; + +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * Serializes overrides of version confidence. + * + * @author mpolden + */ +public class ConfidenceOverrideSerializer { + + private final static String overridesField = "overrides"; + + public Slime toSlime(Map<Version, Confidence> overrides) { + Slime slime = new Slime(); + Cursor root = slime.setObject(); + Cursor object = root.setObject(overridesField); + overrides.forEach((version, confidence) -> object.setString(version.toString(), confidence.name())); + return slime; + } + + public Map<Version, Confidence> fromSlime(Slime slime) { + Cursor root = slime.get(); + Cursor overridesObject = root.field(overridesField); + Map<Version, Confidence> overrides = new LinkedHashMap<>(); + overridesObject.traverse((ObjectTraverser) (name, value) -> { + overrides.put(Version.fromString(name), Confidence.valueOf(value.asString())); + }); + return Collections.unmodifiableMap(overrides); + } + +} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java index e5370036ed3..d59ce1e7bed 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.controller.persistence; import com.google.inject.Inject; +import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.path.Path; import com.yahoo.transaction.NestedTransaction; @@ -11,6 +12,7 @@ import com.yahoo.vespa.curator.Lock; import com.yahoo.vespa.hosted.controller.api.identifiers.TenantId; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs; import com.yahoo.vespa.hosted.controller.versions.VersionStatus; +import com.yahoo.vespa.hosted.controller.versions.VespaVersion; import java.io.IOException; import java.io.UncheckedIOException; @@ -21,6 +23,7 @@ import java.util.Collections; import java.util.Deque; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -184,30 +187,57 @@ public class CuratorDb { return serializer.fromSlime(SlimeUtils.jsonToSlime(data.get())); } + public void writeConfidenceOverrides(Map<Version, VespaVersion.Confidence> overrides) { + ConfidenceOverrideSerializer serializer = new ConfidenceOverrideSerializer(); + try { + curator.set(confidenceOverridesPath(), SlimeUtils.toJsonBytes(serializer.toSlime(overrides))); + } catch (IOException e) { + throw new UncheckedIOException("Failed to serialize confidence overrides", e); + } + } + + public Map<Version, VespaVersion.Confidence> readConfidenceOverrides() { + ConfidenceOverrideSerializer serializer = new ConfidenceOverrideSerializer(); + Optional<byte[]> data = curator.getData(confidenceOverridesPath()); + if (!data.isPresent() || data.get().length == 0) { + return Collections.emptyMap(); + } + return serializer.fromSlime(SlimeUtils.jsonToSlime(data.get())); + } + + // The following methods are called by internal code + + @SuppressWarnings("unused") public Optional<byte[]> readProvisionState(String provisionId) { return curator.getData(provisionStatePath(provisionId)); } + @SuppressWarnings("unused") public void writeProvisionState(String provisionId, byte[] data) { curator.set(provisionStatePath(provisionId), data); } + @SuppressWarnings("unused") public List<String> readProvisionStateIds() { return curator.getChildren(provisionStatePath()); } + @SuppressWarnings("unused") public Optional<byte[]> readVespaServerPool() { return curator.getData(vespaServerPoolPath()); } + @SuppressWarnings("unused") public void writeVespaServerPool(byte[] data) { curator.set(vespaServerPoolPath(), data); } + @SuppressWarnings("unused") public Optional<byte[]> readOpenStackServerPool() { return curator.getData(openStackServerPoolPath()); } + @SuppressWarnings("unused") public void writeOpenStackServerPool(byte[] data) { curator.set(openStackServerPoolPath(), data); } @@ -238,33 +268,39 @@ public class CuratorDb { return lockPath; } - private Path inactiveJobsPath() { + private static Path inactiveJobsPath() { return root.append("inactiveJobs"); } - private Path jobQueuePath(DeploymentJobs.JobType jobType) { + private static Path jobQueuePath(DeploymentJobs.JobType jobType) { return root.append("jobQueues").append(jobType.name()); } - private Path upgradesPerMinutePath() { + private static Path upgradesPerMinutePath() { return root.append("upgrader").append("upgradesPerMinute"); } - private Path versionStatusPath() { return root.append("versionStatus"); } + private static Path confidenceOverridesPath() { + return root.append("upgrader").append("confidenceOverrides"); + } + + private static Path versionStatusPath() { + return root.append("versionStatus"); + } - private Path provisionStatePath() { + private static Path provisionStatePath() { return root.append("provisioning").append("states"); } - private Path provisionStatePath(String provisionId) { + private static Path provisionStatePath(String provisionId) { return provisionStatePath().append(provisionId); } - private Path vespaServerPoolPath() { + private static Path vespaServerPoolPath() { return root.append("vespaServerPool"); } - private Path openStackServerPoolPath() { + private static Path openStackServerPoolPath() { return root.append("openStackServerPool"); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java index 2a6c1f3da52..6fefb7099f1 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiHandler.java @@ -1,12 +1,12 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.restapi.controller; +import com.yahoo.component.Version; import com.yahoo.container.jdisc.HttpRequest; import com.yahoo.container.jdisc.HttpResponse; import com.yahoo.container.jdisc.LoggingRequestHandler; import com.yahoo.io.IOUtils; import com.yahoo.slime.Inspector; -import com.yahoo.text.Utf8; import com.yahoo.vespa.config.SlimeUtils; import com.yahoo.vespa.hosted.controller.maintenance.ControllerMaintenance; import com.yahoo.vespa.hosted.controller.maintenance.Upgrader; @@ -14,11 +14,13 @@ import com.yahoo.vespa.hosted.controller.restapi.ErrorResponse; import com.yahoo.vespa.hosted.controller.restapi.MessageResponse; import com.yahoo.vespa.hosted.controller.restapi.Path; import com.yahoo.vespa.hosted.controller.restapi.ResourceResponse; +import com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence; import com.yahoo.yolean.Exceptions; import java.io.IOException; import java.io.InputStream; import java.io.UncheckedIOException; +import java.util.Scanner; import java.util.logging.Level; /** @@ -67,15 +69,15 @@ public class ControllerApiHandler extends LoggingRequestHandler { private HttpResponse post(HttpRequest request) { Path path = new Path(request.getUri().getPath()); - if (path.matches("/controller/v1/maintenance/inactive/{jobName}")) - return setActive(path.get("jobName"), false); + if (path.matches("/controller/v1/maintenance/inactive/{jobName}")) return setActive(path.get("jobName"), false); + if (path.matches("/controller/v1/jobs/upgrader/confidence/{version}")) return overrideConfidence(request, path.get("version")); return notFound(path); } private HttpResponse delete(HttpRequest request) { Path path = new Path(request.getUri().getPath()); - if (path.matches("/controller/v1/maintenance/inactive/{jobName}")) - return setActive(path.get("jobName"), true); + if (path.matches("/controller/v1/maintenance/inactive/{jobName}")) return setActive(path.get("jobName"), true); + if (path.matches("/controller/v1/jobs/upgrader/confidence/{version}")) return removeConfidenceOverride(path.get("version")); return notFound(path); } @@ -100,21 +102,41 @@ public class ControllerApiHandler extends LoggingRequestHandler { private HttpResponse configureUpgrader(HttpRequest request) { String upgradesPerMinuteField = "upgradesPerMinute"; + String confidenceOverrideField = "confidenceOverride"; byte[] jsonBytes = toJsonBytes(request.getData()); Inspector inspect = SlimeUtils.jsonToSlime(jsonBytes).get(); Upgrader upgrader = maintenance.upgrader(); + if (inspect.field(upgradesPerMinuteField).valid()) { upgrader.setUpgradesPerMinute(inspect.field(upgradesPerMinuteField).asDouble()); } else { - return ErrorResponse.badRequest("Unable to configure upgrader with data in request: '" + - Utf8.toString(jsonBytes) + "'"); + return ErrorResponse.badRequest("No such modifiable field(s)"); } return new UpgraderResponse(maintenance.upgrader()); } - private byte[] toJsonBytes(InputStream jsonStream) { + private HttpResponse removeConfidenceOverride(String version) { + maintenance.upgrader().removeConfidenceOverride(Version.fromString(version)); + return new UpgraderResponse(maintenance.upgrader()); + } + + private HttpResponse overrideConfidence(HttpRequest request, String version) { + Confidence confidence = Confidence.valueOf(asString(request.getData())); + maintenance.upgrader().overrideConfidence(Version.fromString(version), confidence); + return new UpgraderResponse(maintenance.upgrader()); + } + + private static String asString(InputStream in) { + Scanner scanner = new Scanner(in).useDelimiter("\\A"); + if (scanner.hasNext()) { + return scanner.next(); + } + return ""; + } + + private static byte[] toJsonBytes(InputStream jsonStream) { try { return IOUtils.readBytes(jsonStream, 1000 * 1000); } catch (IOException e) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/UpgraderResponse.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/UpgraderResponse.java index d6b791d4db0..beb6c98e447 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/UpgraderResponse.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/controller/UpgraderResponse.java @@ -27,6 +27,11 @@ public class UpgraderResponse extends HttpResponse { Slime slime = new Slime(); Cursor root = slime.setObject(); root.setDouble("upgradesPerMinute", upgrader.upgradesPerMinute()); + Cursor array = root.setArray("confidenceOverrides"); + upgrader.confidenceOverrides().forEach((version, confidence) -> { + Cursor object = array.addObject(); + object.setString(version.toString(), confidence.name()); + }); new JsonFormat(true).encode(outputStream, slime); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VersionStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VersionStatus.java index 86fba06db2f..d628489bc29 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VersionStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VersionStatus.java @@ -178,14 +178,15 @@ public class VersionStatus { Controller controller) { GitSha gitSha = controller.gitHub().getCommit(VESPA_REPO_OWNER, VESPA_REPO, statistics.version().toFullString()); Instant committedAt = Instant.ofEpochMilli(gitSha.commit.author.date.getTime()); - VespaVersion.Confidence confidence; - // Always compute confidence for system version - if (isSystemVersion) { - confidence = VespaVersion.confidenceFrom(statistics, controller); - } else { - // Keep existing confidence for non-system versions if already computed - confidence = confidenceFor(statistics.version(), controller) - .orElse(VespaVersion.confidenceFrom(statistics, controller)); + VespaVersion.Confidence confidence = controller.curator().readConfidenceOverrides().get(statistics.version()); + // Compute confidence if there's no override + if (confidence == null) { + if (isSystemVersion) { // Always compute confidence for system version + confidence = VespaVersion.confidenceFrom(statistics, controller); + } else { // Keep existing confidence for non-system versions if already computed + confidence = confidenceFor(statistics.version(), controller) + .orElse(VespaVersion.confidenceFrom(statistics, controller)); + } } return new VespaVersion(statistics, gitSha.sha, committedAt, diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiTest.java index 984e184cdcf..e5f3af8c06a 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/ControllerApiTest.java @@ -47,26 +47,43 @@ public class ControllerApiTest extends ControllerContainerTest { // Get current configuration tester.assertResponse(authenticatedRequest("http://localhost:8080/controller/v1/jobs/upgrader", new byte[0], Request.Method.GET), - "{\"upgradesPerMinute\":0.5}", + "{\"upgradesPerMinute\":0.5,\"confidenceOverrides\":[]}", 200); // Set invalid configuration - ; tester.assertResponse( hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader", "{\"upgradesPerMinute\":-1}", Request.Method.PATCH), "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Upgrades per minute must be >= 0\"}", 400); - // Unrecognized field + // Ignores unrecognized field tester.assertResponse( - hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader","{\"foo\":bar}", Request.Method.PATCH), - "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Unable to configure upgrader with data in request: '{\\\"foo\\\":bar}'\"}", + hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader","{\"foo\":\"bar\"}", Request.Method.PATCH), + "{\"error-code\":\"BAD_REQUEST\",\"message\":\"No such modifiable field(s)\"}", 400); - // Patch configuration + // Set upgrades per minute tester.assertResponse( hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader", "{\"upgradesPerMinute\":42.0}", Request.Method.PATCH), - "{\"upgradesPerMinute\":42.0}", + "{\"upgradesPerMinute\":42.0,\"confidenceOverrides\":[]}", + 200); + + // Override confidence + tester.assertResponse( + hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader/confidence/6.42", "broken", Request.Method.POST), + "{\"upgradesPerMinute\":42.0,\"confidenceOverrides\":[{\"6.42\":\"broken\"}]}", + 200); + + // Override confidence for another version + tester.assertResponse( + hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader/confidence/6.43", "broken", Request.Method.POST), + "{\"upgradesPerMinute\":42.0,\"confidenceOverrides\":[{\"6.42\":\"broken\"},{\"6.43\":\"broken\"}]}", + 200); + + // Remove first override + tester.assertResponse( + hostedOperatorRequest("http://localhost:8080/controller/v1/jobs/upgrader/confidence/6.42", "", Request.Method.DELETE), + "{\"upgradesPerMinute\":42.0,\"confidenceOverrides\":[{\"6.43\":\"broken\"}]}", 200); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java index 5ccd1035037..27e26e3267a 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java @@ -242,6 +242,34 @@ public class VersionStatusTest { assertEquals("5.3", versions.get(2).versionNumber().toString()); } + @Test + public void testConfidenceOverride() { + DeploymentTester tester = new DeploymentTester(); + Version version0 = new Version("5.0"); + tester.upgradeSystem(version0); + + // Create and deploy application on current version + Application app = tester.createAndDeploy("app", 1, "canary"); + tester.updateVersionStatus(); + assertEquals(Confidence.high, confidence(tester.controller(), version0)); + + // Override confidence + tester.upgrader().overrideConfidence(version0, Confidence.broken); + tester.updateVersionStatus(); + assertEquals(Confidence.broken, confidence(tester.controller(), version0)); + + // New version is released and application upgrades + Version version1 = new Version("5.1"); + tester.upgradeSystem(version1); + tester.completeUpgrade(app, version1, "canary"); + tester.updateVersionStatus(); + assertEquals(Confidence.high, confidence(tester.controller(), version1)); + + // Stale override was removed + assertFalse("Stale override removed", tester.controller().curator().readConfidenceOverrides() + .keySet().contains(version0)); + } + private Confidence confidence(Controller controller, Version version) { return controller.versionStatus().versions().stream() .filter(v -> v.statistics().version().equals(version)) |