diff options
author | Martin Polden <mpolden@mpolden.no> | 2019-04-02 07:59:56 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-04-02 07:59:56 +0100 |
commit | da41894e5b4f7525ee59d9c69838bdc21735d0f2 (patch) | |
tree | 27e661bf00cda485be7b6445182d0bd6032fae85 /controller-server/src | |
parent | 2a1cd9b08a9ba150a2a295c368c30b40386971f5 (diff) | |
parent | 601d71793fc14b2d943d4e6f10f2fe455f826fca (diff) |
Merge pull request #8973 from vespa-engine/mpolden/deployment-warnings-metric
Report deployment warnings metric
Diffstat (limited to 'controller-server/src')
14 files changed, 232 insertions, 60 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java index d68c5caf685..1d685895914 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java @@ -41,6 +41,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.routing.RoutingGenerato import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.application.Deployment; +import com.yahoo.vespa.hosted.controller.application.DeploymentMetrics; import com.yahoo.vespa.hosted.controller.application.JobList; import com.yahoo.vespa.hosted.controller.application.JobStatus; import com.yahoo.vespa.hosted.controller.application.JobStatus.JobRun; @@ -50,11 +51,11 @@ import com.yahoo.vespa.hosted.controller.athenz.impl.AthenzFacade; import com.yahoo.vespa.hosted.controller.concurrent.Once; import com.yahoo.vespa.hosted.controller.deployment.DeploymentSteps; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger; -import com.yahoo.vespa.hosted.controller.security.AccessControl; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; import com.yahoo.vespa.hosted.controller.rotation.Rotation; import com.yahoo.vespa.hosted.controller.rotation.RotationLock; import com.yahoo.vespa.hosted.controller.rotation.RotationRepository; +import com.yahoo.vespa.hosted.controller.security.AccessControl; import com.yahoo.vespa.hosted.controller.security.Credentials; import com.yahoo.vespa.hosted.controller.tenant.AthenzTenant; import com.yahoo.vespa.hosted.controller.tenant.Tenant; @@ -72,6 +73,7 @@ import java.time.Instant; import java.util.Collections; import java.util.Comparator; import java.util.EnumSet; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -336,7 +338,8 @@ public class ApplicationController { ActivateResult result = deploy(applicationId, applicationPackage, zone, options, rotationNames, cnames); lockOrThrow(applicationId, application -> - store(application.withNewDeployment(zone, applicationVersion, platformVersion, clock.instant()))); + store(application.withNewDeployment(zone, applicationVersion, platformVersion, clock.instant(), + warningsFrom(result)))); return result; } } @@ -761,4 +764,17 @@ public class ApplicationController { .max(naturalOrder()); } + /** Extract deployment warnings metric from deployment result */ + private static Map<DeploymentMetrics.Warning, Integer> warningsFrom(ActivateResult result) { + if (result.prepareResponse().log == null) return Map.of(); + Map<DeploymentMetrics.Warning, Integer> warnings = new HashMap<>(); + for (Log log : result.prepareResponse().log) { + // TODO: Categorize warnings. Response from config server should be updated to include the appropriate + // category and typed log level + if (!"warn".equalsIgnoreCase(log.level) && !"warning".equalsIgnoreCase(log.level)) continue; + warnings.merge(DeploymentMetrics.Warning.all, 1, Integer::sum); + } + return Collections.unmodifiableMap(warnings); + } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java index 61c10abce14..0cfbc60ad8c 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java @@ -148,14 +148,14 @@ public class LockedApplication { } public LockedApplication withNewDeployment(ZoneId zone, ApplicationVersion applicationVersion, Version version, - Instant instant) { + Instant instant, Map<DeploymentMetrics.Warning, Integer> warnings) { // Use info from previous deployment if available, otherwise create a new one. Deployment previousDeployment = deployments.getOrDefault(zone, new Deployment(zone, applicationVersion, version, instant)); Deployment newDeployment = new Deployment(zone, applicationVersion, version, instant, previousDeployment.clusterUtils(), previousDeployment.clusterInfo(), - previousDeployment.metrics(), + previousDeployment.metrics().with(warnings), previousDeployment.activity()); return with(newDeployment); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentActivity.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentActivity.java index 881de040e28..03c08509a5e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentActivity.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentActivity.java @@ -63,7 +63,7 @@ public class DeploymentActivity { public static DeploymentActivity create(Optional<Instant> queriedAt, Optional<Instant> writtenAt, OptionalDouble lastQueriesPerSecond, OptionalDouble lastWritesPerSecond) { - if (!queriedAt.isPresent() && !writtenAt.isPresent()) { + if (queriedAt.isEmpty() && writtenAt.isEmpty()) { return none; } return new DeploymentActivity(queriedAt, writtenAt, lastQueriesPerSecond, lastWritesPerSecond); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentMetrics.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentMetrics.java index 1ee461cbb8d..7a50184e7a4 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentMetrics.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentMetrics.java @@ -2,16 +2,20 @@ package com.yahoo.vespa.hosted.controller.application; import java.time.Instant; +import java.util.Map; +import java.util.Objects; import java.util.Optional; /** - * Metrics for a deployment of an application. + * Metrics for a deployment of an application. This contains a snapshot of metrics gathered at a point in time, it does + * not contain any historical data. * * @author smorgrav + * @author mpolden */ public class DeploymentMetrics { - public static final DeploymentMetrics none = new DeploymentMetrics(0, 0, 0, 0, 0); + public static final DeploymentMetrics none = new DeploymentMetrics(0, 0, 0, 0, 0, Optional.empty(), Map.of()); private final double queriesPerSecond; private final double writesPerSecond; @@ -19,44 +23,97 @@ public class DeploymentMetrics { private final double queryLatencyMillis; private final double writeLatencyMills; private final Optional<Instant> instant; + private final Map<Warning, Integer> warnings; + /* DO NOT USE. Public for serialization purposes */ public DeploymentMetrics(double queriesPerSecond, double writesPerSecond, double documentCount, - double queryLatencyMillis, double writeLatencyMills) { - this(queriesPerSecond, writesPerSecond, documentCount, queryLatencyMillis, writeLatencyMills, Optional.empty()); - } - - public DeploymentMetrics(double queriesPerSecond, double writesPerSecond, double documentCount, - double queryLatencyMillis, double writeLatencyMills, Optional<Instant> instant) { + double queryLatencyMillis, double writeLatencyMills, Optional<Instant> instant, + Map<Warning, Integer> warnings) { this.queriesPerSecond = queriesPerSecond; this.writesPerSecond = writesPerSecond; this.documentCount = documentCount; this.queryLatencyMillis = queryLatencyMillis; this.writeLatencyMills = writeLatencyMills; - this.instant = instant; + this.instant = Objects.requireNonNull(instant, "instant must be non-null"); + this.warnings = Map.copyOf(Objects.requireNonNull(warnings, "warnings must be non-null")); + if (warnings.entrySet().stream().anyMatch(kv -> kv.getValue() < 0)) { + throw new IllegalArgumentException("Warning count must be non-negative. Got " + warnings); + } } + /** Returns the number of queries per second */ public double queriesPerSecond() { return queriesPerSecond; } + /** Returns the number of writes per second */ public double writesPerSecond() { return writesPerSecond; } + /** Returns the number of documents */ public double documentCount() { return documentCount; } + /** Returns the average query latency in milliseconds */ public double queryLatencyMillis() { return queryLatencyMillis; } + /** Returns the average write latency in milliseconds */ public double writeLatencyMillis() { return writeLatencyMills; } + /** Returns the approximate time this was measured */ public Optional<Instant> instant() { return instant; } + /** Returns the number of warnings of the most recent deployment */ + public Map<Warning, Integer> warnings() { + return warnings; + } + + public DeploymentMetrics withQueriesPerSecond(double queriesPerSecond) { + return new DeploymentMetrics(queriesPerSecond, writesPerSecond, documentCount, queryLatencyMillis, + writeLatencyMills, instant, warnings); + } + + public DeploymentMetrics withWritesPerSecond(double writesPerSecond) { + return new DeploymentMetrics(queriesPerSecond, writesPerSecond, documentCount, queryLatencyMillis, + writeLatencyMills, instant, warnings); + } + + public DeploymentMetrics withDocumentCount(double documentCount) { + return new DeploymentMetrics(queriesPerSecond, writesPerSecond, documentCount, queryLatencyMillis, + writeLatencyMills, instant, warnings); + } + + public DeploymentMetrics withQueryLatencyMillis(double queryLatencyMillis) { + return new DeploymentMetrics(queriesPerSecond, writesPerSecond, documentCount, queryLatencyMillis, + writeLatencyMills, instant, warnings); + } + + public DeploymentMetrics withWriteLatencyMillis(double writeLatencyMills) { + return new DeploymentMetrics(queriesPerSecond, writesPerSecond, documentCount, queryLatencyMillis, + writeLatencyMills, instant, warnings); + } + + public DeploymentMetrics at(Instant instant) { + return new DeploymentMetrics(queriesPerSecond, writesPerSecond, documentCount, queryLatencyMillis, + writeLatencyMills, Optional.of(instant), warnings); + } + + public DeploymentMetrics with(Map<Warning, Integer> warnings) { + return new DeploymentMetrics(queriesPerSecond, writesPerSecond, documentCount, queryLatencyMillis, + writeLatencyMills, instant, warnings); + } + + /** Types of deployment warnings. We currently have only one */ + public enum Warning { + all + } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index c64bc99d0c7..5c9489d415f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -604,7 +604,7 @@ public class InternalStepRunner implements StepRunner { " </container>\n" + "</services>\n"; - return servicesXml.getBytes(); + return servicesXml.getBytes(StandardCharsets.UTF_8); } /** Returns a dummy deployment xml which sets up the service identity for the tester, if present. */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java index c3474a485ca..787a050e59e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java @@ -11,10 +11,10 @@ import com.yahoo.vespa.hosted.controller.application.DeploymentMetrics; import com.yahoo.vespa.hosted.controller.application.RotationStatus; import java.time.Duration; +import java.time.Instant; import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.TreeMap; import java.util.concurrent.ForkJoinPool; import java.util.concurrent.TimeUnit; @@ -61,19 +61,20 @@ public class DeploymentMetricsMaintainer extends Maintainer { applications.store(locked.withRotationStatus(rotationStatus(application)))); for (Deployment deployment : application.deployments().values()) { - MetricsService.DeploymentMetrics deploymentMetrics = controller().metricsService() - .getDeploymentMetrics(application.id(), deployment.zone()); - - DeploymentMetrics newMetrics = new DeploymentMetrics(deploymentMetrics.queriesPerSecond(), - deploymentMetrics.writesPerSecond(), - deploymentMetrics.documentCount(), - deploymentMetrics.queryLatencyMillis(), - deploymentMetrics.writeLatencyMillis(), - Optional.of(controller().clock().instant())); - - applications.lockIfPresent(application.id(), locked -> - applications.store(locked.with(deployment.zone(), newMetrics) - .recordActivityAt(controller().clock().instant(), deployment.zone()))); + MetricsService.DeploymentMetrics collectedMetrics = controller().metricsService() + .getDeploymentMetrics(application.id(), deployment.zone()); + Instant now = controller().clock().instant(); + applications.lockIfPresent(application.id(), locked -> { + DeploymentMetrics newMetrics = locked.get().deployments().get(deployment.zone()).metrics() + .withQueriesPerSecond(collectedMetrics.queriesPerSecond()) + .withWritesPerSecond(collectedMetrics.writesPerSecond()) + .withDocumentCount(collectedMetrics.documentCount()) + .withQueryLatencyMillis(collectedMetrics.queryLatencyMillis()) + .withWriteLatencyMillis(collectedMetrics.writeLatencyMillis()) + .at(now); + applications.store(locked.with(deployment.zone(), newMetrics) + .recordActivityAt(now, deployment.zone())); + }); } } catch (Exception e) { failures.incrementAndGet(); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java index 07680fc8b72..037e7d8ced6 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java @@ -13,6 +13,8 @@ import com.yahoo.vespa.hosted.controller.api.integration.chef.rest.PartialNode; import com.yahoo.vespa.hosted.controller.api.integration.chef.rest.PartialNodeResult; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; import com.yahoo.vespa.hosted.controller.application.ApplicationList; +import com.yahoo.vespa.hosted.controller.application.Deployment; +import com.yahoo.vespa.hosted.controller.application.DeploymentMetrics; import com.yahoo.vespa.hosted.controller.application.JobList; import com.yahoo.vespa.hosted.controller.application.JobStatus; import com.yahoo.vespa.hosted.controller.rotation.RotationLock; @@ -20,6 +22,7 @@ import com.yahoo.vespa.hosted.controller.rotation.RotationLock; import java.time.Clock; import java.time.Duration; import java.time.Instant; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -33,12 +36,13 @@ import java.util.stream.Collectors; */ public class MetricsReporter extends Maintainer { - public static final String convergeMetric = "seconds.since.last.chef.convergence"; - public static final String deploymentFailMetric = "deployment.failurePercentage"; - public static final String deploymentAverageDuration = "deployment.averageDuration"; - public static final String deploymentFailingUpgrades = "deployment.failingUpgrades"; - public static final String deploymentBuildAgeSeconds = "deployment.buildAgeSeconds"; - public static final String remainingRotations = "remaining_rotations"; + public static final String CONVERGENCE_METRIC = "seconds.since.last.chef.convergence"; + public static final String DEPLOYMENT_FAIL_METRIC = "deployment.failurePercentage"; + public static final String DEPLOYMENT_AVERAGE_DURATION = "deployment.averageDuration"; + public static final String DEPLOYMENT_FAILING_UPGRADES = "deployment.failingUpgrades"; + public static final String DEPLOYMENT_BUILD_AGE_SECONDS = "deployment.buildAgeSeconds"; + public static final String DEPLOYMENT_WARNINGS = "deployment.warnings"; + public static final String REMAINING_ROTATIONS = "remaining_rotations"; private final Metric metric; private final Chef chefClient; @@ -69,7 +73,7 @@ public class MetricsReporter extends Maintainer { private void reportRemainingRotations() { try (RotationLock lock = controller().applications().rotationRepository().lock()) { int availableRotations = controller().applications().rotationRepository().availableRotations(lock).size(); - metric.set(remainingRotations, availableRotations, metric.createContext(Collections.emptyMap())); + metric.set(REMAINING_ROTATIONS, availableRotations, metric.createContext(Collections.emptyMap())); } } @@ -104,7 +108,7 @@ public class MetricsReporter extends Maintainer { Optional<String> environment = node.getValue("environment"); Optional<String> region = node.getValue("region"); - if(environment.isPresent() && region.isPresent()) { + if (environment.isPresent() && region.isPresent()) { dimensions.put("zone", String.format("%s.%s", environment.get(), region.get())); } @@ -112,7 +116,7 @@ public class MetricsReporter extends Maintainer { Optional<String> application = node.getValue("application"); application.ifPresent(app -> dimensions.put("app", String.format("%s.%s", app, node.getValue("instance").orElse("default")))); Metric.Context context = metric.createContext(dimensions); - metric.set(convergeMetric, secondsSinceConverge, context); + metric.set(CONVERGENCE_METRIC, secondsSinceConverge, context); } } @@ -120,21 +124,25 @@ public class MetricsReporter extends Maintainer { ApplicationList applications = ApplicationList.from(controller().applications().asList()) .hasProductionDeployment(); - metric.set(deploymentFailMetric, deploymentFailRatio(applications) * 100, metric.createContext(Collections.emptyMap())); + metric.set(DEPLOYMENT_FAIL_METRIC, deploymentFailRatio(applications) * 100, metric.createContext(Collections.emptyMap())); averageDeploymentDurations(applications, clock.instant()).forEach((application, duration) -> { - metric.set(deploymentAverageDuration, duration.getSeconds(), metric.createContext(dimensions(application))); + metric.set(DEPLOYMENT_AVERAGE_DURATION, duration.getSeconds(), metric.createContext(dimensions(application))); }); deploymentsFailingUpgrade(applications).forEach((application, failingJobs) -> { - metric.set(deploymentFailingUpgrades, failingJobs, metric.createContext(dimensions(application))); + metric.set(DEPLOYMENT_FAILING_UPGRADES, failingJobs, metric.createContext(dimensions(application))); + }); + + deploymentWarnings(applications).forEach((application, warnings) -> { + metric.set(DEPLOYMENT_WARNINGS, warnings, metric.createContext(dimensions(application))); }); for (Application application : applications.asList()) application.deploymentJobs().statusOf(JobType.component) .flatMap(JobStatus::lastSuccess) .flatMap(run -> run.application().buildTime()) - .ifPresent(buildTime -> metric.set(deploymentBuildAgeSeconds, + .ifPresent(buildTime -> metric.set(DEPLOYMENT_BUILD_AGE_SECONDS, controller().clock().instant().getEpochSecond() - buildTime.getEpochSecond(), metric.createContext(dimensions(application.id())))); } @@ -180,6 +188,21 @@ public class MetricsReporter extends Maintainer { .map(totalDuration -> totalDuration.dividedBy(jobDurations.size())) .orElse(Duration.ZERO); } + + private static Map<ApplicationId, Integer> deploymentWarnings(ApplicationList applications) { + return applications.asList().stream() + .collect(Collectors.toMap(Application::id, a -> maxWarningCountOf(a.deployments().values()))); + } + + private static int maxWarningCountOf(Collection<Deployment> deployments) { + return deployments.stream() + .map(Deployment::metrics) + .map(DeploymentMetrics::warnings) + .map(Map::values) + .flatMap(Collection::stream) + .max(Integer::compareTo) + .orElse(0); + } private static void keepNodesWithSystem(PartialNodeResult nodeResult, SystemName system) { nodeResult.rows.removeIf(node -> !system.name().equals(node.getValue("system").orElse("main"))); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java index 0c79c5893c6..8433b2f368c 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java @@ -10,6 +10,7 @@ import com.yahoo.config.provision.HostName; import com.yahoo.slime.ArrayTraverser; import com.yahoo.slime.Cursor; import com.yahoo.slime.Inspector; +import com.yahoo.slime.ObjectTraverser; import com.yahoo.slime.Slime; import com.yahoo.vespa.config.SlimeUtils; import com.yahoo.vespa.hosted.controller.Application; @@ -140,6 +141,7 @@ public class ApplicationSerializer { private final String deploymentMetricsQueryLatencyField = "queryLatencyMillis"; private final String deploymentMetricsWriteLatencyField = "writeLatencyMillis"; private final String deploymentMetricsUpdateTime = "lastUpdated"; + private final String deploymentMetricsWarningsField = "warnings"; // ------------------ Serialization @@ -192,6 +194,10 @@ public class ApplicationSerializer { root.setDouble(deploymentMetricsQueryLatencyField, metrics.queryLatencyMillis()); root.setDouble(deploymentMetricsWriteLatencyField, metrics.writeLatencyMillis()); metrics.instant().ifPresent(instant -> root.setLong(deploymentMetricsUpdateTime, instant.toEpochMilli())); + if (!metrics.warnings().isEmpty()) { + Cursor warningsObject = root.setObject(deploymentMetricsWarningsField); + metrics.warnings().forEach((warning, count) -> warningsObject.setLong(warning.name(), count)); + } } private void clusterInfoToSlime(Map<ClusterSpec.Id, ClusterInfo> clusters, Cursor object) { @@ -360,7 +366,15 @@ public class ApplicationSerializer { object.field(deploymentMetricsDocsField).asDouble(), object.field(deploymentMetricsQueryLatencyField).asDouble(), object.field(deploymentMetricsWriteLatencyField).asDouble(), - instant); + instant, + deploymentWarningsFrom(object.field(deploymentMetricsWarningsField))); + } + + private Map<DeploymentMetrics.Warning, Integer> deploymentWarningsFrom(Inspector object) { + Map<DeploymentMetrics.Warning, Integer> warnings = new HashMap<>(); + object.traverse((ObjectTraverser) (name, value) -> warnings.put(DeploymentMetrics.Warning.valueOf(name), + (int) value.asLong())); + return Collections.unmodifiableMap(warnings); } private Map<HostName, RotationStatus> rotationStatusFromSlime(Inspector object) { @@ -376,9 +390,9 @@ public class ApplicationSerializer { return Collections.unmodifiableMap(rotationStatus); } - private Map<ClusterSpec.Id, ClusterInfo> clusterInfoMapFromSlime(Inspector object) { + private Map<ClusterSpec.Id, ClusterInfo> clusterInfoMapFromSlime (Inspector object) { Map<ClusterSpec.Id, ClusterInfo> map = new HashMap<>(); - object.traverse((String name, Inspector obect) -> map.put(new ClusterSpec.Id(name), clusterInfoFromSlime(obect))); + object.traverse((String name, Inspector value) -> map.put(new ClusterSpec.Id(name), clusterInfoFromSlime(value))); return map; } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java index c41351f12b0..1f00d99350a 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java @@ -24,6 +24,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobError; +import com.yahoo.vespa.hosted.controller.application.DeploymentMetrics; import com.yahoo.vespa.hosted.controller.application.JobStatus; import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder; import com.yahoo.vespa.hosted.controller.deployment.BuildJob; @@ -534,6 +535,22 @@ public class ControllerTest { tester.deployCompletely(application, applicationPackage); } + @Test + public void testDeployApplicationWithWarnings() { + DeploymentTester tester = new DeploymentTester(); + Application application = tester.createApplication("app1", "tenant1", 1, 1L); + ApplicationPackage applicationPackage = new ApplicationPackageBuilder() + .environment(Environment.prod) + .region("us-west-1") + .build(); + ZoneId zone = ZoneId.from("prod", "us-west-1"); + int warnings = 3; + tester.configServer().generateWarnings(new DeploymentId(application.id(), zone), warnings); + tester.deployCompletely(application, applicationPackage); + assertEquals(warnings, tester.applications().require(application.id()).deployments().get(zone) + .metrics().warnings().get(DeploymentMetrics.Warning.all).intValue()); + } + private void runUpgrade(DeploymentTester tester, ApplicationId application, ApplicationVersion version) { Version next = Version.fromString("6.2"); tester.upgradeSystem(next); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java index eaccb5fa12d..803f56fc0d7 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java @@ -29,9 +29,9 @@ import com.yahoo.vespa.serviceview.bindings.ApplicationView; import com.yahoo.vespa.serviceview.bindings.ClusterView; import com.yahoo.vespa.serviceview.bindings.ServiceView; import org.apache.commons.io.IOUtils; -import org.apache.http.impl.io.EmptyInputStream; import java.io.InputStream; +import java.time.Instant; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -41,6 +41,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.logging.Level; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -57,6 +58,7 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer private final Version initialVersion = new Version(6, 1, 0); private final Set<DeploymentId> suspendedApplications = new HashSet<>(); private final Map<ZoneId, List<LoadBalancer>> loadBalancers = new HashMap<>(); + private final Map<DeploymentId, List<Log>> warnings = new HashMap<>(); private Version lastPrepareVersion = null; private RuntimeException prepareException = null; @@ -164,6 +166,18 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer suspendedApplications.remove(deployment); } + public void generateWarnings(DeploymentId deployment, int count) { + List<Log> logs = new ArrayList<>(count); + for (int i = 0; i < count; i++) { + Log log = new Log(); + log.time = Instant.now().toEpochMilli(); + log.level = Level.WARNING.getName(); + log.message = "log message " + (count + 1) + " generated by unit test"; + logs.add(log); + } + warnings.put(deployment, List.copyOf(logs)); + } + @Override public NodeRepositoryMock nodeRepository() { return nodeRepository; @@ -258,7 +272,7 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer Collections.emptyList()); setConfigChangeActions(null); prepareResponse.tenant = new TenantId("tenant"); - prepareResponse.log = Collections.emptyList(); + prepareResponse.log = warnings.getOrDefault(deployment, Collections.emptyList()); return prepareResponse; } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainerTest.java index e11440a372c..de9da83826d 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainerTest.java @@ -41,11 +41,13 @@ public class DeploymentMetricsMaintainerTest { // No metrics gathered yet assertEquals(0, app.get().metrics().queryServiceQuality(), 0); assertEquals(0, deployment.get().metrics().documentCount(), 0); + assertFalse("No timestamp set", deployment.get().metrics().instant().isPresent()); assertFalse("Never received any queries", deployment.get().activity().lastQueried().isPresent()); assertFalse("Never received any writes", deployment.get().activity().lastWritten().isPresent()); // Metrics are gathered and saved to application maintainer.maintain(); + Instant t1 = tester.clock().instant().truncatedTo(MILLIS); assertEquals(0.5, app.get().metrics().queryServiceQuality(), Double.MIN_VALUE); assertEquals(0.7, app.get().metrics().writeServiceQuality(), Double.MIN_VALUE); assertEquals(1, deployment.get().metrics().queriesPerSecond(), Double.MIN_VALUE); @@ -53,7 +55,7 @@ public class DeploymentMetricsMaintainerTest { assertEquals(3, deployment.get().metrics().documentCount(), Double.MIN_VALUE); assertEquals(4, deployment.get().metrics().queryLatencyMillis(), Double.MIN_VALUE); assertEquals(5, deployment.get().metrics().writeLatencyMillis(), Double.MIN_VALUE); - Instant t1 = tester.clock().instant().truncatedTo(MILLIS); + assertEquals(t1, deployment.get().metrics().instant().get()); assertEquals(t1, deployment.get().activity().lastQueried().get()); assertEquals(t1, deployment.get().activity().lastWritten().get()); @@ -61,6 +63,7 @@ public class DeploymentMetricsMaintainerTest { tester.clock().advance(Duration.ofHours(1)); Instant t2 = tester.clock().instant().truncatedTo(MILLIS); maintainer.maintain(); + assertEquals(t2, deployment.get().metrics().instant().get()); assertEquals(t2, deployment.get().activity().lastQueried().get()); assertEquals(t2, deployment.get().activity().lastWritten().get()); assertEquals(1, deployment.get().activity().lastQueriesPerSecond().getAsDouble(), Double.MIN_VALUE); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java index de23a675794..86370980729 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java @@ -6,12 +6,15 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.yahoo.component.Version; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.SystemName; +import com.yahoo.test.ManualClock; import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.ControllerTester; +import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.api.integration.chef.ChefMock; import com.yahoo.vespa.hosted.controller.api.integration.chef.rest.PartialNodeResult; import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; +import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester; @@ -29,7 +32,6 @@ import java.nio.file.Paths; import java.time.Clock; import java.time.Duration; import java.time.Instant; -import java.time.ZoneId; import java.util.Map; import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.component; @@ -56,7 +58,7 @@ public class MetricsReporterTest { @Test public void test_chef_metrics() { - Clock clock = Clock.fixed(Instant.ofEpochSecond(1475497913), ZoneId.systemDefault()); + Clock clock = new ManualClock(Instant.ofEpochSecond(1475497913)); ControllerTester tester = new ControllerTester(); MetricsReporter metricsReporter = createReporter(clock, tester.controller(), metrics, SystemName.cd); metricsReporter.maintain(); @@ -69,7 +71,7 @@ public class MetricsReporterTest { assertDimension(metricContext, "tenantName", "ciintegrationtests"); assertDimension(metricContext, "app", "restart.default"); assertDimension(metricContext, "zone", "prod.cd-us-east-1"); - assertEquals(727, metricEntry.getValue().get(MetricsReporter.convergeMetric).longValue()); + assertEquals(727, metricEntry.getValue().get(MetricsReporter.CONVERGENCE_METRIC).longValue()); } @Test @@ -82,7 +84,7 @@ public class MetricsReporterTest { MetricsReporter metricsReporter = createReporter(tester.controller(), metrics, SystemName.main); metricsReporter.maintain(); - assertEquals(0.0, metrics.getMetric(MetricsReporter.deploymentFailMetric)); + assertEquals(0.0, metrics.getMetric(MetricsReporter.DEPLOYMENT_FAIL_METRIC)); // Deploy all apps successfully Application app1 = tester.createApplication("app1", "tenant1", 1, 11L); @@ -95,14 +97,14 @@ public class MetricsReporterTest { tester.deployCompletely(app4, applicationPackage); metricsReporter.maintain(); - assertEquals(0.0, metrics.getMetric(MetricsReporter.deploymentFailMetric)); + assertEquals(0.0, metrics.getMetric(MetricsReporter.DEPLOYMENT_FAIL_METRIC)); // 1 app fails system-test tester.jobCompletion(component).application(app4).nextBuildNumber().uploadArtifact(applicationPackage).submit(); tester.deployAndNotify(app4, applicationPackage, false, systemTest); metricsReporter.maintain(); - assertEquals(25.0, metrics.getMetric(MetricsReporter.deploymentFailMetric)); + assertEquals(25.0, metrics.getMetric(MetricsReporter.DEPLOYMENT_FAIL_METRIC)); } @Test @@ -207,7 +209,24 @@ public class MetricsReporterTest { } @Test - public void testBuildTimeReporting() { + public void test_deployment_warnings_metric() { + DeploymentTester tester = new DeploymentTester(); + ApplicationPackage applicationPackage = new ApplicationPackageBuilder() + .environment(Environment.prod) + .region("us-west-1") + .region("us-east-3") + .build(); + MetricsReporter reporter = createReporter(tester.controller(), metrics, SystemName.main); + Application application = tester.createApplication("app1", "tenant1", 1, 11L); + tester.configServer().generateWarnings(new DeploymentId(application.id(), ZoneId.from("prod", "us-west-1")), 3); + tester.configServer().generateWarnings(new DeploymentId(application.id(), ZoneId.from("prod", "us-east-3")), 4); + tester.deployCompletely(application, applicationPackage); + reporter.maintain(); + assertEquals(4, getDeploymentWarnings(application)); + } + + @Test + public void test_build_time_reporting() { InternalDeploymentTester tester = new InternalDeploymentTester(); ApplicationVersion version = tester.deployNewSubmission(); assertEquals(1000, version.buildTime().get().toEpochMilli()); @@ -215,15 +234,19 @@ public class MetricsReporterTest { MetricsReporter reporter = createReporter(tester.tester().controller(), metrics, SystemName.main); reporter.maintain(); assertEquals(tester.clock().instant().getEpochSecond() - 1, - getMetric(MetricsReporter.deploymentBuildAgeSeconds, tester.app())); + getMetric(MetricsReporter.DEPLOYMENT_BUILD_AGE_SECONDS, tester.app())); } private Duration getAverageDeploymentDuration(Application application) { - return Duration.ofSeconds(getMetric(MetricsReporter.deploymentAverageDuration, application).longValue()); + return Duration.ofSeconds(getMetric(MetricsReporter.DEPLOYMENT_AVERAGE_DURATION, application).longValue()); } private int getDeploymentsFailingUpgrade(Application application) { - return getMetric(MetricsReporter.deploymentFailingUpgrades, application).intValue(); + return getMetric(MetricsReporter.DEPLOYMENT_FAILING_UPGRADES, application).intValue(); + } + + private int getDeploymentWarnings(Application application) { + return getMetric(MetricsReporter.DEPLOYMENT_WARNINGS, application).intValue(); } private Number getMetric(String name, Application application) { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java index dd4558dbe2c..e0debf6d5db 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java @@ -77,7 +77,9 @@ public class ApplicationSerializerTest { deployments.add(new Deployment(zone1, applicationVersion1, Version.fromString("1.2.3"), Instant.ofEpochMilli(3))); // One deployment without cluster info and utils deployments.add(new Deployment(zone2, applicationVersion2, Version.fromString("1.2.3"), Instant.ofEpochMilli(5), createClusterUtils(3, 0.2), createClusterInfo(3, 4), - new DeploymentMetrics(2, 3, 4, 5, 6, Optional.of(Instant.now().truncatedTo(ChronoUnit.MILLIS))), + new DeploymentMetrics(2, 3, 4, 5, 6, + Optional.of(Instant.now().truncatedTo(ChronoUnit.MILLIS)), + Map.of(DeploymentMetrics.Warning.all, 3)), DeploymentActivity.create(Optional.of(activityAt), Optional.of(activityAt), OptionalDouble.of(200), OptionalDouble.of(10)))); @@ -178,6 +180,7 @@ public class ApplicationSerializerTest { assertEquals(original.deployments().get(zone2).metrics().queryLatencyMillis(), serialized.deployments().get(zone2).metrics().queryLatencyMillis(), Double.MIN_VALUE); assertEquals(original.deployments().get(zone2).metrics().writeLatencyMillis(), serialized.deployments().get(zone2).metrics().writeLatencyMillis(), Double.MIN_VALUE); assertEquals(original.deployments().get(zone2).metrics().instant(), serialized.deployments().get(zone2).metrics().instant()); + assertEquals(original.deployments().get(zone2).metrics().warnings(), serialized.deployments().get(zone2).metrics().warnings()); { // test more deployment serialization cases Application original2 = writable(original).withChange(Change.of(ApplicationVersion.from(new SourceRevision("repo1", "branch1", "commit1"), 42))).get(); Application serialized2 = applicationSerializer.fromSlime(applicationSerializer.toSlime(original2)); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java index e8d5c8b8ca4..8c28c289889 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java @@ -1525,7 +1525,8 @@ public class ApplicationApiTest extends ControllerContainerTest { clusterInfo.put(ClusterSpec.Id.from("cluster1"), new ClusterInfo("flavor1", 37, 2, 4, 50, ClusterSpec.Type.content, hostnames)); Map<ClusterSpec.Id, ClusterUtilization> clusterUtils = new HashMap<>(); clusterUtils.put(ClusterSpec.Id.from("cluster1"), new ClusterUtilization(0.3, 0.6, 0.4, 0.3)); - DeploymentMetrics metrics = new DeploymentMetrics(1, 2, 3, 4, 5, Optional.of(Instant.ofEpochMilli(123123))); + DeploymentMetrics metrics = new DeploymentMetrics(1, 2, 3, 4, 5, + Optional.of(Instant.ofEpochMilli(123123)), Map.of()); lockedApplication = lockedApplication .withClusterInfo(deployment.zone(), clusterInfo) |