diff options
18 files changed, 133 insertions, 61 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java index 38f8cd67601..e8a30bfb352 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java @@ -527,8 +527,10 @@ public class VespaMetricSet { metrics.add(new Metric("vds.visitor.allthreads.queuesize.count.sum")); metrics.add(new Metric("vds.visitor.allthreads.queuesize.count.count")); metrics.add(new Metric("vds.visitor.allthreads.queuesize.count.average")); // TODO: Remove in Vespa 8 - metrics.add(new Metric("vds.visitor.allthreads.completed.sum.average")); + metrics.add(new Metric("vds.visitor.allthreads.completed.sum.average")); // TODO: Remove in Vespa 8 + metrics.add(new Metric("vds.visitor.allthreads.completed.sum.rate")); metrics.add(new Metric("vds.visitor.allthreads.created.sum.rate")); + metrics.add(new Metric("vds.visitor.allthreads.failed.sum.rate")); metrics.add(new Metric("vds.visitor.allthreads.averagemessagesendtime.sum.max")); metrics.add(new Metric("vds.visitor.allthreads.averagemessagesendtime.sum.sum")); metrics.add(new Metric("vds.visitor.allthreads.averagemessagesendtime.sum.count")); @@ -537,19 +539,27 @@ public class VespaMetricSet { metrics.add(new Metric("vds.visitor.allthreads.averageprocessingtime.sum.sum")); metrics.add(new Metric("vds.visitor.allthreads.averageprocessingtime.sum.count")); metrics.add(new Metric("vds.visitor.allthreads.averageprocessingtime.sum.average")); // TODO: Remove in Vespa 8 - + + metrics.add(new Metric("vds.filestor.alldisks.allthreads.put.sum.count.rate")); + metrics.add(new Metric("vds.filestor.alldisks.allthreads.put.sum.failed.rate")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.put.sum.latency.max")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.put.sum.latency.sum")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.put.sum.latency.count")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.put.sum.latency.average")); // TODO: Remove in Vespa 8 + metrics.add(new Metric("vds.filestor.alldisks.allthreads.remove.sum.count.rate")); + metrics.add(new Metric("vds.filestor.alldisks.allthreads.remove.sum.failed.rate")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.remove.sum.latency.max")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.remove.sum.latency.sum")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.remove.sum.latency.count")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.remove.sum.latency.average")); // TODO: Remove in Vespa 8 + metrics.add(new Metric("vds.filestor.alldisks.allthreads.get.sum.count.rate")); + metrics.add(new Metric("vds.filestor.alldisks.allthreads.get.sum.failed.rate")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.get.sum.latency.max")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.get.sum.latency.sum")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.get.sum.latency.count")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.get.sum.latency.average")); // TODO: Remove in Vespa 8 + metrics.add(new Metric("vds.filestor.alldisks.allthreads.update.sum.count.rate")); + metrics.add(new Metric("vds.filestor.alldisks.allthreads.update.sum.failed.rate")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.update.sum.latency.max")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.update.sum.latency.sum")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.update.sum.latency.count")); @@ -569,13 +579,13 @@ public class VespaMetricSet { metrics.add(new Metric("vds.filestor.alldisks.allthreads.splitbuckets.count.rate")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.joinbuckets.count.rate")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.deletebuckets.count.rate")); + metrics.add(new Metric("vds.filestor.alldisks.allthreads.deletebuckets.failed.rate")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.deletebuckets.latency.max")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.deletebuckets.latency.sum")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.deletebuckets.latency.count")); metrics.add(new Metric("vds.filestor.alldisks.allthreads.deletebuckets.latency.average")); // TODO: Remove in Vespa 8 metrics.add(new Metric("vds.filestor.alldisks.allthreads.setbucketstates.count.rate")); - //Distributor metrics.add(new Metric("vds.idealstate.buckets_rechecking.average")); metrics.add(new Metric("vds.idealstate.idealstate_diff.average")); @@ -605,18 +615,24 @@ public class VespaMetricSet { metrics.add(new Metric("vds.distributor.puts.sum.latency.average")); // TODO: Remove in Vespa 8 metrics.add(new Metric("vds.distributor.puts.sum.ok.rate")); metrics.add(new Metric("vds.distributor.puts.sum.failures.total.rate")); + metrics.add(new Metric("vds.distributor.puts.sum.failures.notfound.rate")); + metrics.add(new Metric("vds.distributor.puts.sum.failures.test_and_set_failed")); metrics.add(new Metric("vds.distributor.removes.sum.latency.max")); metrics.add(new Metric("vds.distributor.removes.sum.latency.sum")); metrics.add(new Metric("vds.distributor.removes.sum.latency.count")); metrics.add(new Metric("vds.distributor.removes.sum.latency.average")); // TODO: Remove in Vespa 8 metrics.add(new Metric("vds.distributor.removes.sum.ok.rate")); metrics.add(new Metric("vds.distributor.removes.sum.failures.total.rate")); + metrics.add(new Metric("vds.distributor.removes.sum.failures.notfound.rate")); + metrics.add(new Metric("vds.distributor.removes.sum.failures.test_and_set_failed")); metrics.add(new Metric("vds.distributor.updates.sum.latency.max")); metrics.add(new Metric("vds.distributor.updates.sum.latency.sum")); metrics.add(new Metric("vds.distributor.updates.sum.latency.count")); metrics.add(new Metric("vds.distributor.updates.sum.latency.average")); // TODO: Remove in Vespa 8 metrics.add(new Metric("vds.distributor.updates.sum.ok.rate")); metrics.add(new Metric("vds.distributor.updates.sum.failures.total.rate")); + metrics.add(new Metric("vds.distributor.updates.sum.failures.notfound.rate")); + metrics.add(new Metric("vds.distributor.updates.sum.failures.test_and_set_failed")); metrics.add(new Metric("vds.distributor.updates.sum.diverging_timestamp_updates.rate")); metrics.add(new Metric("vds.distributor.removelocations.sum.ok.rate")); metrics.add(new Metric("vds.distributor.removelocations.sum.failures.total.rate")); @@ -626,6 +642,7 @@ public class VespaMetricSet { metrics.add(new Metric("vds.distributor.gets.sum.latency.average")); // TODO: Remove in Vespa 8 metrics.add(new Metric("vds.distributor.gets.sum.ok.rate")); metrics.add(new Metric("vds.distributor.gets.sum.failures.total.rate")); + metrics.add(new Metric("vds.distributor.gets.sum.failures.notfound.rate")); metrics.add(new Metric("vds.distributor.visitor.sum.latency.max")); metrics.add(new Metric("vds.distributor.visitor.sum.latency.sum")); metrics.add(new Metric("vds.distributor.visitor.sum.latency.count")); diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/role/PathGroup.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/role/PathGroup.java index 998af030b6b..67a6faac606 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/role/PathGroup.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/role/PathGroup.java @@ -31,7 +31,9 @@ enum PathGroup { "/os/v1/{*}", "/provision/v2/{*}", "/zone/v2/{*}", - "/routing/v1/{*}"), + "/routing/v1/", + "/routing/v1/status/environment/{*}", + "/routing/v1/inactive/environment/{*}"), /** Paths used for creating and reading user resources. */ user(Optional.of("/api"), @@ -53,7 +55,8 @@ enum PathGroup { Optional.of("/api"), "/application/v4/tenant/{tenant}/application/", "/application/v4/tenant/{tenant}/cost", - "/application/v4/tenant/{tenant}/cost/{date}"), + "/application/v4/tenant/{tenant}/cost/{date}", + "/routing/v1/status/tenant/{tenant}/{*}"), tenantKeys(Matcher.tenant, Optional.of("/api"), @@ -97,7 +100,8 @@ enum PathGroup { "/application/v4/tenant/{tenant}/application/{application}/environment/{environment}/region/{region}/instance/{ignored}/suspended", "/application/v4/tenant/{tenant}/application/{application}/environment/{environment}/region/{region}/instance/{ignored}/service/{*}", "/application/v4/tenant/{tenant}/application/{application}/environment/{environment}/region/{region}/instance/{ignored}/global-rotation/{*}", - "/application/v4/tenant/{tenant}/application/{application}/metering"), + "/application/v4/tenant/{tenant}/application/{application}/metering", + "/routing/v1/inactive/tenant/{tenant}/application/{application}/instance/{ignored}/environment/prod/region/{region}"), // TODO jonmv: remove /** Path used to restart development nodes. */ diff --git a/controller-api/src/test/java/com/yahoo/vespa/hosted/controller/api/role/RoleTest.java b/controller-api/src/test/java/com/yahoo/vespa/hosted/controller/api/role/RoleTest.java index da2f64f2893..5348185c276 100644 --- a/controller-api/src/test/java/com/yahoo/vespa/hosted/controller/api/role/RoleTest.java +++ b/controller-api/src/test/java/com/yahoo/vespa/hosted/controller/api/role/RoleTest.java @@ -8,6 +8,7 @@ import com.yahoo.config.provision.TenantName; import org.junit.Test; import java.net.URI; +import java.util.List; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -30,6 +31,10 @@ public class RoleTest { assertTrue(mainEnforcer.allows(role, Action.update, URI.create("/os/v1/bar"))); assertTrue(mainEnforcer.allows(role, Action.update, URI.create("/application/v4/tenant/t1/application/a1"))); assertTrue(mainEnforcer.allows(role, Action.update, URI.create("/application/v4/tenant/t2/application/a2"))); + assertTrue(mainEnforcer.allows(role, Action.read, URI.create("/routing/v1/"))); + assertTrue(mainEnforcer.allows(role, Action.read, URI.create("/routing/v1/status/environment/"))); + assertTrue(mainEnforcer.allows(role, Action.read, URI.create("/routing/v1/status/environment/prod"))); + assertTrue(mainEnforcer.allows(role, Action.create, URI.create("/routing/v1/inactive/environment/prod/region/us-north-1"))); } @Test @@ -165,4 +170,31 @@ public class RoleTest { assertTrue(mainEnforcer.allows(Role.systemFlagsDryrunner(), action, dryrunUri)); assertFalse(mainEnforcer.allows(Role.everyone(), action, dryrunUri)); } + + @Test + public void routing() { + var tenantUrl = URI.create("/routing/v1/status/tenant/t1"); + var applicationUrl = URI.create("/routing/v1/status/tenant/t1/application/a1"); + var instanceUrl = URI.create("/routing/v1/status/tenant/t1/application/a1/instance/i1"); + var deploymentUrl = URI.create("/routing/v1/status/tenant/t1/application/a1/instance/i1/environment/prod/region/us-north-1"); + // Read + for (var url : List.of(tenantUrl, applicationUrl, instanceUrl, deploymentUrl)) { + var allowedRole = Role.reader(TenantName.from("t1")); + var disallowedRole = Role.reader(TenantName.from("t2")); + assertTrue(allowedRole + " can read " + url, mainEnforcer.allows(allowedRole, Action.read, url)); + assertFalse(disallowedRole + " cannot read " + url, mainEnforcer.allows(disallowedRole, Action.read, url)); + } + + // Write + { + var url = URI.create("/routing/v1/inactive/tenant/t1/application/a1/instance/i1/environment/prod/region/us-north-1"); + var allowedRole = Role.applicationAdmin(TenantName.from("t1"), ApplicationName.from("a1")); + var disallowedRole = Role.applicationAdmin(TenantName.from("t2"), ApplicationName.from("a2")); + assertTrue(allowedRole + " can override status at " + url, mainEnforcer.allows(allowedRole, Action.create, url)); + assertTrue(allowedRole + " can clear status at " + url, mainEnforcer.allows(allowedRole, Action.delete, url)); + assertFalse(disallowedRole + " cannot override status at " + url, mainEnforcer.allows(disallowedRole, Action.create, url)); + assertFalse(disallowedRole + " cannot clear status at " + url, mainEnforcer.allows(disallowedRole, Action.delete, url)); + } + } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java index 3a60c480100..6aebae66bad 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java @@ -17,7 +17,6 @@ import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.Deployment; -import com.yahoo.vespa.hosted.controller.versions.VersionStatus; import java.time.Duration; import java.time.Instant; @@ -579,8 +578,12 @@ public class DeploymentStatus { Versions versions = Versions.from(change, status.application, status.deploymentFor(job.id()), status.systemVersion); return job.lastSuccess() .filter(run -> versions.targetsMatch(run.versions())) - .filter(run -> status.instanceJobs(instance).get(prodType).lastCompleted() - .map(last -> ! last.end().get().isAfter(run.start())).orElse(false)) + .filter(run -> ! status.jobs() + .instance(instance) + .type(prodType) + .successOn(versions) + .lastCompleted().endedNoLaterThan(run.start()) + .isEmpty()) .map(run -> run.end().get()); } }; diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatusList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatusList.java index c14493a0b72..efa21b71936 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatusList.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatusList.java @@ -3,15 +3,9 @@ package com.yahoo.vespa.hosted.controller.deployment; import com.yahoo.collections.AbstractFilteringList; import com.yahoo.component.Version; -import com.yahoo.config.application.api.DeploymentSpec; -import com.yahoo.vespa.hosted.controller.Instance; -import com.yahoo.vespa.hosted.controller.application.ApplicationList; -import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; import java.time.Instant; import java.util.Collection; -import java.util.List; -import java.util.Optional; /** * List for filtering deployment status of applications, for inspection and decision making. @@ -48,14 +42,14 @@ public class DeploymentStatusList extends AbstractFilteringList<DeploymentStatus private static boolean failingUpgradeToVersionSince(JobList jobs, Version version, Instant threshold) { return ! jobs.not().failingApplicationChange() - .firstFailing().endedBefore(threshold) + .firstFailing().endedNoLaterThan(threshold) .lastCompleted().on(version) .isEmpty(); } private static boolean failingApplicationChangeSince(JobList jobs, Instant threshold) { return ! jobs.failingApplicationChange() - .firstFailing().endedBefore(threshold) + .firstFailing().endedNoLaterThan(threshold) .isEmpty(); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java index f353910163f..525eadb6eaf 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java @@ -117,7 +117,7 @@ public class JobList extends AbstractFilteringList<JobStatus, JobList> { return new RunFilter(JobStatus::firstFailing); } - /** Allows sub-filters for runs of the given kind */ + /** Allows sub-filters for runs of the indicated kind */ public class RunFilter { private final Function<JobStatus, Optional<Run>> which; @@ -126,47 +126,32 @@ public class JobList extends AbstractFilteringList<JobStatus, JobList> { this.which = which; } - /** Returns the subset of jobs where the run of the given type exists */ + /** Returns the subset of jobs where the run of the indicated type exists */ public JobList present() { return matching(run -> true); } - /** Returns the runs of the given kind, mapped by the given function, as a list. */ + /** Returns the runs of the indicated kind, mapped by the given function, as a list. */ public <OtherType> List<OtherType> mapToList(Function<? super Run, OtherType> mapper) { return present().mapToList(which.andThen(Optional::get).andThen(mapper)); } - /** Returns the runs of the given kind. */ + /** Returns the runs of the indicated kind. */ public List<Run> asList() { return mapToList(Function.identity()); } - /** Returns the subset of jobs where the run of the given type occurred before the given instant */ - public JobList endedBefore(Instant threshold) { - return matching(run -> run.end().orElse(Instant.MAX).isBefore(threshold)); + /** Returns the subset of jobs where the run of the indicated type ended no later than the given instant */ + public JobList endedNoLaterThan(Instant threshold) { + return matching(run -> ! run.end().orElse(Instant.MAX).isAfter(threshold)); } - /** Returns the subset of jobs where the run of the given type occurred after the given instant */ - public JobList endedAfter(Instant threshold) { - return matching(run -> run.end().orElse(Instant.MIN).isAfter(threshold)); - } - - /** Returns the subset of jobs where the run of the given type occurred before the given instant */ - public JobList startedBefore(Instant threshold) { - return matching(run -> run.start().isBefore(threshold)); - } - - /** Returns the subset of jobs where the run of the given type occurred after the given instant */ - public JobList startedAfter(Instant threshold) { - return matching(run -> run.start().isAfter(threshold)); - } - - /** Returns the subset of jobs where the run of the given type was on the given version */ + /** Returns the subset of jobs where the run of the indicated type was on the given version */ public JobList on(ApplicationVersion version) { return matching(run -> run.versions().targetApplication().equals(version)); } - /** Returns the subset of jobs where the run of the given type was on the given version */ + /** Returns the subset of jobs where the run of the indicated type was on the given version */ public JobList on(Version version) { return matching(run -> run.versions().targetPlatform().equals(version)); } diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index a1ee71b9a58..21a8911af41 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -55,6 +55,12 @@ public class Flags { "Whether to enable Nessus.", "Takes effect on next host admin tick", HOSTNAME); + public static final UnboundBooleanFlag ENABLE_FLEET_SSHD_CONFIG = defineFeatureFlag( + "enable-fleet-sshd-config", false, + "Whether fleet should manage the /etc/ssh/sshd_config file.", + "Takes effect on next host admin tick.", + HOSTNAME); + public static final UnboundListFlag<String> DISABLED_HOST_ADMIN_TASKS = defineListFlag( "disabled-host-admin-tasks", List.of(), String.class, "List of host-admin task names (as they appear in the log, e.g. root>main>UpgradeTask) that should be skipped", diff --git a/logforwarder/src/apps/vespa-logforwarder-start/cf-handler.cpp b/logforwarder/src/apps/vespa-logforwarder-start/cf-handler.cpp index 22550a19383..bf98bbd75ef 100644 --- a/logforwarder/src/apps/vespa-logforwarder-start/cf-handler.cpp +++ b/logforwarder/src/apps/vespa-logforwarder-start/cf-handler.cpp @@ -88,7 +88,7 @@ CfHandler::doConfigure() if (fp != NULL) { fprintf(fp, "[default]\n"); fprintf(fp, "host = %s\n", getenv("VESPA_HOSTNAME")); - fprintf(fp, "_meta = vespa_tenant::%s vespa_application::%s vespa_instance::%s\n", getenv("VESPA_TENANT"), getenv("VESPA_APPLICATION"), getenv("VESPA_INSTANCE")); + fprintf(fp, "_meta = vespa_tenant::%s vespa_app::%s.%s\n", getenv("VESPA_TENANT"), getenv("VESPA_APPLICATION"), getenv("VESPA_INSTANCE")); fclose(fp); rename(tmpPath.c_str(), path.c_str()); } diff --git a/metrics-proxy/CMakeLists.txt b/metrics-proxy/CMakeLists.txt index 41fedb8e8c4..b43019e9ba6 100644 --- a/metrics-proxy/CMakeLists.txt +++ b/metrics-proxy/CMakeLists.txt @@ -8,3 +8,4 @@ install_config_definition(src/main/resources/configdefinitions/metrics-nodes.def install_config_definition(src/main/resources/configdefinitions/node-dimensions.def ai.vespa.metricsproxy.metric.dimensions.node-dimensions.def) install_config_definition(src/main/resources/configdefinitions/rpc-connector.def ai.vespa.metricsproxy.rpc.rpc-connector.def) install_config_definition(src/main/resources/configdefinitions/vespa-services.def ai.vespa.metricsproxy.service.vespa-services.def) +install_config_definition(src/main/resources/configdefinitions/telegraf.def ai.vespa.metricsproxy.telegraf.telegraf.def) diff --git a/metrics-proxy/src/main/resources/configdefinitions/telegraf.def b/metrics-proxy/src/main/resources/configdefinitions/telegraf.def new file mode 100644 index 00000000000..f3b5db35d52 --- /dev/null +++ b/metrics-proxy/src/main/resources/configdefinitions/telegraf.def @@ -0,0 +1,20 @@ +# Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package=ai.vespa.metricsproxy.telegraf + +# Metrics pull/push interval +intervalSeconds int default=60 + + +# The consumer to get metrics for +vespa.consumer string default="default" + + +cloudWatch[].region string default="us-east-1" +cloudWatch[].namespace string + +# Only valid and required for hosted Vespa +cloudWatch[].accessKeyName string default="" +cloudWatch[].secretKeyName string default="" + +# Only valid and optional for self-hosted Vespa +cloudWatch[].profile string default="" diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java index bb6d53d3304..7db81b7739a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java @@ -17,6 +17,7 @@ import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.provisioning.FatalProvisioningException; import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; +import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; import com.yahoo.vespa.hosted.provision.provisioning.NodePrioritizer; import com.yahoo.vespa.hosted.provision.provisioning.NodeResourceComparator; import com.yahoo.vespa.hosted.provision.provisioning.ProvisionedHost; @@ -44,13 +45,15 @@ public class DynamicProvisioningMaintainer extends Maintainer { private static final ApplicationId preprovisionAppId = ApplicationId.from("hosted-vespa", "tenant-host", "preprovision"); private final HostProvisioner hostProvisioner; + private final HostResourcesCalculator hostResourcesCalculator; private final BooleanFlag dynamicProvisioningEnabled; private final ListFlag<PreprovisionCapacity> preprovisionCapacityFlag; - DynamicProvisioningMaintainer(NodeRepository nodeRepository, Duration interval, - HostProvisioner hostProvisioner, FlagSource flagSource) { + DynamicProvisioningMaintainer(NodeRepository nodeRepository, Duration interval, HostProvisioner hostProvisioner, + HostResourcesCalculator hostResourcesCalculator, FlagSource flagSource) { super(nodeRepository, interval); this.hostProvisioner = hostProvisioner; + this.hostResourcesCalculator = hostResourcesCalculator; this.dynamicProvisioningEnabled = Flags.ENABLE_DYNAMIC_PROVISIONING.bindTo(flagSource); this.preprovisionCapacityFlag = Flags.PREPROVISION_CAPACITY.bindTo(flagSource); } @@ -112,7 +115,7 @@ public class DynamicProvisioningMaintainer extends Maintainer { NodeResources resources = it.next(); removableHosts.stream() .filter(host -> NodePrioritizer.ALLOCATABLE_HOST_STATES.contains(host.state())) - .filter(host -> host.flavor().resources().satisfies(resources)) + .filter(host -> hostResourcesCalculator.availableCapacityOf(host.flavor().name(), host.flavor().resources()).satisfies(resources)) .min(Comparator.comparingInt(n -> n.flavor().cost())) .ifPresent(host -> { removableHosts.remove(host); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index ae8f8b052db..063b5ad2c2a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -81,7 +81,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { loadBalancerExpirer = provisionServiceProvider.getLoadBalancerService().map(lbService -> new LoadBalancerExpirer(nodeRepository, defaults.loadBalancerExpirerInterval, lbService)); dynamicProvisioningMaintainer = provisionServiceProvider.getHostProvisioner().map(hostProvisioner -> - new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource)); + new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, provisionServiceProvider.getHostResourcesCalculator(), flagSource)); capacityReportMaintainer = new CapacityReportMaintainer(nodeRepository, metric, defaults.capacityReportInterval); osUpgradeActivator = new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval); rebalancer = new Rebalancer(deployer, nodeRepository, provisionServiceProvider.getHostResourcesCalculator(), provisionServiceProvider.getHostProvisioner(), metric, clock, defaults.rebalancerInterval); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/DockerHostCapacity.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/DockerHostCapacity.java index a609103ac89..fb76dc54d1a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/DockerHostCapacity.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/DockerHostCapacity.java @@ -26,12 +26,6 @@ public class DockerHostCapacity { this.hostResourcesCalculator = Objects.requireNonNull(hostResourcesCalculator, "hostResourcesCalculator must be non-null"); } - /** Returns the allocation skew of this host */ - public double skew(Node host) { - NodeResources free = freeCapacityOf(host, false); - return Node.skew(host.flavor().resources(), free); - } - int compareWithoutInactive(Node hostA, Node hostB) { int result = compare(freeCapacityOf(hostB, true), freeCapacityOf(hostA, true)); if (result != 0) return result; @@ -72,7 +66,7 @@ public class DockerHostCapacity { NodeResources freeCapacityOf(Node host, boolean excludeInactive) { // Only hosts have free capacity if (!host.type().canRun(NodeType.tenant)) return new NodeResources(0, 0, 0, 0); - NodeResources hostResources = hostResourcesCalculator.availableCapacityOf(host.flavor().resources()); + NodeResources hostResources = hostResourcesCalculator.availableCapacityOf(host.flavor().name(), host.flavor().resources()); return allNodes.childrenOf(host).asList().stream() .filter(node -> !(excludeInactive && isInactiveOrRetired(node))) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/EmptyProvisionServiceProvider.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/EmptyProvisionServiceProvider.java index 05915b82bae..b5c4478cd5a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/EmptyProvisionServiceProvider.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/EmptyProvisionServiceProvider.java @@ -30,7 +30,7 @@ public class EmptyProvisionServiceProvider implements ProvisionServiceProvider { public static class NoopHostResourcesCalculator implements HostResourcesCalculator { @Override - public NodeResources availableCapacityOf(NodeResources hostResources) { + public NodeResources availableCapacityOf(String flavorName, NodeResources hostResources) { return hostResources; } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostResourcesCalculator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostResourcesCalculator.java index c5808a53837..a5570dbf169 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostResourcesCalculator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostResourcesCalculator.java @@ -9,6 +9,6 @@ import com.yahoo.config.provision.NodeResources; public interface HostResourcesCalculator { /** Calculates the resources that are reserved for host level processes and returns the remainder. */ - NodeResources availableCapacityOf(NodeResources hostResources); + NodeResources availableCapacityOf(String flavorName, NodeResources hostResources); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java index 9dd8de6d306..5ee82727da4 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java @@ -26,9 +26,11 @@ import com.yahoo.vespa.hosted.provision.node.Status; import com.yahoo.vespa.hosted.provision.provisioning.FatalProvisioningException; import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; +import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver; import org.hamcrest.BaseMatcher; import org.hamcrest.Description; +import org.junit.Before; import org.junit.Test; import java.time.Duration; @@ -49,6 +51,7 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; @@ -64,11 +67,12 @@ public class DynamicProvisioningMaintainerTest { private final HostProvisionerTester tester = new HostProvisionerTester(); private final HostProvisioner hostProvisioner = mock(HostProvisioner.class); + private final HostResourcesCalculator hostResourcesCalculator = mock(HostResourcesCalculator.class); private final InMemoryFlagSource flagSource = new InMemoryFlagSource() .withBooleanFlag(Flags.ENABLE_DYNAMIC_PROVISIONING.id(), true) .withListFlag(Flags.PREPROVISION_CAPACITY.id(), List.of(), PreprovisionCapacity.class); private final DynamicProvisioningMaintainer maintainer = new DynamicProvisioningMaintainer( - tester.nodeRepository, Duration.ofDays(1), hostProvisioner, flagSource); + tester.nodeRepository, Duration.ofDays(1), hostProvisioner, hostResourcesCalculator, flagSource); @Test public void delegates_to_host_provisioner_and_writes_back_result() { @@ -127,7 +131,7 @@ public class DynamicProvisioningMaintainerTest { @Test public void provision_deficit_and_deprovision_excess() { - flagSource.withListFlag(Flags.PREPROVISION_CAPACITY.id(), List.of(new PreprovisionCapacity(1, 3, 2, 1), new PreprovisionCapacity(2, 3, 2, 2)), PreprovisionCapacity.class); + flagSource.withListFlag(Flags.PREPROVISION_CAPACITY.id(), List.of(new PreprovisionCapacity(2, 4, 8, 1), new PreprovisionCapacity(2, 3, 2, 2)), PreprovisionCapacity.class); addNodes(); maintainer.convergeToCapacity(tester.nodeRepository.list()); @@ -150,6 +154,15 @@ public class DynamicProvisioningMaintainerTest { verifyNoMoreInteractions(hostProvisioner); } + @Before + public void setup() { + doAnswer(invocation -> { + String flavorName = invocation.getArgument(0, String.class); + if ("default".equals(flavorName)) return new NodeResources(2, 4, 8, 1); + return invocation.getArguments()[1]; + }).when(hostResourcesCalculator).availableCapacityOf(any(), any()); + } + public void addNodes() { List.of(createNode("host1", Optional.empty(), NodeType.host, Node.State.active, Optional.of(tenantHostApp)), createNode("host1-1", Optional.of("host1"), NodeType.tenant, Node.State.reserved, Optional.of(tenantApp)), diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java index d1a330a3bd6..d0c678bdf45 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java @@ -152,7 +152,7 @@ public class RebalancerTest { private static class IdentityHostResourcesCalculator implements HostResourcesCalculator { @Override - public NodeResources availableCapacityOf(NodeResources hostResources) { + public NodeResources availableCapacityOf(String flavorName, NodeResources hostResources) { return hostResources; } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerHostCapacityTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerHostCapacityTest.java index 7d9ac230771..ba9a04573e1 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerHostCapacityTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerHostCapacityTest.java @@ -37,7 +37,7 @@ public class DockerHostCapacityTest { @Before public void setup() { - doAnswer(invocation -> invocation.getArguments()[0]).when(hostResourcesCalculator).availableCapacityOf(any()); + doAnswer(invocation -> invocation.getArguments()[1]).when(hostResourcesCalculator).availableCapacityOf(any(), any()); // Create flavors NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("host", "docker", "docker2"); @@ -95,9 +95,9 @@ public class DockerHostCapacityTest { capacity.freeCapacityOf(host3, false)); doAnswer(invocation -> { - NodeResources totalHostResources = (NodeResources) invocation.getArguments()[0]; + NodeResources totalHostResources = (NodeResources) invocation.getArguments()[1]; return totalHostResources.subtract(new NodeResources(1, 2, 3, 0.5, NodeResources.DiskSpeed.any)); - }).when(hostResourcesCalculator).availableCapacityOf(any()); + }).when(hostResourcesCalculator).availableCapacityOf(any(), any()); assertEquals(new NodeResources(4, 2, 5, 1.5, NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote), capacity.freeCapacityOf(host1, false)); |