summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bjørn Meland <bjormel@users.noreply.github.com> 2024-05-16 14:22:09 +0000
committer GitHub <noreply@github.com> 2024-05-16 14:22:09 +0000
commit 6f7f6c7b17f8f952ac9adcc13310e3182ba88cef (patch)
tree e5a544a0730a50096d60466ae22b61a51163a93a
parent 468e9fbe9ae106328493f073b1bc8b009591900b (diff)
parent cc2703290f229d6b225f717e2b38747b4e32f953 (diff)
Merge pull request #31229 from vespa-engine/bjormel/autoscaling-logging
Enable detailed Autoscaling logging with PermanentFlag
-rw-r--r-- flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java | 6
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java | 14
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java | 12
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java | 13
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java | 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTester.java | 2
6 files changed, 39 insertions, 10 deletions
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java
index 2a667930add..ec92188a029 100644
--- a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java
+++ b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java
@@ -404,6 +404,12 @@ public class PermanentFlags {
"Takes effect immediately",
INSTANCE_ID);
+ public static final UnboundBooleanFlag AUTOSCALING_DETAILED_LOGGING = defineFeatureFlag(
+ "autoscaling-detailed-logging", true,
+ "Whether to log autoscaling decision data",
+ "Takes effect immediately",
+ INSTANCE_ID);
+
public static final UnboundIntFlag MAX_HOSTS_PER_HOUR = defineIntFlag(
"max-hosts-per-hour", 40,
"The number of hosts that can be provisioned per hour in a zone, before throttling is " +
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
index 61d4ced1367..5a790a1fe19 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
@@ -19,6 +19,7 @@ import static com.yahoo.vespa.hosted.provision.autoscale.Autoscaler.headroomRequ
* @author bratseth
*/
public class AllocationOptimizer {
+ private static final java.util.logging.Logger log = java.util.logging.Logger.getLogger(AllocationOptimizer.class.getName());
// The min and max nodes to consider when not using application supplied limits
private static final int minimumNodes = 2; // Since this number includes redundancy it cannot be lower than 2
@@ -37,8 +38,8 @@ public class AllocationOptimizer {
* @return the best allocation, if there are any possible legal allocations, fulfilling the target
* fully or partially, within the limits
*/
- public Optional<AllocatableResources> findBestAllocation(Load loadAdjustment, ClusterModel model, Limits limits) {
- return findBestAllocations(loadAdjustment, model, limits).stream().findFirst();
+ public Optional<AllocatableResources> findBestAllocation(Load loadAdjustment, ClusterModel model, Limits limits, boolean enableDetailedLogging) {
+ return findBestAllocations(loadAdjustment, model, limits, enableDetailedLogging).stream().findFirst();
}
/**
@@ -48,7 +49,7 @@ public class AllocationOptimizer {
* @return the best allocations, if there are any possible legal allocations, fulfilling the target
* fully or partially, within the limits. The list contains the three best allocations, sorted from most to least preferred.
*/
- public List<AllocatableResources> findBestAllocations(Load loadAdjustment, ClusterModel model, Limits limits) {
+ public List<AllocatableResources> findBestAllocations(Load loadAdjustment, ClusterModel model, Limits limits, boolean enableDetailedLogging) {
if (limits.isEmpty())
limits = Limits.of(new ClusterResources(minimumNodes, 1, NodeResources.unspecified()),
new ClusterResources(maximumNodes, maximumNodes, NodeResources.unspecified()),
@@ -78,8 +79,15 @@ public class AllocationOptimizer {
nodeRepository);
if (allocatableResources.isEmpty()) continue;
bestAllocations.add(allocatableResources.get());
+ if (enableDetailedLogging) {
+ log.info("Adding allocatableResources to list for " + model.application().id() + " in " + model.current().clusterSpec().id() + ": "
+ + "\n\t" + allocatableResources.get().toString());
+ }
}
}
+ if (enableDetailedLogging) {
+ log.info("Found " + bestAllocations.size() + " legal allocations for " + model.application().id() + " in " + model.current().clusterSpec().id());
+ }
return bestAllocations.stream()
.sorted((one, other) -> {
if (one.preferableTo(other, model))
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
index 40819e709de..29ab6d65b9f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
@@ -17,7 +17,7 @@ import java.util.List;
* @author bratseth
*/
public class Autoscaler {
-
+ private static final java.util.logging.Logger log = java.util.logging.Logger.getLogger(Autoscaler.class.getName());
/** What cost difference is worth a reallocation? */
private static final double costDifferenceWorthReallocation = 0.1;
/** What resource difference is worth a reallocation? */
@@ -44,7 +44,7 @@ public class Autoscaler {
var model = model(application, cluster, clusterNodes);
if (model.isEmpty() || ! model.isStable(nodeRepository)) return List.of();
- var targets = allocationOptimizer.findBestAllocations(model.loadAdjustment(), model, Limits.empty());
+ var targets = allocationOptimizer.findBestAllocations(model.loadAdjustment(), model, Limits.empty(), false);
return targets.stream()
.map(target -> toAutoscaling(target, model))
.toList();
@@ -54,9 +54,10 @@ public class Autoscaler {
* Autoscale a cluster by load. This returns a better allocation (if found) inside the min and max limits.
*
* @param clusterNodes the list of all the active nodes in a cluster
+ * @param enableDetailedLogging Whether to log autoscaling decision data
* @return scaling advice for this cluster
*/
- public Autoscaling autoscale(Application application, Cluster cluster, NodeList clusterNodes) {
+ public Autoscaling autoscale(Application application, Cluster cluster, NodeList clusterNodes, boolean enableDetailedLogging) {
var limits = Limits.of(cluster);
var model = model(application, cluster, clusterNodes);
if (model.isEmpty()) return Autoscaling.empty();
@@ -68,9 +69,12 @@ public class Autoscaler {
return Autoscaling.dontScale(Status.waiting, "Cluster change in progress", model);
var loadAdjustment = model.loadAdjustment();
+ if (enableDetailedLogging) {
+ log.info("Application: " + application.id().toShortString() + ", loadAdjustment: " + loadAdjustment.toString());
+ }
// Ensure we only scale down if we'll have enough headroom to not scale up again given a small load increase
- var target = allocationOptimizer.findBestAllocation(loadAdjustment, model, limits);
+ var target = allocationOptimizer.findBestAllocation(loadAdjustment, model, limits, enableDetailedLogging);
if (target.isEmpty())
return Autoscaling.dontScale(Status.insufficient, "No allocations are possible within configured limits", model);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
index 2bec9aa6115..5c9c5fe30d7 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
@@ -39,6 +39,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
private final Deployer deployer;
private final Metric metric;
private final BooleanFlag enabledFlag;
+ private final BooleanFlag enableDetailedLoggingFlag;
public AutoscalingMaintainer(NodeRepository nodeRepository,
Deployer deployer,
@@ -49,6 +50,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
this.deployer = deployer;
this.metric = metric;
this.enabledFlag = PermanentFlags.AUTOSCALING.bindTo(nodeRepository.flagSource());
+ this.enableDetailedLoggingFlag = PermanentFlags.AUTOSCALING_DETAILED_LOGGING.bindTo(nodeRepository.flagSource());
}
@Override
@@ -80,6 +82,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
*/
private boolean autoscale(ApplicationId applicationId, ClusterSpec.Id clusterId) {
boolean redeploy = false;
+ boolean enableDetailedLogging = enableDetailedLoggingFlag.with(Dimension.INSTANCE_ID, applicationId.serializedForm()).value();
try (var lock = nodeRepository().applications().lock(applicationId)) {
Optional<Application> application = nodeRepository().applications().get(applicationId);
if (application.isEmpty()) return true;
@@ -95,7 +98,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
// Autoscale unless an autoscaling is already in progress
Autoscaling autoscaling = null;
if (cluster.target().resources().isEmpty() && !cluster.scalingInProgress()) {
- autoscaling = autoscaler.autoscale(application.get(), cluster, clusterNodes);
+ autoscaling = autoscaler.autoscale(application.get(), cluster, clusterNodes, enableDetailedLogging);
if (autoscaling.isPresent() || cluster.target().isEmpty()) // Ignore empty from recently started servers
cluster = cluster.withTarget(autoscaling);
}
@@ -108,6 +111,14 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
if (autoscaling != null && autoscaling.resources().isPresent() && !current.equals(autoscaling.resources().get())) {
redeploy = true;
logAutoscaling(current, autoscaling.resources().get(), applicationId, clusterNodes.not().retired());
+ if (enableDetailedLogging) {
+ log.info("autoscaling data for " + applicationId.toFullString() + ": "
+ + "\n\tmetrics().cpuCostPerQuery(): " + autoscaling.metrics().cpuCostPerQuery()
+ + "\n\tmetrics().queryRate(): " + autoscaling.metrics().queryRate()
+ + "\n\tmetrics().growthRateHeadroom(): " + autoscaling.metrics().growthRateHeadroom()
+ + "\n\tpeak(): " + autoscaling.peak().toString()
+ + "\n\tideal(): " + autoscaling.ideal().toString());
+ }
}
}
catch (ApplicationLockException e) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
index 7ac80dfbdb3..2588b02d712 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java
@@ -209,7 +209,7 @@ public class NodeRepositoryProvisioner implements Provisioner {
return model.current().advertisedResources();
// Otherwise, find an allocation that preserves the current resources as well as possible
- return allocationOptimizer.findBestAllocation(Load.one(), model, limits)
+ return allocationOptimizer.findBestAllocation(Load.one(), model, limits, false)
.orElseThrow(() -> newNoAllocationPossible(model.current().clusterSpec(), limits))
.advertisedResources();
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTester.java
index 183ff85da47..401b6d83651 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTester.java
@@ -165,7 +165,7 @@ public class DynamicProvisioningTester {
nodeRepository().applications().put(application, lock);
}
return autoscaler.autoscale(application, application.clusters().get(cluster.id()),
- nodeRepository().nodes().list(Node.State.active).owner(applicationId));
+ nodeRepository().nodes().list(Node.State.active).owner(applicationId), false);
}
public List<Autoscaling> suggest(ApplicationId applicationId, ClusterSpec.Id clusterId,