diff options
Diffstat (limited to 'node-repository/src/main/java/com/yahoo')
9 files changed, 53 insertions, 100 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java index 986ab830283..1e4a11fdea2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java @@ -233,13 +233,11 @@ public class ClusterModel { double queryCpu = queryCpuPerGroup * groupCount() / groups; double writeCpu = (double)groupSize() / groupSize; return new Load(cpu.queryFraction() * queryCpu + (1 - cpu.queryFraction()) * writeCpu, - (1 - memory.fixedFraction()) * (double) groupSize() / groupSize + memory.fixedFraction() * 1, - (double)groupSize() / groupSize, - 1, - 1); + (1 - memory.fixedFraction()) * (double)groupSize() / groupSize + memory.fixedFraction() * 1, + (double)groupSize() / groupSize); } else { - return new Load((double) nodeCount() / nodes, 1, 1, 1, 1); + return new Load((double)nodeCount() / nodes, 1, 1); } } @@ -248,7 +246,7 @@ public class ClusterModel { * if one of the nodes go down. */ public Load idealLoad() { - var ideal = new Load(cpu.idealLoad(), memory.idealLoad(), disk.idealLoad(), cpu.idealLoad(), memory.idealLoad()).divide(redundancyAdjustment()); + var ideal = new Load(cpu.idealLoad(), memory.idealLoad(), disk.idealLoad()).divide(redundancyAdjustment()); if ( !cluster.bcpGroupInfo().isEmpty() && cluster.bcpGroupInfo().queryRate() > 0) { // Since we have little local information, use information about query cost in other groups Load bcpGroupIdeal = adjustQueryDependentIdealLoadByBcpGroupInfo(ideal); @@ -394,7 +392,7 @@ public class ClusterModel { if (averageQueryRate().isEmpty() || averageQueryRate().getAsDouble() == 0.0) return OptionalDouble.empty(); // TODO: Query rate should generally be sampled at the time where we see the peak resource usage int fanOut = clusterSpec.type().isContainer() ? 
1 : groupSize(); - return OptionalDouble.of(peakLoad().cpu() * cpu.queryFraction() * fanOut * nodes.not().retired().first().get().resources().vcpu() + return OptionalDouble.of(peakLoad().cpu() * cpu.queryFraction() * fanOut * nodes.not().retired().first().get().resources().vcpu() / averageQueryRate().getAsDouble() / groupCount()); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java index 6978e269c3d..e1ef21ebd13 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java @@ -67,8 +67,7 @@ public class ClusterNodesTimeseries { * the average of the highest reading for that dimension on each node. */ public Load peakLoad() { - return new Load(peakLoad(Load.Dimension.cpu), peakLoad(Load.Dimension.memory), peakLoad(Load.Dimension.disk), - peakLoad(Load.Dimension.gpu), peakLoad(Load.Dimension.gpuMemory)); + return new Load(peakLoad(Load.Dimension.cpu), peakLoad(Load.Dimension.memory), peakLoad(Load.Dimension.disk)); } private double peakLoad(Load.Dimension dimension) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java index 22c13795d18..799ed621807 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java @@ -3,7 +3,9 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.config.provision.NodeResources; +import java.util.Objects; import java.util.function.DoubleBinaryOperator; +import java.util.function.DoubleFunction; import java.util.function.DoubleUnaryOperator; import 
java.util.function.Predicate; @@ -12,36 +14,32 @@ import java.util.function.Predicate; * * @author bratseth */ -public record Load(double cpu, double memory, double disk, double gpu, double gpuMemory) { +public class Load { - public enum Dimension { cpu, memory, disk, gpu, gpuMemory } + public enum Dimension { cpu, memory, disk } - public Load(double cpu, double memory, double disk, double gpu, double gpuMemory) { + private final double cpu, memory, disk; + + public Load(double cpu, double memory, double disk) { this.cpu = requireNormalized(cpu, "cpu"); this.memory = requireNormalized(memory, "memory"); this.disk = requireNormalized(disk, "disk"); - this.gpu = requireNormalized(gpu, "gpu"); - this.gpuMemory = requireNormalized(gpuMemory, "gpuMemory"); } public double cpu() { return cpu; } public double memory() { return memory; } public double disk() { return disk; } - public double gpu() { return gpu; } - public double gpuMemory() { return gpuMemory; } - public Load withCpu(double cpu) { return new Load(cpu, memory, disk, gpu, gpuMemory); } - public Load withMemory(double memory) { return new Load(cpu, memory, disk, gpu, gpuMemory); } - public Load withDisk(double disk) { return new Load(cpu, memory, disk, gpu, gpuMemory); } - public Load withGpu(double gpu) { return new Load(cpu, memory, disk, gpu, gpuMemory); } - public Load withGpuMemory(double gpuMemory) { return new Load(cpu, memory, disk, gpu, gpuMemory); } + public Load withCpu(double cpu) { return new Load(cpu, memory, disk); } + public Load withMemory(double memory) { return new Load(cpu, memory, disk); } + public Load withDisk(double disk) { return new Load(cpu, memory, disk); } public Load add(Load other) { return join(other, (a, b) -> a + b); } public Load multiply(NodeResources resources) { - return new Load(cpu * resources.vcpu(), memory * resources.memoryGb(), disk * resources.diskGb(), gpu * resources.gpuResources().count(), gpu * resources.gpuResources().memoryGb()); + return new Load(cpu * 
resources.vcpu(), memory * resources.memoryGb(), disk * resources.diskGb()); } public Load multiply(double factor) { return map(v -> v * factor); @@ -57,25 +55,21 @@ public record Load(double cpu, double memory, double disk, double gpu, double gp return map(v -> divide(v, divisor)); } public Load divide(NodeResources resources) { - return new Load(divide(cpu, resources.vcpu()), divide(memory, resources.memoryGb()), divide(disk, resources.diskGb()), divide(gpu, resources.gpuResources().count()), divide(gpuMemory, resources.gpuResources().memoryGb())); + return new Load(divide(cpu, resources.vcpu()), divide(memory, resources.memoryGb()), divide(disk, resources.diskGb())); } /** Returns the load where the given function is applied to each dimension of this. */ public Load map(DoubleUnaryOperator f) { return new Load(f.applyAsDouble(cpu), f.applyAsDouble(memory), - f.applyAsDouble(disk), - f.applyAsDouble(gpu), - f.applyAsDouble(gpuMemory)); + f.applyAsDouble(disk)); } /** Returns the load where the given function is applied to each dimension of this and the given load. */ public Load join(Load other, DoubleBinaryOperator f) { return new Load(f.applyAsDouble(this.cpu(), other.cpu()), f.applyAsDouble(this.memory(), other.memory()), - f.applyAsDouble(this.disk(), other.disk()), - f.applyAsDouble(this.gpu(), other.gpu()), - f.applyAsDouble(this.gpuMemory(), other.gpuMemory())); + f.applyAsDouble(this.disk(), other.disk())); } /** Returns true if any dimension matches the predicate. 
*/ @@ -94,8 +88,6 @@ public record Load(double cpu, double memory, double disk, double gpu, double gp case cpu -> cpu(); case memory -> memory(); case disk -> disk(); - case gpu -> gpu(); - case gpuMemory -> gpuMemory(); }; } @@ -103,7 +95,7 @@ public record Load(double cpu, double memory, double disk, double gpu, double gp if (Double.isNaN(value)) throw new IllegalArgumentException(name + " must be a number but is NaN"); if (value < 0) - throw new IllegalArgumentException(name + " must be zero or larger, but is " + value); + throw new IllegalArgumentException(name + " must be zero or larger, but is " + value); return value; } @@ -113,19 +105,28 @@ public record Load(double cpu, double memory, double disk, double gpu, double gp } @Override + public boolean equals(Object o) { + if (o == this) return true; + if ( ! (o instanceof Load other)) return false; + if (other.cpu() != this.cpu()) return false; + if (other.memory() != this.memory()) return false; + if (other.disk() != this.disk()) return false; + return true; + } + + @Override + public int hashCode() { return Objects.hash(cpu, memory, disk); } + + @Override public String toString() { - return "load: " + cpu + " cpu, " + memory + " memory, " + disk + " disk," + gpu + " gpu," + gpuMemory + " gpuMemory"; + return "load: " + cpu + " cpu, " + memory + " memory, " + disk + " disk"; } - public static Load zero() { return new Load(0, 0, 0, 0, 0); } - public static Load one() { return new Load(1, 1, 1, 1, 1); } + public static Load zero() { return new Load(0, 0, 0); } + public static Load one() { return new Load(1, 1, 1); } public static Load byDividing(NodeResources a, NodeResources b) { - return new Load(divide(a.vcpu(), b.vcpu()), - divide(a.memoryGb(), b.memoryGb()), - divide(a.diskGb(), b.diskGb()), - divide(a.gpuResources().count(), b.gpuResources().count()), - divide(a.gpuResources().memoryGb(), b.gpuResources().memoryGb())); + return new Load(divide(a.vcpu(), b.vcpu()), divide(a.memoryGb(), b.memoryGb()), 
divide(a.diskGb(), b.diskGb())); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java index f35879d0b24..a6882e49efa 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java @@ -76,10 +76,8 @@ public class MetricsResponse { nodeMetrics.add(new Pair<>(hostname, new NodeMetricSnapshot(at, new Load(Metric.cpu.from(nodeValues), Metric.memory.from(nodeValues), - Metric.disk.from(nodeValues), - Metric.gpu.from(nodeValues), - Metric.gpuMemory.from(nodeValues)), - (long) Metric.generation.from(nodeValues), + Metric.disk.from(nodeValues)), + (long)Metric.generation.from(nodeValues), Metric.inService.from(nodeValues) > 0, clusterIsStable(node.get(), applicationNodes, nodeValues), Metric.queryRate.from(nodeValues)))); @@ -128,7 +126,6 @@ public class MetricsResponse { @Override public List<String> metricResponseNames() { - // TODO(mpolden): Track only CPU util once we support proper GPU scaling return List.of(HostedNodeAdminMetrics.CPU_UTIL.baseName(), HostedNodeAdminMetrics.GPU_UTIL.baseName()); } @@ -142,7 +139,6 @@ public class MetricsResponse { @Override public List<String> metricResponseNames() { - // TODO(mpolden): Track only CPU memory once we support proper GPU scaling return List.of(HostedNodeAdminMetrics.MEM_UTIL.baseName(), SearchNodeMetrics.CONTENT_PROTON_RESOURCE_USAGE_MEMORY.average(), HostedNodeAdminMetrics.GPU_MEM_USED.baseName(), @@ -151,7 +147,7 @@ public class MetricsResponse { @Override double computeFinal(ListMap<String, Double> values) { - return Math.max(cpuMemUtil(values), gpuMemory.computeFinal(values)); + return Math.max(gpuMemUtil(values), cpuMemUtil(values)); } private double cpuMemUtil(ListMap<String, Double> values) { @@ -164,6 +160,12 @@ public 
class MetricsResponse { return 0; } + private double gpuMemUtil(ListMap<String, Double> values) { + var usedGpuMemory = values.get(HostedNodeAdminMetrics.GPU_MEM_USED.baseName()).stream().mapToDouble(v -> v).sum(); + var totalGpuMemory = values.get(HostedNodeAdminMetrics.GPU_MEM_TOTAL.baseName()).stream().mapToDouble(v -> v).sum(); + return totalGpuMemory > 0 ? usedGpuMemory / totalGpuMemory : 0; + } + }, disk { // a node resource @@ -185,35 +187,6 @@ public class MetricsResponse { } }, - gpu { // a node resource - - @Override - public List<String> metricResponseNames() { - return List.of(HostedNodeAdminMetrics.GPU_UTIL.baseName()); - } - - @Override - double computeFinal(ListMap<String, Double> values) { - return values.values().stream().flatMap(List::stream).mapToDouble(v -> v).max().orElse(0) / 100; // % to ratio - } - - }, - gpuMemory { // a node resource - - @Override - public List<String> metricResponseNames() { - return List.of(HostedNodeAdminMetrics.GPU_MEM_USED.baseName(), - HostedNodeAdminMetrics.GPU_MEM_TOTAL.baseName()); - } - - @Override - double computeFinal(ListMap<String, Double> values) { - var usedGpuMemory = values.get(HostedNodeAdminMetrics.GPU_MEM_USED.baseName()).stream().mapToDouble(v -> v).sum(); - var totalGpuMemory = values.get(HostedNodeAdminMetrics.GPU_MEM_TOTAL.baseName()).stream().mapToDouble(v -> v).sum(); - return totalGpuMemory > 0 ? 
usedGpuMemory / totalGpuMemory : 0; - } - - }, generation { // application config generation active on the node @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDb.java index c0de9a43f7f..38127fa3093 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDb.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDb.java @@ -144,8 +144,6 @@ public class QuestMetricsDb extends AbstractComponent implements MetricsDb { row.putBool(6, snapshot.getSecond().inService()); row.putBool(7, snapshot.getSecond().stable()); row.putFloat(8, (float) snapshot.getSecond().queryRate()); - row.putFloat(9, (float) snapshot.getSecond().load().gpu()); - row.putFloat(10, (float) snapshot.getSecond().load().gpuMemory()); row.append(); } writer.commit(); @@ -245,9 +243,6 @@ public class QuestMetricsDb extends AbstractComponent implements MetricsDb { private void ensureNodeTableIsUpdated() { try { // Example: nodeTable.ensureColumnExists("write_rate", "float"); - // TODO(mpolden): Remove after January 2024 - nodeTable.ensureColumnExists("gpu_util", "float"); - nodeTable.ensureColumnExists("gpu_mem_total_util", "float"); } catch (Exception e) { nodeTable.repair(e); } @@ -267,9 +262,7 @@ public class QuestMetricsDb extends AbstractComponent implements MetricsDb { try { issue("create table " + nodeTable.name + " (hostname string, at timestamp, cpu_util float, mem_total_util float, disk_util float," + - " application_generation long, inService boolean, stable boolean, queries_rate float," + - " gpu_util float, gpu_mem_total_util float" + - " )" + + " application_generation long, inService boolean, stable boolean, queries_rate float)" + " timestamp(at)" + "PARTITION BY DAY;", newContext()); @@ -318,9 +311,7 @@ public class QuestMetricsDb extends AbstractComponent 
implements MetricsDb { new NodeMetricSnapshot(Instant.ofEpochMilli(record.getTimestamp(1) / 1000), new Load(record.getFloat(2), record.getFloat(3), - record.getFloat(4), - record.getFloat(9), - record.getFloat(10)), + record.getFloat(4)), record.getLong(5), record.getBool(6), record.getBool(7), diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java index 6f325700401..c4e7d3b9acc 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java @@ -76,8 +76,6 @@ public class ApplicationSerializer { private static final String cpuKey = "cpu"; private static final String memoryKey = "memory"; private static final String diskKey = "disk"; - private static final String gpuKey = "gpu"; - private static final String gpuMemory = "gpuMemory"; private static final String fromKey = "from"; private static final String toKey = "to"; private static final String generationKey = "generation"; @@ -203,16 +201,12 @@ public class ApplicationSerializer { loadObject.setDouble(cpuKey, load.cpu()); loadObject.setDouble(memoryKey, load.memory()); loadObject.setDouble(diskKey, load.disk()); - loadObject.setDouble(gpuKey, load.gpu()); - loadObject.setDouble(gpuMemory, load.gpuMemory()); } private static Load loadFromSlime(Inspector loadObject) { return new Load(loadObject.field(cpuKey).asDouble(), loadObject.field(memoryKey).asDouble(), - loadObject.field(diskKey).asDouble(), - loadObject.field(gpuKey).asDouble(), - loadObject.field(gpuMemory).asDouble()); + loadObject.field(diskKey).asDouble()); } private static void toSlime(Autoscaling.Metrics metrics, Cursor metricsObject) { diff --git 
a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java index 89853896104..225eb3e4e8d 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java @@ -98,8 +98,6 @@ public class ApplicationSerializer { loadObject.setDouble("cpu", load.cpu()); loadObject.setDouble("memory", load.memory()); loadObject.setDouble("disk", load.disk()); - loadObject.setDouble("gpu", load.gpu()); - loadObject.setDouble("gpuMemory", load.gpuMemory()); } private static void toSlime(Autoscaling.Metrics metrics, Cursor metricsObject) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java index 0b157e8635b..9080030f026 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java @@ -502,8 +502,6 @@ public class NodesV2ApiHandler extends ThreadedHttpRequestHandler { object.setDouble("cpu", load.cpu()); object.setDouble("memory", load.memory()); object.setDouble("disk", load.disk()); - object.setDouble("gpu", load.gpu()); - object.setDouble("gpuMemory", load.gpuMemory()); } /** Returns a copy of the given URI with the host and port from the given URI and the path set to the given path */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java index d3b88997059..fe6b204ed31 100644 --- 
a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java @@ -40,6 +40,7 @@ import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling; import com.yahoo.vespa.hosted.provision.autoscale.Load; import com.yahoo.vespa.hosted.provision.autoscale.MemoryMetricsDb; +import com.yahoo.vespa.hosted.provision.lb.LoadBalancerService; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.IP; import com.yahoo.vespa.hosted.provision.node.Status; @@ -238,8 +239,8 @@ public class MockNodeRepository extends NodeRepository { Optional.of(new ClusterResources(4, 1, new NodeResources(3, 16, 100, 1))), clock().instant(), - new Load(0.1, 0.2, 0.3, 0, 0), - new Load(0.4, 0.5, 0.6, 0, 0), + new Load(0.1, 0.2, 0.3), + new Load(0.4, 0.5, 0.6), new Autoscaling.Metrics(0.7, 0.8, 0.9))); try (Mutex lock = applications().lock(app1Id)) { applications().put(app1.with(cluster1), lock); |