diff options
author | Jon Bratseth <bratseth@gmail.com> | 2021-03-22 14:16:38 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2021-03-22 14:16:38 +0100 |
commit | 2fa625568fb3e619c0b58f3a23eb0b61748b2811 (patch) | |
tree | 74b5392370bbb9d94a0c55b95cd96ffed02917a4 /node-repository/src/main | |
parent | d3ed6feb94eb5fcab0a9b6a663203f921f9953d4 (diff) |
Refactor: Let the node repo own the metrics db
Diffstat (limited to 'node-repository/src/main')
11 files changed, 52 insertions, 60 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index f7ed4ebad3a..530afabcea9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -13,6 +13,7 @@ import com.yahoo.vespa.curator.Curator; import com.yahoo.vespa.flags.FlagSource; import com.yahoo.vespa.hosted.provision.Node.State; import com.yahoo.vespa.hosted.provision.applications.Applications; +import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; import com.yahoo.vespa.hosted.provision.lb.LoadBalancers; import com.yahoo.vespa.hosted.provision.maintenance.InfrastructureVersions; import com.yahoo.vespa.hosted.provision.node.Agent; @@ -30,12 +31,8 @@ import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; import com.yahoo.vespa.hosted.provision.provisioning.ProvisionServiceProvider; import java.time.Clock; -import java.time.Duration; -import java.time.Instant; import java.util.List; import java.util.Optional; -import java.util.logging.Level; -import java.util.logging.Logger; import java.util.stream.Collectors; /** @@ -45,8 +42,6 @@ import java.util.stream.Collectors; */ public class NodeRepository extends AbstractComponent { - private static final Logger log = Logger.getLogger(NodeRepository.class.getName()); - private final CuratorDatabaseClient db; private final Clock clock; private final Zone zone; @@ -63,6 +58,7 @@ public class NodeRepository extends AbstractComponent { private final Applications applications; private final LoadBalancers loadBalancers; private final FlagSource flagSource; + private final MetricsDb metricsDb; private final int spareCount; /** @@ -75,7 +71,8 @@ public class NodeRepository extends AbstractComponent { ProvisionServiceProvider provisionServiceProvider, Curator curator, Zone zone, - FlagSource flagSource) { + FlagSource flagSource, + MetricsDb metricsDb) { this(flavors, provisionServiceProvider, curator, @@ -85,6 +82,7 @@ public class NodeRepository extends AbstractComponent { DockerImage.fromString(config.containerImage()) .withReplacedBy(DockerImage.fromString(config.containerImageReplacement())), flagSource, + metricsDb, config.useCuratorClientCache(), zone.environment().isProduction() && !zone.getCloud().dynamicProvisioning() ? 1 : 0, config.nodeCacheSize()); @@ -102,6 +100,7 @@ public class NodeRepository extends AbstractComponent { NameResolver nameResolver, DockerImage containerImage, FlagSource flagSource, + MetricsDb metricsDb, boolean useCuratorClientCache, int spareCount, long nodeCacheSize) { @@ -126,22 +125,9 @@ public class NodeRepository extends AbstractComponent { this.applications = new Applications(db); this.loadBalancers = new LoadBalancers(db); this.flagSource = flagSource; + this.metricsDb = metricsDb; this.spareCount = spareCount; - rewriteNodes(); - } - - /** Read and write all nodes to make sure they are stored in the latest version of the serialized format */ - private void rewriteNodes() { - Instant start = clock.instant(); - int nodesWritten = 0; - for (State state : State.values()) { - List<Node> nodes = db.readNodes(state); - // TODO(mpolden): This should take the lock before writing - db.writeTo(state, nodes, Agent.system, Optional.empty()); - nodesWritten += nodes.size(); - } - Instant end = clock.instant(); - log.log(Level.INFO, String.format("Rewrote %d nodes in %s", nodesWritten, Duration.between(start, end))); + nodes.rewrite(); } /** Returns the curator database client used by this */ @@ -183,6 +169,8 @@ public class NodeRepository extends AbstractComponent { public FlagSource flagSource() { return flagSource; } + public MetricsDb metricsDb() { return metricsDb; } + /** Returns the time keeper of this system */ public Clock clock() { return clock; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 2a51a921a9f..1d0ba3da6c5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -26,12 +26,10 @@ public class Autoscaler { /** What resource difference is worth a reallocation? */ private static final double resourceDifferenceWorthReallocation = 0.1; - private final MetricsDb metricsDb; private final NodeRepository nodeRepository; private final AllocationOptimizer allocationOptimizer; - public Autoscaler(MetricsDb metricsDb, NodeRepository nodeRepository) { - this.metricsDb = metricsDb; + public Autoscaler(NodeRepository nodeRepository) { this.nodeRepository = nodeRepository; this.allocationOptimizer = new AllocationOptimizer(nodeRepository); } @@ -63,7 +61,7 @@ public class Autoscaler { cluster, clusterNodes.clusterSpec(), clusterNodes, - metricsDb, + nodeRepository.metricsDb(), nodeRepository.clock()); if ( ! clusterIsStable(clusterNodes, nodeRepository)) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java index bf8d354665a..377374e8bc5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java @@ -27,7 +27,7 @@ import java.util.stream.Collectors; */ public class MemoryMetricsDb implements MetricsDb { - private final NodeRepository nodeRepository; + private final Clock clock; /** Metric time series by node (hostname). Each list of metric snapshots is sorted by increasing timestamp */ private final Map<String, NodeTimeseries> nodeTimeseries = new HashMap<>(); @@ -37,12 +37,12 @@ public class MemoryMetricsDb implements MetricsDb { /** Lock all access for now since we modify lists inside a map */ private final Object lock = new Object(); - public MemoryMetricsDb(NodeRepository nodeRepository) { - this.nodeRepository = nodeRepository; + public MemoryMetricsDb(Clock clock) { + this.clock = clock; } @Override - public Clock clock() { return nodeRepository.clock(); } + public Clock clock() { return clock; } @Override public void addNodeMetrics(Collection<Pair<String, NodeMetricSnapshot>> nodeMetrics) { @@ -70,7 +70,7 @@ public class MemoryMetricsDb implements MetricsDb { @Override public List<NodeTimeseries> getNodeTimeseries(Duration period, Set<String> hostnames) { - Instant startTime = nodeRepository.clock().instant().minus(period); + Instant startTime = clock().instant().minus(period); synchronized (lock) { return hostnames.stream() .map(hostname -> nodeTimeseries.getOrDefault(hostname, new NodeTimeseries(hostname, List.of())).justAfter(startTime)) @@ -91,7 +91,7 @@ public class MemoryMetricsDb implements MetricsDb { // 12 hours with 1k nodes and 3 resources and 1 measurement/sec is about 5Gb for (String hostname : nodeTimeseries.keySet()) { var timeseries = nodeTimeseries.get(hostname); - timeseries = timeseries.justAfter(nodeRepository.clock().instant().minus(Autoscaler.maxScalingWindow())); + timeseries = timeseries.justAfter(clock().instant().minus(Autoscaler.maxScalingWindow())); if (timeseries.isEmpty()) nodeTimeseries.remove(hostname); else @@ -106,9 +106,6 @@ public class MemoryMetricsDb implements MetricsDb { private void add(String hostname, NodeMetricSnapshot snapshot) { NodeTimeseries timeseries = nodeTimeseries.get(hostname); if (timeseries == null) { // new node - Optional<Node> node = nodeRepository.nodes().node(hostname); - if (node.isEmpty()) return; - if (node.get().allocation().isEmpty()) return; timeseries = new NodeTimeseries(hostname, new ArrayList<>()); nodeTimeseries.put(hostname, timeseries); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java index 568c5f88661..672aad25b66 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java @@ -50,8 +50,8 @@ public interface MetricsDb { void close(); - static MemoryMetricsDb createTestInstance(NodeRepository nodeRepository) { - return new MemoryMetricsDb(nodeRepository); + static MemoryMetricsDb createTestInstance(Clock clock) { + return new MemoryMetricsDb(clock); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java index 40d495a47ee..17d33ef501c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java @@ -33,18 +33,15 @@ import java.util.Set; public class AutoscalingMaintainer extends NodeRepositoryMaintainer { private final Autoscaler autoscaler; - private final MetricsDb metricsDb; private final Deployer deployer; private final Metric metric; public AutoscalingMaintainer(NodeRepository nodeRepository, - MetricsDb metricsDb, Deployer deployer, Metric metric, Duration interval) { super(nodeRepository, interval, metric); - this.autoscaler = new Autoscaler(metricsDb, nodeRepository); - this.metricsDb = metricsDb; + this.autoscaler = new Autoscaler(nodeRepository); this.deployer = deployer; this.metric = metric; } @@ -115,8 +112,8 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { .anyMatch(node -> node.history().hasEventAt(History.Event.Type.retired, event.at()))) return cluster; // - 2. all nodes have switched to the right config generation - for (NodeTimeseries nodeTimeseries : metricsDb.getNodeTimeseries(Duration.between(event.at(), clock().instant()), - clusterNodes)) { + for (var nodeTimeseries : nodeRepository().metricsDb().getNodeTimeseries(Duration.between(event.at(), clock().instant()), + clusterNodes)) { Optional<NodeMetricSnapshot> firstOnNewGeneration = nodeTimeseries.asList().stream() .filter(snapshot -> snapshot.generation() >= event.generation()).findFirst(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java index f4509c0713e..f95094f891c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java @@ -24,16 +24,13 @@ public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer { private static final int maxWarningsPerInvocation = 2; private final MetricsFetcher metricsFetcher; - private final MetricsDb metricsDb; public NodeMetricsDbMaintainer(NodeRepository nodeRepository, MetricsFetcher metricsFetcher, - MetricsDb metricsDb, Duration interval, Metric metric) { super(nodeRepository, interval, metric); this.metricsFetcher = metricsFetcher; - this.metricsDb = metricsDb; } @Override @@ -54,7 +51,7 @@ public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer { if (++done < applications.size()) Thread.sleep(pauseMs); } - metricsDb.gc(); + nodeRepository().metricsDb().gc(); // Suppress failures for manual zones for now to avoid noise return nodeRepository().zone().environment().isManuallyDeployed() || warnings.get() == 0; @@ -75,8 +72,8 @@ public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer { warnings.add(1); } else if (response != null) { - metricsDb.addNodeMetrics(response.nodeMetrics()); - metricsDb.addClusterMetrics(application, response.clusterMetrics()); + nodeRepository().metricsDb().addNodeMetrics(response.nodeMetrics()); + nodeRepository().metricsDb().addClusterMetrics(application, response.clusterMetrics()); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index 7f41f89f664..98f28da8c82 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -39,7 +39,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor, Zone zone, Orchestrator orchestrator, Metric metric, ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource, - MetricsFetcher metricsFetcher, MetricsDb metricsDb) { + MetricsFetcher metricsFetcher) { DefaultTimes defaults = new DefaultTimes(zone, deployer); PeriodicApplicationMaintainer periodicApplicationMaintainer = new PeriodicApplicationMaintainer(deployer, metric, nodeRepository, defaults.redeployMaintainerInterval, @@ -64,9 +64,9 @@ public class NodeRepositoryMaintenance extends AbstractComponent { maintainers.add(new SpareCapacityMaintainer(deployer, nodeRepository, metric, defaults.spareCapacityMaintenanceInterval)); maintainers.add(new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval, metric)); maintainers.add(new Rebalancer(deployer, nodeRepository, metric, defaults.rebalancerInterval)); - maintainers.add(new NodeMetricsDbMaintainer(nodeRepository, metricsFetcher, metricsDb, defaults.nodeMetricsCollectionInterval, metric)); - maintainers.add(new AutoscalingMaintainer(nodeRepository, metricsDb, deployer, metric, defaults.autoscalingInterval)); - maintainers.add(new ScalingSuggestionsMaintainer(nodeRepository, metricsDb, defaults.scalingSuggestionsInterval, metric)); + maintainers.add(new NodeMetricsDbMaintainer(nodeRepository, metricsFetcher, defaults.nodeMetricsCollectionInterval, metric)); + maintainers.add(new AutoscalingMaintainer(nodeRepository, deployer, metric, defaults.autoscalingInterval)); + maintainers.add(new ScalingSuggestionsMaintainer(nodeRepository, defaults.scalingSuggestionsInterval, metric)); maintainers.add(new SwitchRebalancer(nodeRepository, defaults.switchRebalancerInterval, metric, deployer)); provisionServiceProvider.getLoadBalancerService(nodeRepository) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java index e2b89879141..c217580872b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java @@ -29,11 +29,10 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer { private final Autoscaler autoscaler; public ScalingSuggestionsMaintainer(NodeRepository nodeRepository, - MetricsDb metricsDb, Duration interval, Metric metric) { super(nodeRepository, interval, metric); - this.autoscaler = new Autoscaler(metricsDb, nodeRepository); + this.autoscaler = new Autoscaler(nodeRepository); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java index cb9fb8182d4..f12699e0d81 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java @@ -15,6 +15,7 @@ import com.yahoo.vespa.hosted.provision.NoSuchNodeException; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeMutex; +import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.maintenance.NodeFailer; import com.yahoo.vespa.hosted.provision.node.filter.NodeFilter; import com.yahoo.vespa.hosted.provision.node.filter.NodeListFilter; @@ -33,6 +34,8 @@ import java.util.Objects; import java.util.Optional; import java.util.Set; import java.util.function.BiFunction; +import java.util.logging.Level; +import java.util.logging.Logger; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -51,6 +54,8 @@ import java.util.stream.Stream; // Nodes might have an application assigned in dirty. public class Nodes { + private static final Logger log = Logger.getLogger(Nodes.class.getName()); + private final Zone zone; private final Clock clock; private final CuratorDatabaseClient db; @@ -61,6 +66,20 @@ public class Nodes { this.db = db; } + /** Read and write all nodes to make sure they are stored in the latest version of the serialized format */ + public void rewrite() { + Instant start = clock.instant(); + int nodesWritten = 0; + for (Node.State state : Node.State.values()) { + List<Node> nodes = db.readNodes(state); + // TODO(mpolden): This should take the lock before writing + db.writeTo(state, nodes, Agent.system, Optional.empty()); + nodesWritten += nodes.size(); + } + Instant end = clock.instant(); + log.log(Level.INFO, String.format("Rewrote %d nodes in %s", nodesWritten, Duration.between(start, end))); + } + // ---------------- Query API ---------------------------------------------------------------- /** diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index 75fa3aec1e2..a54acbe52ae 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -52,7 +52,6 @@ public class NodeRepositoryProvisioner implements Provisioner { private static final Logger log = Logger.getLogger(NodeRepositoryProvisioner.class.getName()); private final NodeRepository nodeRepository; - private final MetricsDb metricsDb; private final AllocationOptimizer allocationOptimizer; private final CapacityPolicies capacityPolicies; private final Zone zone; @@ -63,11 +62,9 @@ public class NodeRepositoryProvisioner implements Provisioner { @Inject public NodeRepositoryProvisioner(NodeRepository nodeRepository, - MetricsDb metricsDb, Zone zone, ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource) { this.nodeRepository = nodeRepository; - this.metricsDb = metricsDb; this.allocationOptimizer = new AllocationOptimizer(nodeRepository); this.capacityPolicies = new CapacityPolicies(nodeRepository); this.zone = zone; @@ -167,7 +164,7 @@ public class NodeRepositoryProvisioner implements Provisioner { firstDeployment // start at min, preserve current resources otherwise ? new AllocatableClusterResources(requested.minResources(), clusterSpec, nodeRepository) : new AllocatableClusterResources(nodes.asList(), nodeRepository, clusterSpec.isExclusive()); - var clusterModel = new ClusterModel(application, cluster, clusterSpec, nodes, metricsDb, nodeRepository.clock()); + var clusterModel = new ClusterModel(application, cluster, clusterSpec, nodes, nodeRepository.metricsDb(), nodeRepository.clock()); return within(Limits.of(requested), currentResources, firstDeployment, clusterModel); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java index 32952eeb860..6d27acf77d1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java @@ -68,6 +68,7 @@ public class MockNodeRepository extends NodeRepository { new MockNameResolver().mockAnyLookup(), DockerImage.fromString("docker-registry.domain.tld:8080/dist/vespa"), new InMemoryFlagSource(), + new MemoryMetricsDb(Clock.fixed(Instant.ofEpochMilli(123), ZoneId.of("Z"))), true, 0, 1000); this.flavors = flavors; @@ -78,7 +79,6 @@ public class MockNodeRepository extends NodeRepository { private void populate() { NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(this, - new MemoryMetricsDb(this), Zone.defaultZone(), new MockProvisionServiceProvider(), new InMemoryFlagSource()); |