diff options
7 files changed, 95 insertions, 60 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java index 5f8d8148b41..f9f7f3a00ae 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.model.admin.monitoring; +import com.yahoo.metrics.ClusterControllerMetrics; import com.yahoo.metrics.ConfigServerMetrics; import com.yahoo.metrics.ContainerMetrics; import com.yahoo.metrics.DistributorMetrics; @@ -254,31 +255,29 @@ public class VespaMetricSet { private static Set<Metric> getClusterControllerMetrics() { Set<Metric> metrics = new LinkedHashSet<>(); - addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_DOWN_COUNT.last()); - addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_INITIALIZING_COUNT.last()); - addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_MAINTENANCE_COUNT.last()); - addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_RETIRED_COUNT.last()); - addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_STOPPING_COUNT.last()); - addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_UP_COUNT.last()); - addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_CLUSTER_STATE_CHANGE_COUNT.baseName()); - addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_BUSY_TICK_TIME_MS, EnumSet.of(last, max, sum, count)); - addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_IDLE_TICK_TIME_MS, EnumSet.of(last, max, sum, count)); + addMetric(metrics, ClusterControllerMetrics.DOWN_COUNT.last()); + addMetric(metrics, ClusterControllerMetrics.INITIALIZING_COUNT.last()); + addMetric(metrics, ClusterControllerMetrics.MAINTENANCE_COUNT.last()); + addMetric(metrics, 
ClusterControllerMetrics.RETIRED_COUNT.last()); + addMetric(metrics, ClusterControllerMetrics.STOPPING_COUNT.last()); + addMetric(metrics, ClusterControllerMetrics.UP_COUNT.last()); + addMetric(metrics, ClusterControllerMetrics.CLUSTER_STATE_CHANGE_COUNT.baseName()); + addMetric(metrics, ClusterControllerMetrics.BUSY_TICK_TIME_MS, EnumSet.of(last, max, sum, count)); + addMetric(metrics, ClusterControllerMetrics.IDLE_TICK_TIME_MS, EnumSet.of(last, max, sum, count)); - addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_WORK_MS, EnumSet.of(last, sum, count)); - - addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_IS_MASTER.last()); - addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_REMOTE_TASK_QUEUE_SIZE.last()); + addMetric(metrics, ClusterControllerMetrics.WORK_MS, EnumSet.of(last, sum, count)); + + addMetric(metrics, ClusterControllerMetrics.IS_MASTER.last()); + addMetric(metrics, ClusterControllerMetrics.REMOTE_TASK_QUEUE_SIZE.last()); // TODO(hakonhall): Update this name once persistent "count" metrics has been implemented. // DO NOT RELY ON THIS METRIC YET. 
- addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_NODE_EVENT_COUNT.baseName()); - - addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_RESOURCE_USAGE_NODES_ABOVE_LIMIT, EnumSet.of(last, max)); - addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_RESOURCE_USAGE_MAX_MEMORY_UTILIZATION, EnumSet.of(last, max)); - addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_RESOURCE_USAGE_MAX_DISK_UTILIZATION, EnumSet.of(last, max)); - addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_RESOURCE_USAGE_MEMORY_LIMIT.last()); - addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_RESOURCE_USAGE_DISK_LIMIT.last()); - - addMetric(metrics, ContainerMetrics.CLUSTER_CONTROLLER_REINDEXING_PROGRESS.last()); + addMetric(metrics, ClusterControllerMetrics.NODE_EVENT_COUNT.baseName()); + addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_NODES_ABOVE_LIMIT, EnumSet.of(last, max)); + addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_MAX_MEMORY_UTILIZATION, EnumSet.of(last, max)); + addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_MAX_DISK_UTILIZATION, EnumSet.of(last, max)); + addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_MEMORY_LIMIT.last()); + addMetric(metrics, ClusterControllerMetrics.RESOURCE_USAGE_DISK_LIMIT.last()); + addMetric(metrics, ClusterControllerMetrics.REINDEXING_PROGRESS.last()); return metrics; } @@ -710,6 +709,10 @@ public class VespaMetricSet { metrics.add(new Metric(nameWithSuffix)); } + private static void addMetric(Set<Metric> metrics, ClusterControllerMetrics metric, EnumSet<Suffix> suffixes) { + suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." + suffix.suffix()))); + } + private static void addMetric(Set<Metric> metrics, ContainerMetrics metric, EnumSet<Suffix> suffixes) { suffixes.forEach(suffix -> metrics.add(new Metric(metric.baseName() + "." 
+ suffix.suffix()))); } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetriever.java b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetriever.java index 61c0c17264c..7920bbed763 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetriever.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/metrics/ClusterDeploymentMetricsRetriever.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.config.server.metrics; import ai.vespa.util.http.hc5.VespaHttpClientBuilder; import com.yahoo.concurrent.DaemonThreadFactory; +import com.yahoo.metrics.ClusterControllerMetrics; import com.yahoo.metrics.ContainerMetrics; import com.yahoo.slime.ArrayTraverser; import com.yahoo.slime.Cursor; @@ -137,11 +138,11 @@ public class ClusterDeploymentMetricsRetriever { case VESPA_DISTRIBUTOR -> optionalDouble(values.field("vds.distributor.docsstored.average")) .ifPresent(docCount -> aggregator.get().addDocumentCount(docCount)); case VESPA_CONTAINER_CLUSTERCONTROLLER -> - optionalDouble(values.field(ContainerMetrics.CLUSTER_CONTROLLER_RESOURCE_USAGE_MAX_MEMORY_UTILIZATION.max())).ifPresent(memoryUtil -> + optionalDouble(values.field(ClusterControllerMetrics.RESOURCE_USAGE_MAX_MEMORY_UTILIZATION.max())).ifPresent(memoryUtil -> aggregator.get() - .addMemoryUsage(memoryUtil, values.field(ContainerMetrics.CLUSTER_CONTROLLER_RESOURCE_USAGE_MEMORY_LIMIT.last()).asDouble()) - .addDiskUsage(values.field(ContainerMetrics.CLUSTER_CONTROLLER_RESOURCE_USAGE_MAX_DISK_UTILIZATION.max()).asDouble(), - values.field(ContainerMetrics.CLUSTER_CONTROLLER_RESOURCE_USAGE_DISK_LIMIT.last()).asDouble())); + .addMemoryUsage(memoryUtil, values.field(ClusterControllerMetrics.RESOURCE_USAGE_MEMORY_LIMIT.last()).asDouble()) + .addDiskUsage(values.field(ClusterControllerMetrics.RESOURCE_USAGE_MAX_DISK_UTILIZATION.max()).asDouble(), + 
values.field(ClusterControllerMetrics.RESOURCE_USAGE_DISK_LIMIT.last()).asDouble())); } } diff --git a/container-core/src/main/java/com/yahoo/metrics/ClusterControllerMetrics.java b/container-core/src/main/java/com/yahoo/metrics/ClusterControllerMetrics.java new file mode 100644 index 00000000000..fabfd5504f7 --- /dev/null +++ b/container-core/src/main/java/com/yahoo/metrics/ClusterControllerMetrics.java @@ -0,0 +1,53 @@ +package com.yahoo.metrics; + +/** + * @author yngve + */ +public enum ClusterControllerMetrics implements VespaMetrics { + + DOWN_COUNT("cluster-controller.down.count", Unit.NODE, "Number of content nodes down"), + INITIALIZING_COUNT("cluster-controller.initializing.count", Unit.NODE, "Number of content nodes initializing"), + MAINTENANCE_COUNT("cluster-controller.maintenance.count", Unit.NODE, "Number of content nodes in maintenance"), + RETIRED_COUNT("cluster-controller.retired.count", Unit.NODE, "Number of content nodes that are retired"), + STOPPING_COUNT("cluster-controller.stopping.count", Unit.NODE, "Number of content nodes currently stopping"), + UP_COUNT("cluster-controller.up.count", Unit.NODE, "Number of content nodes up"), + CLUSTER_STATE_CHANGE_COUNT("cluster-controller.cluster-state-change.count", Unit.NODE, "Number of nodes changing state"), + BUSY_TICK_TIME_MS("cluster-controller.busy-tick-time-ms", Unit.MILLISECOND, "Time busy"), + IDLE_TICK_TIME_MS("cluster-controller.idle-tick-time-ms", Unit.MILLISECOND, "Time idle"), + WORK_MS("cluster-controller.work-ms", Unit.MILLISECOND, "Time used for actual work"), + IS_MASTER("cluster-controller.is-master", Unit.BINARY, "1 if this cluster controller is currently the master, or 0 if not"), + REMOTE_TASK_QUEUE_SIZE("cluster-controller.remote-task-queue.size", Unit.OPERATION, "Number of remote tasks queued"), + // TODO(hakonhall): Update this name once persistent "count" metrics has been implemented. + // DO NOT RELY ON THIS METRIC YET. 
+ NODE_EVENT_COUNT("cluster-controller.node-event.count", Unit.OPERATION, "Number of node events"), + RESOURCE_USAGE_NODES_ABOVE_LIMIT("cluster-controller.resource_usage.nodes_above_limit", Unit.NODE, "The number of content nodes above resource limit, blocking feed"), + RESOURCE_USAGE_MAX_MEMORY_UTILIZATION("cluster-controller.resource_usage.max_memory_utilization", Unit.FRACTION, "Current memory utilisation, per content node"), + RESOURCE_USAGE_MAX_DISK_UTILIZATION("cluster-controller.resource_usage.max_disk_utilization", Unit.FRACTION, "Current disk space utilisation, per content node"), + RESOURCE_USAGE_MEMORY_LIMIT("cluster-controller.resource_usage.memory_limit", Unit.FRACTION, "Memory space limit as a fraction of available memory"), + RESOURCE_USAGE_DISK_LIMIT("cluster-controller.resource_usage.disk_limit", Unit.FRACTION, "Disk space limit as a fraction of available disk space"), + REINDEXING_PROGRESS("reindexing.progress", Unit.FRACTION, "Re-indexing progress"); + + + private final String name; + private final Unit unit; + private final String description; + + ClusterControllerMetrics(String name, Unit unit, String description) { + this.name = name; + this.unit = unit; + this.description = description; + } + + public String baseName() { + return name; + } + + public Unit unit() { + return unit; + } + + public String description() { + return description; + } + +} diff --git a/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java b/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java index d26e1b72dd7..c443c387381 100644 --- a/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java +++ b/container-core/src/main/java/com/yahoo/metrics/ContainerMetrics.java @@ -163,30 +163,6 @@ public enum ContainerMetrics implements VespaMetrics { SERVER_THREAD_POOL_SIZE("serverThreadPoolSize", Unit.THREAD, "Deprecated. Use jdisc.thread_pool.size instead."), SERVER_ACTIVE_THREADS("serverActiveThreads", Unit.THREAD, "Deprecated. 
Use jdisc.thread_pool.active_threads instead."), - - // Metrics from the cluster controller - CLUSTER_CONTROLLER_DOWN_COUNT("cluster-controller.down.count", Unit.NODE, "Number of content nodes down"), - CLUSTER_CONTROLLER_INITIALIZING_COUNT("cluster-controller.initializing.count", Unit.NODE, "Number of content nodes initializing"), - CLUSTER_CONTROLLER_MAINTENANCE_COUNT("cluster-controller.maintenance.count", Unit.NODE, "Number of content nodes in maintenance"), - CLUSTER_CONTROLLER_RETIRED_COUNT("cluster-controller.retired.count", Unit.NODE, "Number of content nodes that are retired"), - CLUSTER_CONTROLLER_STOPPING_COUNT("cluster-controller.stopping.count", Unit.NODE, "Number of content nodes currently stopping"), - CLUSTER_CONTROLLER_UP_COUNT("cluster-controller.up.count", Unit.NODE, "Number of content nodes up"), - CLUSTER_CONTROLLER_CLUSTER_STATE_CHANGE_COUNT("cluster-controller.cluster-state-change.count", Unit.NODE, "Number of nodes changing state"), - CLUSTER_CONTROLLER_BUSY_TICK_TIME_MS("cluster-controller.busy-tick-time-ms", Unit.MILLISECOND, "Time busy"), - CLUSTER_CONTROLLER_IDLE_TICK_TIME_MS("cluster-controller.idle-tick-time-ms", Unit.MILLISECOND, "Time idle"), - CLUSTER_CONTROLLER_WORK_MS("cluster-controller.work-ms", Unit.MILLISECOND, "Time used for actual work"), - CLUSTER_CONTROLLER_IS_MASTER("cluster-controller.is-master", Unit.BINARY, "1 if this cluster controller is currently the master, or 0 if not"), - CLUSTER_CONTROLLER_REMOTE_TASK_QUEUE_SIZE("cluster-controller.remote-task-queue.size", Unit.OPERATION, "Number of remote tasks queued"), - // TODO(hakonhall): Update this name once persistent "count" metrics has been implemented. - // DO NOT RELY ON THIS METRIC YET. 
- CLUSTER_CONTROLLER_NODE_EVENT_COUNT("cluster-controller.node-event.count", Unit.OPERATION, "Number of node events"), - CLUSTER_CONTROLLER_RESOURCE_USAGE_NODES_ABOVE_LIMIT("cluster-controller.resource_usage.nodes_above_limit", Unit.NODE, "The number of content nodes above resource limit, blocking feed"), - CLUSTER_CONTROLLER_RESOURCE_USAGE_MAX_MEMORY_UTILIZATION("cluster-controller.resource_usage.max_memory_utilization", Unit.FRACTION, "Current memory utilisation, per content node"), - CLUSTER_CONTROLLER_RESOURCE_USAGE_MAX_DISK_UTILIZATION("cluster-controller.resource_usage.max_disk_utilization", Unit.FRACTION, "Current disk space utilisation, per content node"), - CLUSTER_CONTROLLER_RESOURCE_USAGE_MEMORY_LIMIT("cluster-controller.resource_usage.memory_limit", Unit.FRACTION, "Disk space limit as a fraction of available disk space"), - CLUSTER_CONTROLLER_RESOURCE_USAGE_DISK_LIMIT("cluster-controller.resource_usage.disk_limit", Unit.FRACTION, "Memory space limit as a fraction of available memory"), - CLUSTER_CONTROLLER_REINDEXING_PROGRESS("reindexing.progress", Unit.FRACTION, "Re-indexing progress"), - // Java (JRT) TLS metrics JRT_TRANSPORT_TLS_CERTIFICATE_VERIFICATION_FAILURES("jrt.transport.tls-certificate-verification-failures", Unit.FAILURE, "TLS certificate verification failures"), JRT_TRANSPORT_PEER_AUTHORIZATION_FAILURES("jrt.transport.peer-authorization-failures", Unit.FAILURE, "TLS peer authorization failures"), diff --git a/searchlib/src/tests/attribute/attribute_test.cpp b/searchlib/src/tests/attribute/attribute_test.cpp index f336742ca4b..6699ea82f1f 100644 --- a/searchlib/src/tests/attribute/attribute_test.cpp +++ b/searchlib/src/tests/attribute/attribute_test.cpp @@ -49,6 +49,8 @@ string tmpDir("tmp"); string clsDir("clstmp"); string asuDir("asutmp"); +constexpr size_t sizeof_large_string_entry = sizeof(vespalib::datastore::UniqueStoreEntry<std::string>); + } namespace search { @@ -947,8 +949,8 @@ AttributeTest::testSingle() { AttributePtr ptr = 
createAttribute("sv-string", Config(BasicType::STRING, CollectionType::SINGLE)); ptr->updateStat(true); - EXPECT_EQ(133216u, ptr->getStatus().getAllocated()); - EXPECT_EQ(53280u, ptr->getStatus().getUsed()); + EXPECT_EQ(133096u + sizeof_large_string_entry, ptr->getStatus().getAllocated()); + EXPECT_EQ(53240u + sizeof_large_string_entry, ptr->getStatus().getUsed()); addDocs(ptr, numDocs); testSingle<StringAttribute, string, string>(ptr, values); } @@ -957,8 +959,8 @@ AttributeTest::testSingle() cfg.setFastSearch(true); AttributePtr ptr = createAttribute("sv-fs-string", cfg); ptr->updateStat(true); - EXPECT_EQ(361584u, ptr->getStatus().getAllocated()); - EXPECT_EQ(105216u, ptr->getStatus().getUsed()); + EXPECT_EQ(361464u + sizeof_large_string_entry, ptr->getStatus().getAllocated()); + EXPECT_EQ(105176u + sizeof_large_string_entry, ptr->getStatus().getUsed()); addDocs(ptr, numDocs); testSingle<StringAttribute, string, string>(ptr, values); } @@ -1140,8 +1142,8 @@ AttributeTest::testArray() { AttributePtr ptr = createAttribute("a-string", Config(BasicType::STRING, CollectionType::ARRAY)); ptr->updateStat(true); - EXPECT_EQ(649232u, ptr->getStatus().getAllocated()); - EXPECT_EQ(565856u, ptr->getStatus().getUsed()); + EXPECT_EQ(649112u + sizeof_large_string_entry, ptr->getStatus().getAllocated()); + EXPECT_EQ(565816u + sizeof_large_string_entry, ptr->getStatus().getUsed()); addDocs(ptr, numDocs); testArray<StringAttribute, string>(ptr, values); } @@ -1150,8 +1152,8 @@ AttributeTest::testArray() cfg.setFastSearch(true); AttributePtr ptr = createAttribute("afs-string", cfg); ptr->updateStat(true); - EXPECT_EQ(899536u, ptr->getStatus().getAllocated()); - EXPECT_EQ(617812u, ptr->getStatus().getUsed()); + EXPECT_EQ(899416u + sizeof_large_string_entry, ptr->getStatus().getAllocated()); + EXPECT_EQ(617772u + sizeof_large_string_entry, ptr->getStatus().getUsed()); addDocs(ptr, numDocs); testArray<StringAttribute, string>(ptr, values); } diff --git 
a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp index 9043782e639..8464e0abfec 100644 --- a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp +++ b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp @@ -1022,7 +1022,7 @@ TEST_F("require that lid space can be increased after being compacted and then s TEST_F("require that there is control of static memory usage", Fixture) { vespalib::MemoryUsage usage = f.store.getMemoryUsage(); - EXPECT_EQUAL(584u + sizeof(std::mutex), sizeof(LogDataStore)); + EXPECT_EQUAL(536u + sizeof(LogDataStore::NameIdSet) + sizeof(std::mutex), sizeof(LogDataStore)); EXPECT_EQUAL(74108u, usage.allocatedBytes()); EXPECT_EQUAL(384u, usage.usedBytes()); } diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.cpp b/vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.cpp index 3e2adf21619..1d3ba27d6bf 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.cpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_string_allocator.cpp @@ -78,7 +78,7 @@ UniqueStoreSmallStringBufferType::get_memory_allocator() const } UniqueStoreExternalStringBufferType::UniqueStoreExternalStringBufferType(uint32_t array_size, uint32_t max_arrays, std::shared_ptr<vespalib::alloc::MemoryAllocator> memory_allocator) - : BufferType<UniqueStoreEntry<std::string>>(array_size, 2u, max_arrays, NUM_ARRAYS_FOR_NEW_UNIQUESTORE_BUFFER, ALLOC_GROW_FACTOR), + : BufferType<UniqueStoreEntry<std::string>>(array_size, 0u, max_arrays, NUM_ARRAYS_FOR_NEW_UNIQUESTORE_BUFFER, ALLOC_GROW_FACTOR), _memory_allocator(std::move(memory_allocator)) { } |