summaryrefslogtreecommitdiffstats
path: root/searchcore/src
diff options
context:
space:
mode:
author Geir Storli <geirst@verizonmedia.com> 2021-03-25 15:27:51 +0000
committer Geir Storli <geirst@verizonmedia.com> 2021-03-25 15:27:51 +0000
commit c3aae425971b8c4ee558a621e4edc512a6565d67 (patch)
tree 8871db7e2587958ee352e24cb77f78873a60d76a /searchcore/src
parent 14694cc31cebd06e787a310d34c06aeb9b75b26c (diff)
Sample and expose the transient disk usage needed for running disk index fusion.
This is currently estimated as the size of the largest disk index among all document databases.
Diffstat (limited to 'searchcore/src')
-rw-r--r--searchcore/src/tests/proton/server/disk_mem_usage_sampler/CMakeLists.txt9
-rw-r--r--searchcore/src/tests/proton/server/disk_mem_usage_sampler/disk_mem_usage_sampler_test.cpp63
-rw-r--r--searchcore/src/vespa/searchcore/proton/common/i_transient_resource_usage_provider.h6
-rw-r--r--searchcore/src/vespa/searchcore/proton/common/transient_resource_usage_provider.h1
-rw-r--r--searchcore/src/vespa/searchcore/proton/metrics/resource_usage_metrics.cpp3
-rw-r--r--searchcore/src/vespa/searchcore/proton/metrics/resource_usage_metrics.h1
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_filter.cpp17
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_filter.h5
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.cpp11
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.h2
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/documentdb.cpp17
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/proton.cpp1
12 files changed, 124 insertions, 12 deletions
diff --git a/searchcore/src/tests/proton/server/disk_mem_usage_sampler/CMakeLists.txt b/searchcore/src/tests/proton/server/disk_mem_usage_sampler/CMakeLists.txt
new file mode 100644
index 00000000000..ce1e3f30ebc
--- /dev/null
+++ b/searchcore/src/tests/proton/server/disk_mem_usage_sampler/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Test executable for the disk/memory usage sampler, built from
+# disk_mem_usage_sampler_test.cpp and depending on searchcore_server and GTest::GTest.
+vespa_add_executable(searchcore_disk_mem_usage_sampler_test_app TEST
+ SOURCES
+ disk_mem_usage_sampler_test.cpp
+ DEPENDS
+ searchcore_server
+ GTest::GTest
+)
+# Register the executable above as a test; it is run without arguments.
+vespa_add_test(NAME searchcore_disk_mem_usage_sampler_test_app COMMAND searchcore_disk_mem_usage_sampler_test_app)
diff --git a/searchcore/src/tests/proton/server/disk_mem_usage_sampler/disk_mem_usage_sampler_test.cpp b/searchcore/src/tests/proton/server/disk_mem_usage_sampler/disk_mem_usage_sampler_test.cpp
new file mode 100644
index 00000000000..efac3fdf804
--- /dev/null
+++ b/searchcore/src/tests/proton/server/disk_mem_usage_sampler/disk_mem_usage_sampler_test.cpp
@@ -0,0 +1,63 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchcore/proton/common/hw_info.h>
+#include <vespa/searchcore/proton/common/i_transient_resource_usage_provider.h>
+#include <vespa/searchcore/proton/server/disk_mem_usage_sampler.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <chrono>
+#include <thread>
+
+using namespace proton;
+using namespace std::chrono_literals;
+
+constexpr uint64_t disk_size_bytes = 200000;
+constexpr uint64_t memory_size_bytes = 100000;
+
+// Builds the HwInfo handed to the sampler config in this test. The disk and
+// memory parts are sized by the disk_size_bytes and memory_size_bytes constants,
+// which the test later uses as denominators for the relative usage values.
+HwInfo
+make_hw_info()
+{
+    const auto disk = HwInfo::Disk(disk_size_bytes, false, true);
+    const auto memory = HwInfo::Memory(memory_size_bytes);
+    const auto cpu = HwInfo::Cpu(1);
+    return HwInfo(disk, memory, cpu);
+}
+
+/**
+ * Test double for ITransientResourceUsageProvider.
+ *
+ * Reports exactly the transient memory and disk usage values it was constructed with.
+ */
+class MyProvider : public ITransientResourceUsageProvider {
+    size_t _memory_usage;
+    size_t _disk_usage;
+
+public:
+    MyProvider(size_t memory_usage, size_t disk_usage) noexcept
+        : _memory_usage{memory_usage},
+          _disk_usage{disk_usage}
+    {
+    }
+
+    // Returns the memory usage value given to the constructor.
+    size_t get_transient_memory_usage() const override {
+        return _memory_usage;
+    }
+
+    // Returns the disk usage value given to the constructor.
+    size_t get_transient_disk_usage() const override {
+        return _disk_usage;
+    }
+};
+
+/**
+ * Fixture owning the DiskMemUsageSampler under test.
+ *
+ * The sampler is created for path "." with the hardware info from make_hw_info().
+ * NOTE(review): 0.8, 0.8 and 50ms are presumably the resource limits and the
+ * sample interval; confirm against DiskMemUsageSampler::Config.
+ *
+ * Two providers are registered after construction: one reporting (memory 99, disk 200)
+ * and one reporting (memory 100, disk 199), so the maximum for each resource
+ * comes from a different provider.
+ */
+struct DiskMemUsageSamplerTest : public ::testing::Test {
+    DiskMemUsageSampler sampler;
+
+    DiskMemUsageSamplerTest()
+        : sampler(".", DiskMemUsageSampler::Config(0.8, 0.8, 50ms, make_hw_info()))
+    {
+        auto largest_disk_provider = std::make_shared<MyProvider>(99, 200);
+        auto largest_memory_provider = std::make_shared<MyProvider>(100, 199);
+        sampler.add_transient_usage_provider(largest_disk_provider);
+        sampler.add_transient_usage_provider(largest_memory_provider);
+    }
+
+    // Shorthand for the filter that the sampler publishes its samples to.
+    const DiskMemUsageFilter& filter() const {
+        return sampler.writeFilter();
+    }
+};
+
+// Verifies that the sampler publishes process memory stats, disk usage and the
+// maximum transient memory / disk usage among the registered providers.
+TEST_F(DiskMemUsageSamplerTest, resource_usage_is_sampled)
+{
+    // The providers are registered after the sampler is constructed, so the values
+    // checked below only become visible once a later sample has been taken.
+    // A single fixed sleep makes the test timing-dependent; instead poll until the
+    // transient usage shows up, giving up after a generous deadline. Memory stats and
+    // disk usage are sampled before the transient usage within one sampling pass,
+    // so they are set as well once the transient usage is non-zero.
+    const auto deadline = std::chrono::steady_clock::now() + 20s;
+    while ((filter().get_transient_memory_usage() == 0) &&
+           (std::chrono::steady_clock::now() < deadline))
+    {
+        std::this_thread::sleep_for(50ms);
+    }
+    EXPECT_GT(filter().getMemoryStats().getAnonymousRss(), 0);
+    EXPECT_GT(filter().getDiskUsedSize(), 0);
+    // Maximum over the providers: memory from (100, 199), disk from (99, 200).
+    EXPECT_EQ(100, filter().get_transient_memory_usage());
+    EXPECT_EQ(100.0 / memory_size_bytes, filter().get_relative_transient_memory_usage());
+    EXPECT_EQ(200, filter().get_transient_disk_usage());
+    EXPECT_EQ(200.0 / disk_size_bytes, filter().get_relative_transient_disk_usage());
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
+
diff --git a/searchcore/src/vespa/searchcore/proton/common/i_transient_resource_usage_provider.h b/searchcore/src/vespa/searchcore/proton/common/i_transient_resource_usage_provider.h
index e51a5fa5827..035a2b19d6d 100644
--- a/searchcore/src/vespa/searchcore/proton/common/i_transient_resource_usage_provider.h
+++ b/searchcore/src/vespa/searchcore/proton/common/i_transient_resource_usage_provider.h
@@ -8,13 +8,15 @@ namespace proton {
/**
* Interface class providing transient resource usage.
- * E.g. extra memory needed for loading or saving an attribute vector.
- * It provides an aggregated view over several components (e.g. all attribute vectors for a document type).
+ *
+ * E.g. extra memory needed for loading or saving an attribute vector, or extra disk needed for running disk index fusion.
+ * It provides an aggregated max view over several components (e.g. all attribute vectors for a document type).
*/
class ITransientResourceUsageProvider {
public:
virtual ~ITransientResourceUsageProvider() = default;
virtual size_t get_transient_memory_usage() const = 0;
+ virtual size_t get_transient_disk_usage() const = 0;
};
}
diff --git a/searchcore/src/vespa/searchcore/proton/common/transient_resource_usage_provider.h b/searchcore/src/vespa/searchcore/proton/common/transient_resource_usage_provider.h
index 79a007f29af..556c874caed 100644
--- a/searchcore/src/vespa/searchcore/proton/common/transient_resource_usage_provider.h
+++ b/searchcore/src/vespa/searchcore/proton/common/transient_resource_usage_provider.h
@@ -19,6 +19,7 @@ public:
TransientResourceUsageProvider();
virtual ~TransientResourceUsageProvider();
size_t get_transient_memory_usage() const override;
+ size_t get_transient_disk_usage() const override { return 0; }
void set_transient_memory_usage(size_t transient_memory_usage);
};
diff --git a/searchcore/src/vespa/searchcore/proton/metrics/resource_usage_metrics.cpp b/searchcore/src/vespa/searchcore/proton/metrics/resource_usage_metrics.cpp
index c890592b411..85b2a7a4b99 100644
--- a/searchcore/src/vespa/searchcore/proton/metrics/resource_usage_metrics.cpp
+++ b/searchcore/src/vespa/searchcore/proton/metrics/resource_usage_metrics.cpp
@@ -10,7 +10,8 @@ ResourceUsageMetrics::ResourceUsageMetrics(metrics::MetricSet *parent)
diskUtilization("disk_utilization", {}, "The relative amount of disk used compared to the disk resource limit", this),
memory("memory", {}, "The relative amount of memory used by this process (value in the range [0, 1])", this),
memoryUtilization("memory_utilization", {}, "The relative amount of memory used compared to the memory resource limit", this),
- transient_memory("transient_memory", {}, "The relative amount of transient memory needed for load (value in the range [0, 1])", this),
+ transient_memory("transient_memory", {}, "The relative amount of transient memory needed for loading attributes. Max value among all attributes (value in the range [0, 1])", this),
+ transient_disk("transient_disk", {}, "The relative amount of transient disk needed for running disk index fusion. Max value among all disk indexes (value in the range [0, 1])", this),
memoryMappings("memory_mappings", {}, "The number of mapped memory areas", this),
openFileDescriptors("open_file_descriptors", {}, "The number of open files", this),
feedingBlocked("feeding_blocked", {}, "Whether feeding is blocked due to resource limits being reached (value is either 0 or 1)", this)
diff --git a/searchcore/src/vespa/searchcore/proton/metrics/resource_usage_metrics.h b/searchcore/src/vespa/searchcore/proton/metrics/resource_usage_metrics.h
index 1878cd4b7c9..038d9149b3b 100644
--- a/searchcore/src/vespa/searchcore/proton/metrics/resource_usage_metrics.h
+++ b/searchcore/src/vespa/searchcore/proton/metrics/resource_usage_metrics.h
@@ -17,6 +17,7 @@ struct ResourceUsageMetrics : metrics::MetricSet
metrics::DoubleValueMetric memory;
metrics::DoubleValueMetric memoryUtilization;
metrics::DoubleValueMetric transient_memory;
+ metrics::DoubleValueMetric transient_disk;
metrics::LongValueMetric memoryMappings;
metrics::LongValueMetric openFileDescriptors;
metrics::LongValueMetric feedingBlocked;
diff --git a/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_filter.cpp b/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_filter.cpp
index 4855577e712..ef77081132b 100644
--- a/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_filter.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_filter.cpp
@@ -183,10 +183,11 @@ DiskMemUsageFilter::setDiskUsedSize(uint64_t diskUsedSizeBytes)
}
void
-DiskMemUsageFilter::set_transient_memory_usage(size_t transient_memory_usage)
+DiskMemUsageFilter::set_transient_resource_usage(size_t transient_memory_usage, size_t transient_disk_usage)
{
Guard guard(_lock);
_transient_memory_usage = transient_memory_usage;
+ _transient_disk_usage = transient_disk_usage;
}
void
@@ -225,6 +226,20 @@ DiskMemUsageFilter::get_relative_transient_memory_usage() const
return static_cast<double>(_transient_memory_usage) / _hwInfo.memory().sizeBytes();
}
+/**
+ * Returns the transient disk usage last stored via set_transient_resource_usage().
+ * The member is read while holding _lock.
+ */
+size_t
+DiskMemUsageFilter::get_transient_disk_usage() const
+{
+    Guard guard(_lock);
+    const size_t usage = _transient_disk_usage;
+    return usage;
+}
+
+/**
+ * Returns the stored transient disk usage divided by the disk size reported by
+ * the hardware info (_hwInfo.disk().sizeBytes()), computed as a double.
+ * The member is read while holding _lock.
+ */
+double
+DiskMemUsageFilter::get_relative_transient_disk_usage() const
+{
+    Guard guard(_lock);
+    const auto usage = static_cast<double>(_transient_disk_usage);
+    return usage / _hwInfo.disk().sizeBytes();
+}
+
DiskMemUsageFilter::Config
DiskMemUsageFilter::getConfig() const
{
diff --git a/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_filter.h b/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_filter.h
index badc363e5bd..cdb9fc5f4cb 100644
--- a/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_filter.h
+++ b/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_filter.h
@@ -45,6 +45,7 @@ private:
vespalib::ProcessMemoryStats _memoryStats;
uint64_t _diskUsedSizeBytes;
size_t _transient_memory_usage;
+ size_t _transient_disk_usage;
Config _config;
State _state;
std::atomic<bool> _acceptWrite;
@@ -61,12 +62,14 @@ public:
~DiskMemUsageFilter() override;
void setMemoryStats(vespalib::ProcessMemoryStats memoryStats_in);
void setDiskUsedSize(uint64_t diskUsedSizeBytes);
- void set_transient_memory_usage(size_t transient_memory_usage);
+ void set_transient_resource_usage(size_t transient_memory_usage, size_t transient_disk_usage);
void setConfig(Config config);
vespalib::ProcessMemoryStats getMemoryStats() const;
uint64_t getDiskUsedSize() const;
size_t get_transient_memory_usage() const;
double get_relative_transient_memory_usage() const;
+ size_t get_transient_disk_usage() const;
+ double get_relative_transient_disk_usage() const;
Config getConfig() const;
const HwInfo &getHwInfo() const { return _hwInfo; }
DiskMemUsageState usageState() const;
diff --git a/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.cpp b/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.cpp
index 2c79abcc602..abd89345401 100644
--- a/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.cpp
@@ -52,7 +52,7 @@ DiskMemUsageSampler::sampleUsage()
{
sampleMemoryUsage();
sampleDiskUsage();
- sample_transient_memory_usage();
+ sample_transient_resource_usage();
}
namespace {
@@ -123,17 +123,18 @@ DiskMemUsageSampler::sampleMemoryUsage()
}
void
-DiskMemUsageSampler::sample_transient_memory_usage()
+DiskMemUsageSampler::sample_transient_resource_usage()
{
size_t max_transient_memory_usage = 0;
+ size_t max_transient_disk_usage = 0;
{
std::lock_guard<std::mutex> guard(_lock);
for (auto provider : _transient_usage_providers) {
- auto transient_memory_usage = provider->get_transient_memory_usage();
- max_transient_memory_usage = std::max(max_transient_memory_usage, transient_memory_usage);
+ max_transient_memory_usage = std::max(max_transient_memory_usage, provider->get_transient_memory_usage());
+ max_transient_disk_usage = std::max(max_transient_disk_usage, provider->get_transient_disk_usage());
}
}
- _filter.set_transient_memory_usage(max_transient_memory_usage);
+ _filter.set_transient_resource_usage(max_transient_memory_usage, max_transient_disk_usage);
}
void
diff --git a/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.h b/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.h
index ef639c3b727..2248db87960 100644
--- a/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.h
+++ b/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.h
@@ -26,7 +26,7 @@ class DiskMemUsageSampler {
void sampleUsage();
void sampleDiskUsage();
void sampleMemoryUsage();
- void sample_transient_memory_usage();
+ void sample_transient_resource_usage();
public:
struct Config {
DiskMemUsageFilter::Config filterConfig;
diff --git a/searchcore/src/vespa/searchcore/proton/server/documentdb.cpp b/searchcore/src/vespa/searchcore/proton/server/documentdb.cpp
index 4a6a4b7115d..3cb5c31e13c 100644
--- a/searchcore/src/vespa/searchcore/proton/server/documentdb.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/documentdb.cpp
@@ -88,6 +88,21 @@ public:
}
};
+/**
+ * Transient resource usage provider for a single DocumentDB.
+ *
+ * Transient memory usage handling is inherited from TransientResourceUsageProvider;
+ * only the transient disk usage is overridden here.
+ *
+ * The provider keeps a reference to the document db, so the document db must
+ * outlive the provider.
+ */
+class DocumentDBResourceUsageProvider : public TransientResourceUsageProvider {
+private:
+    const DocumentDB& _doc_db;
+
+public:
+    // explicit: a DocumentDB must never convert implicitly into a provider.
+    explicit DocumentDBResourceUsageProvider(const DocumentDB& doc_db)
+        : _doc_db(doc_db)
+    {}
+    size_t get_transient_disk_usage() const override {
+        // We estimate the transient disk usage for the next disk index fusion
+        // as the size of the largest disk index.
+        return _doc_db.getReadySubDB()->getSearchableStats().max_component_size_on_disk();
+    }
+};
+
}
template <typename FunctionType>
@@ -152,7 +167,7 @@ DocumentDB::DocumentDB(const vespalib::string &baseDir,
_state(),
_dmUsageForwarder(_writeService.master()),
_writeFilter(),
- _transient_usage_provider(std::make_shared<TransientResourceUsageProvider>()),
+ _transient_usage_provider(std::make_shared<DocumentDBResourceUsageProvider>(*this)),
_feedHandler(std::make_unique<FeedHandler>(_writeService, tlsSpec, docTypeName, *this, _writeFilter, *this, tlsWriterFactory)),
_subDBs(*this, *this, *_feedHandler, _docTypeName, _writeService, warmupExecutor, fileHeaderContext,
metricsWireService, getMetrics(), queryLimiter, clock, _configMutex, _baseDir,
diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.cpp b/searchcore/src/vespa/searchcore/proton/server/proton.cpp
index bcd9b7d24d5..103805a41cd 100644
--- a/searchcore/src/vespa/searchcore/proton/server/proton.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/proton.cpp
@@ -750,6 +750,7 @@ Proton::updateMetrics(const metrics::MetricLockGuard &)
metrics.resourceUsage.memory.set(usageState.memoryState().usage());
metrics.resourceUsage.memoryUtilization.set(usageState.memoryState().utilization());
metrics.resourceUsage.transient_memory.set(usageFilter.get_relative_transient_memory_usage());
+ metrics.resourceUsage.transient_disk.set(usageFilter.get_relative_transient_disk_usage());
metrics.resourceUsage.memoryMappings.set(usageFilter.getMemoryStats().getMappingsCount());
metrics.resourceUsage.openFileDescriptors.set(FastOS_File::count_open_files());
metrics.resourceUsage.feedingBlocked.set((usageFilter.acceptWriteOperation() ? 0.0 : 1.0));