summaryrefslogtreecommitdiffstats
path: root/searchcore
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2022-04-25 11:25:50 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2022-04-25 13:15:42 +0000
commitc0983a4d273b71a4b997516eb700f107134789a9 (patch)
tree9d71c99acd13dd3e580db8202e62b147cf6c1be2 /searchcore
parentad0f821291c71be9c077c98c0edece14ccb8fb4d (diff)
Sample transient usage first to ensure that any errors caused by differing
sampling times are 'nice': they will not lead to alerts or to feed being blocked.
Diffstat (limited to 'searchcore')
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.cpp37
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.h16
2 files changed, 31 insertions, 22 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.cpp b/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.cpp
index 7054c7077c8..2bc28cc63c4 100644
--- a/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.cpp
@@ -33,25 +33,34 @@ DiskMemUsageSampler::setConfig(const Config &config)
_periodicTimer->reset();
_filter.setConfig(config.filterConfig);
_sampleInterval = config.sampleInterval;
- sampleUsage();
+ sampleAndReportUsage();
_lastSampleTime = vespalib::steady_clock::now();
vespalib::duration maxInterval = std::min(vespalib::duration(1s), _sampleInterval);
_periodicTimer->scheduleAtFixedRate(makeLambdaTask([this]() {
if (_filter.acceptWriteOperation() && (vespalib::steady_clock::now() < (_lastSampleTime + _sampleInterval))) {
return;
}
- sampleUsage();
+ sampleAndReportUsage();
_lastSampleTime = vespalib::steady_clock::now();
}),
maxInterval, maxInterval);
}
void
-DiskMemUsageSampler::sampleUsage()
+DiskMemUsageSampler::sampleAndReportUsage()
{
- sampleMemoryUsage();
- sampleDiskUsage();
- sample_transient_resource_usage();
+ TransientResourceUsage transientDiskUsage = sample_transient_resource_usage();
+ /* It is important that transient resource usage is sampled first. This prevents
+ * a false positive where we report too high a disk or memory usage, causing
+ * either feed to be blocked or an alert due to a metric spike.
+ * A false negative is less of a problem, as it will only cause a short drop in the metric
+ * and a short period of allowed feed. The latter will be very rare, as you are rarely feed blocked anyway.
+ */
+ vespalib::ProcessMemoryStats memoryStats = sampleMemoryUsage();
+ uint64_t diskUsage = sampleDiskUsage();
+ _filter.set_transient_resource_usage(transientDiskUsage);
+ _filter.setDiskUsedSize(diskUsage);
+ _filter.setMemoryStats(memoryStats);
}
namespace {
@@ -106,22 +115,22 @@ sampleDiskUsageInDirectory(const fs::path &path)
}
-void
+uint64_t
DiskMemUsageSampler::sampleDiskUsage()
{
const auto &disk = _filter.getHwInfo().disk();
- _filter.setDiskUsedSize(disk.shared() ?
- sampleDiskUsageInDirectory(_path) :
- sampleDiskUsageOnFileSystem(_path, disk));
+ return disk.shared()
+ ? sampleDiskUsageInDirectory(_path)
+ : sampleDiskUsageOnFileSystem(_path, disk);
}
-void
+vespalib::ProcessMemoryStats
DiskMemUsageSampler::sampleMemoryUsage()
{
- _filter.setMemoryStats(vespalib::ProcessMemoryStats::create());
+ return vespalib::ProcessMemoryStats::create();
}
-void
+TransientResourceUsage
DiskMemUsageSampler::sample_transient_resource_usage()
{
TransientResourceUsage transient_usage;
@@ -131,7 +140,7 @@ DiskMemUsageSampler::sample_transient_resource_usage()
transient_usage.merge(provider->get_transient_resource_usage());
}
}
- _filter.set_transient_resource_usage(transient_usage);
+ return transient_usage;
}
void
diff --git a/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.h b/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.h
index 7475282e718..fa8ac48fa1f 100644
--- a/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.h
+++ b/searchcore/src/vespa/searchcore/proton/server/disk_mem_usage_sampler.h
@@ -17,18 +17,18 @@ class ITransientResourceUsageProvider;
* Class to sample disk and memory usage used for filtering write operations.
*/
class DiskMemUsageSampler {
- DiskMemUsageFilter _filter;
- std::filesystem::path _path;
- vespalib::duration _sampleInterval;
- vespalib::steady_time _lastSampleTime;
+ DiskMemUsageFilter _filter;
+ std::filesystem::path _path;
+ vespalib::duration _sampleInterval;
+ vespalib::steady_time _lastSampleTime;
std::unique_ptr<vespalib::ScheduledExecutor> _periodicTimer;
std::mutex _lock;
std::vector<std::shared_ptr<const ITransientResourceUsageProvider>> _transient_usage_providers;
- void sampleUsage();
- void sampleDiskUsage();
- void sampleMemoryUsage();
- void sample_transient_resource_usage();
+ void sampleAndReportUsage();
+ uint64_t sampleDiskUsage();
+ vespalib::ProcessMemoryStats sampleMemoryUsage();
+ TransientResourceUsage sample_transient_resource_usage();
public:
struct Config {
DiskMemUsageFilter::Config filterConfig;