diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-01-20 19:32:33 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-20 19:32:33 +0100 |
commit | f6e8c6d4ad3cb35ef6135f5caf681d314d85f248 (patch) | |
tree | 6a9f551c401b8e4bb1f879e35396a1e84c8ff331 /storage | |
parent | 5beeb0c0d71739dbbe7c445e47ccc1ead43a9bd2 (diff) | |
parent | b7b4ea6be255e191d0859f4a1c10b5523ada595a (diff) |
Merge pull request #16112 from vespa-engine/toregge/add-service-layer-host-info-reporter
Add ServiceLayerHostInfoReporter.
Diffstat (limited to 'storage')
13 files changed, 275 insertions, 14 deletions
diff --git a/storage/src/tests/common/testnodestateupdater.cpp b/storage/src/tests/common/testnodestateupdater.cpp index c4afda1a5ad..27f21a31768 100644 --- a/storage/src/tests/common/testnodestateupdater.cpp +++ b/storage/src/tests/common/testnodestateupdater.cpp @@ -10,7 +10,8 @@ TestNodeStateUpdater::TestNodeStateUpdater(const lib::NodeType& type) _current(new lib::NodeState(type, lib::State::UP)), _clusterStateBundle(std::make_shared<const lib::ClusterStateBundle>(lib::ClusterState())), _listeners(), - _explicit_node_state_reply_send_invocations(0) + _explicit_node_state_reply_send_invocations(0), + _requested_almost_immediate_node_state_replies(0) { } TestNodeStateUpdater::~TestNodeStateUpdater() = default; diff --git a/storage/src/tests/common/testnodestateupdater.h b/storage/src/tests/common/testnodestateupdater.h index e0c636d2715..eb15b97a37f 100644 --- a/storage/src/tests/common/testnodestateupdater.h +++ b/storage/src/tests/common/testnodestateupdater.h @@ -19,6 +19,7 @@ struct TestNodeStateUpdater : public NodeStateUpdater std::shared_ptr<const lib::ClusterStateBundle> _clusterStateBundle; std::vector<StateListener*> _listeners; size_t _explicit_node_state_reply_send_invocations; + size_t _requested_almost_immediate_node_state_replies; public: explicit TestNodeStateUpdater(const lib::NodeType& type); @@ -37,6 +38,10 @@ public: ++_explicit_node_state_reply_send_invocations; } + void request_almost_immediate_node_state_replies() override { + ++_requested_almost_immediate_node_state_replies; + } + void setCurrentNodeState(const lib::NodeState& state) { _current = std::make_shared<lib::NodeState>(state); } @@ -47,6 +52,10 @@ public: size_t explicit_node_state_reply_send_invocations() const noexcept { return _explicit_node_state_reply_send_invocations; } + + size_t requested_almost_immediate_node_state_replies() const noexcept { + return _requested_almost_immediate_node_state_replies; + } }; } // storage diff --git a/storage/src/tests/persistence/filestorage/CMakeLists.txt b/storage/src/tests/persistence/filestorage/CMakeLists.txt index 7bd74b83786..33d20e97cb1 100644 --- a/storage/src/tests/persistence/filestorage/CMakeLists.txt +++ b/storage/src/tests/persistence/filestorage/CMakeLists.txt @@ -10,11 +10,13 @@ vespa_add_executable(storage_filestorage_gtest_runner_app TEST modifiedbucketcheckertest.cpp operationabortingtest.cpp sanitycheckeddeletetest.cpp + service_layer_host_info_reporter_test.cpp singlebucketjointest.cpp gtest_runner.cpp DEPENDS storage storageapi + storage_testhostreporter storage_testpersistence_common GTest::GTest ) diff --git a/storage/src/tests/persistence/filestorage/service_layer_host_info_reporter_test.cpp b/storage/src/tests/persistence/filestorage/service_layer_host_info_reporter_test.cpp new file mode 100644 index 00000000000..cf9d380fb13 --- /dev/null +++ b/storage/src/tests/persistence/filestorage/service_layer_host_info_reporter_test.cpp @@ -0,0 +1,91 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/storage/persistence/filestorage/service_layer_host_info_reporter.h> +#include <tests/common/hostreporter/util.h> +#include <tests/common/testnodestateupdater.h> +#include <vespa/vespalib/data/slime/slime.h> +#include <vespa/vespalib/io/fileutil.h> +#include <vespa/vespalib/stllike/asciistream.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <iostream> + +namespace storage { + +using spi::ResourceUsage; + +namespace { + +double +get_usage_element(const vespalib::Slime& root, const vespalib::string& label) +{ + return root.get()["content-node"]["resource-usage"][label]["usage"].asDouble(); +} + +} + +struct ServiceLayerHostInfoReporterTest : ::testing::Test { + + TestNodeStateUpdater _state_manager; + ServiceLayerHostInfoReporter _reporter; + + ServiceLayerHostInfoReporterTest(); + ~ServiceLayerHostInfoReporterTest(); + + void notify(double disk_usage, double memory_usage) { + auto& listener = static_cast<spi::IResourceUsageListener&>(_reporter); + listener.update_resource_usage(ResourceUsage(disk_usage, memory_usage)); + } + + size_t requested_almost_immediate_replies() { return _state_manager.requested_almost_immediate_node_state_replies(); } + ResourceUsage get_old_usage() { return _reporter.get_old_resource_usage(); } + ResourceUsage get_usage() { return _reporter.get_usage(); } + ResourceUsage get_slime_usage() { + vespalib::Slime root; + util::reporterToSlime(_reporter, root); + return ResourceUsage(get_usage_element(root, "disk"), get_usage_element(root, "memory")); + } +}; + +ServiceLayerHostInfoReporterTest::ServiceLayerHostInfoReporterTest() + : _state_manager(lib::NodeType::STORAGE), + _reporter(_state_manager) +{ +} + +ServiceLayerHostInfoReporterTest::~ServiceLayerHostInfoReporterTest() = default; + +TEST_F(ServiceLayerHostInfoReporterTest, request_almost_immediate_node_state_as_needed) +{ + EXPECT_EQ(0, requested_almost_immediate_replies()); + EXPECT_EQ(ResourceUsage(0.0, 0.0), get_old_usage()); + EXPECT_EQ(ResourceUsage(0.0, 0.0), get_usage()); + notify(0.5, 0.4); + EXPECT_EQ(1, requested_almost_immediate_replies()); + EXPECT_EQ(ResourceUsage(0.5, 0.4), get_old_usage()); + EXPECT_EQ(ResourceUsage(0.5, 0.4), get_usage()); + notify(0.501, 0.401); + EXPECT_EQ(1, requested_almost_immediate_replies()); + EXPECT_EQ(ResourceUsage(0.5, 0.4), get_old_usage()); + EXPECT_EQ(ResourceUsage(0.501, 0.401), get_usage()); + notify(0.8, 0.4); + EXPECT_EQ(2, requested_almost_immediate_replies()); + EXPECT_EQ(ResourceUsage(0.8, 0.4), get_old_usage()); + EXPECT_EQ(ResourceUsage(0.8, 0.4), get_usage()); + notify(0.8, 0.7); + EXPECT_EQ(3, requested_almost_immediate_replies()); + EXPECT_EQ(ResourceUsage(0.8, 0.7), get_old_usage()); + EXPECT_EQ(ResourceUsage(0.8, 0.7), get_usage()); + notify(0.799, 0.699); + EXPECT_EQ(3, requested_almost_immediate_replies()); + EXPECT_EQ(ResourceUsage(0.8, 0.7), get_old_usage()); + EXPECT_EQ(ResourceUsage(0.799, 0.699), get_usage()); +} + +TEST_F(ServiceLayerHostInfoReporterTest, json_report_generated) +{ + EXPECT_EQ(ResourceUsage(0.0, 0.0), get_slime_usage()); + notify(0.5, 0.4); + EXPECT_EQ(ResourceUsage(0.5, 0.4), get_slime_usage()); +} + +} diff --git a/storage/src/tests/storageserver/statemanagertest.cpp b/storage/src/tests/storageserver/statemanagertest.cpp index b55e62d5fd3..1a9882bd0fa 100644 --- a/storage/src/tests/storageserver/statemanagertest.cpp +++ b/storage/src/tests/storageserver/statemanagertest.cpp @@ -316,6 +316,21 @@ TEST_F(StateManagerTest, immediate_node_state_replying_is_tracked_per_controller ASSERT_EQ(0, _upper->getNumReplies()); } +TEST_F(StateManagerTest, request_almost_immediate_replies_triggers_fast_reply) +{ + mark_reported_node_state_up(); + mark_reply_observed_from_n_controllers(1); + auto before = std::chrono::steady_clock::now(); + for (size_t pass = 0; pass < 100; ++pass) { + send_down_get_node_state_request(0); + _manager->request_almost_immediate_node_state_replies(); + _upper->waitForMessage(api::MessageType::GETNODESTATE_REPLY, 2); + clear_sent_replies(); + } + auto after = std::chrono::steady_clock::now(); + ASSERT_GT(10s, after - before); +} + TEST_F(StateManagerTest, activation_command_is_bounced_with_current_cluster_state_version) { force_current_cluster_state_version(12345); diff --git a/storage/src/vespa/storage/common/nodestateupdater.h b/storage/src/vespa/storage/common/nodestateupdater.h index 60f4213fe54..da7cb72e321 100644 --- a/storage/src/vespa/storage/common/nodestateupdater.h +++ b/storage/src/vespa/storage/common/nodestateupdater.h @@ -71,6 +71,11 @@ struct NodeStateUpdater { * regardless of whether the reported state has changed. */ virtual void immediately_send_get_node_state_replies() = 0; + + /** + * Request almost immediate node state replies. + */ + virtual void request_almost_immediate_node_state_replies() = 0; }; } // storage diff --git a/storage/src/vespa/storage/persistence/filestorage/CMakeLists.txt b/storage/src/vespa/storage/persistence/filestorage/CMakeLists.txt index 2fd54930f77..537470a5be0 100644 --- a/storage/src/vespa/storage/persistence/filestorage/CMakeLists.txt +++ b/storage/src/vespa/storage/persistence/filestorage/CMakeLists.txt @@ -8,5 +8,6 @@ vespa_add_library(storage_filestorpersistence OBJECT merge_handler_metrics.cpp mergestatus.cpp modifiedbucketchecker.cpp + service_layer_host_info_reporter.cpp DEPENDS ) diff --git a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp index aadc58c9af6..67fa22ada03 100644 --- a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp +++ b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp @@ -7,6 +7,7 @@ #include <vespa/storage/common/content_bucket_space_repo.h> #include <vespa/storage/common/doneinitializehandler.h> #include <vespa/vdslib/state/cluster_state_bundle.h> +#include <vespa/storage/common/hostreporter/hostinfo.h> #include <vespa/storage/common/messagebucket.h> #include <vespa/storage/config/config-stor-server.h> #include <vespa/storage/persistence/bucketownershipnotifier.h> @@ -76,15 +77,16 @@ FileStorManager(const config::ConfigUri & configUri, spi::PersistenceProvider& p _use_async_message_handling_on_schedule(false), _metrics(std::make_unique<FileStorMetrics>()), _closed(false), - _lock() + _lock(), + _host_info_reporter(_component.getStateUpdater()) { _configFetcher.subscribe(configUri.getConfigId(), this); _configFetcher.start(); _component.registerMetric(*_metrics); _component.registerStatusPage(*this); _component.getStateUpdater().addStateListener(*this); + hostInfoReporterRegistrar.registerReporter(&_host_info_reporter); propagateClusterStates(); - (void) hostInfoReporterRegistrar; } FileStorManager::~FileStorManager() diff --git a/storage/src/vespa/storage/persistence/filestorage/filestormanager.h b/storage/src/vespa/storage/persistence/filestorage/filestormanager.h index b69657f2692..ae298d70a29 100644 --- a/storage/src/vespa/storage/persistence/filestorage/filestormanager.h +++ b/storage/src/vespa/storage/persistence/filestorage/filestormanager.h @@ -9,6 +9,7 @@ #pragma once #include "filestorhandler.h" +#include "service_layer_host_info_reporter.h" #include <vespa/vespalib/util/document_runnable.h> #include <vespa/vespalib/util/isequencedtaskexecutor.h> #include <vespa/document/bucket/bucketid.h> @@ -75,6 +76,7 @@ class FileStorManager : public StorageLinkQueued, bool _closed; std::mutex _lock; std::unique_ptr<vespalib::IDestructorCallback> _bucketExecutorRegistration; + ServiceLayerHostInfoReporter _host_info_reporter; public: FileStorManager(const config::ConfigUri &, spi::PersistenceProvider&, diff --git a/storage/src/vespa/storage/persistence/filestorage/service_layer_host_info_reporter.cpp b/storage/src/vespa/storage/persistence/filestorage/service_layer_host_info_reporter.cpp new file mode 100644 index 00000000000..784aa16bf02 --- /dev/null +++ b/storage/src/vespa/storage/persistence/filestorage/service_layer_host_info_reporter.cpp @@ -0,0 +1,77 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "service_layer_host_info_reporter.h" +#include <vespa/storage/common/nodestateupdater.h> +#include <cmath> + +namespace storage { + +using Object = vespalib::JsonStream::Object; +using End = vespalib::JsonStream::End; + +namespace { + +constexpr double diff_slack = 0.01; + +void write_usage(vespalib::JsonStream& output, const vespalib::string &label, double value) +{ + output << label << Object(); + output << "usage" << value; + output << End(); +} + +bool want_immediate_report(const spi::ResourceUsage& old_resource_usage, const spi::ResourceUsage& resource_usage) +{ + auto disk_usage_diff = fabs(resource_usage.get_disk_usage() - old_resource_usage.get_disk_usage()); + auto memory_usage_diff = fabs(resource_usage.get_memory_usage() - old_resource_usage.get_memory_usage()); + return (disk_usage_diff > diff_slack || memory_usage_diff > diff_slack); +} + +} + +ServiceLayerHostInfoReporter::ServiceLayerHostInfoReporter(NodeStateUpdater& node_state_updater) + : HostReporter(), + spi::ResourceUsageListener(), + _node_state_updater(node_state_updater), + _lock(), + _old_resource_usage() +{ +} + +ServiceLayerHostInfoReporter::~ServiceLayerHostInfoReporter() +{ + spi::ResourceUsageListener::reset(); // detach +} + +void +ServiceLayerHostInfoReporter::update_resource_usage(const spi::ResourceUsage& resource_usage) +{ + bool immediate_report = want_immediate_report(_old_resource_usage, resource_usage); + if (immediate_report) { + _old_resource_usage = resource_usage; + } + { + std::lock_guard guard(_lock); + spi::ResourceUsageListener::update_resource_usage(resource_usage); + } + if (immediate_report) { + _node_state_updater.request_almost_immediate_node_state_replies(); + } +} + +void +ServiceLayerHostInfoReporter::report(vespalib::JsonStream& output) +{ + output << "content-node" << Object(); + output << "resource-usage" << Object(); + { + std::lock_guard guard(_lock); + auto& usage = get_usage(); + write_usage(output, "memory", usage.get_memory_usage()); + write_usage(output, "disk", usage.get_disk_usage()); + } + output << End(); + output << End(); +} + +} diff --git a/storage/src/vespa/storage/persistence/filestorage/service_layer_host_info_reporter.h b/storage/src/vespa/storage/persistence/filestorage/service_layer_host_info_reporter.h new file mode 100644 index 00000000000..be0abc94987 --- /dev/null +++ b/storage/src/vespa/storage/persistence/filestorage/service_layer_host_info_reporter.h @@ -0,0 +1,34 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <vespa/persistence/spi/resource_usage_listener.h> +#include <vespa/storage/common/hostreporter/hostreporter.h> +#include <mutex> + +namespace storage { + +struct NodeStateUpdater; + +/* + * Host info reporter for service layer that provides resource usage. + */ +class ServiceLayerHostInfoReporter : public HostReporter, + public spi::ResourceUsageListener +{ + NodeStateUpdater& _node_state_updater; + std::mutex _lock; + spi::ResourceUsage _old_resource_usage; + + void update_resource_usage(const spi::ResourceUsage& resource_usage) override; +public: + ServiceLayerHostInfoReporter(NodeStateUpdater& node_state_updater); + + ServiceLayerHostInfoReporter(const ServiceLayerHostInfoReporter&) = delete; + ServiceLayerHostInfoReporter& operator=(const ServiceLayerHostInfoReporter&) = delete; + ~ServiceLayerHostInfoReporter(); + + void report(vespalib::JsonStream& output) override; + const spi::ResourceUsage &get_old_resource_usage() noexcept { return _old_resource_usage; } +}; + +} diff --git a/storage/src/vespa/storage/storageserver/statemanager.cpp b/storage/src/vespa/storage/storageserver/statemanager.cpp index 395e33a0393..f2b23724c98 100644 --- a/storage/src/vespa/storage/storageserver/statemanager.cpp +++ b/storage/src/vespa/storage/storageserver/statemanager.cpp @@ -49,7 +49,8 @@ StateManager::StateManager(StorageComponentRegister& compReg, _controllers_observed_explicit_node_state(), _noThreadTestMode(testMode), _grabbedExternalLock(false), - _notifyingListeners(false) + _notifyingListeners(false), + _requested_almost_immediate_node_state_replies(false) { _nodeState->setMinUsedBits(58); _nodeState->setStartTimestamp(_component.getClock().getTimeInSeconds().getTime()); @@ -520,21 +521,25 @@ StateManager::run(framework::ThreadHandle& thread) { while (true) { thread.registerTick(); - std::unique_lock guard(_threadLock); - // Take lock before doing stuff, to be sure we don't wait after - // destructor have grabbed lock to stop() us. if (thread.interrupted()) { break; } tick(); - _threadCond.wait_for(guard, 1000ms); + std::unique_lock guard(_threadLock); + if (!_requested_almost_immediate_node_state_replies.load(std::memory_order_relaxed)) { + _threadCond.wait_for(guard, 1000ms); + } } } void StateManager::tick() { - framework::MilliSecTime time(_component.getClock().getTimeInMillis()); + bool almost_immediate_replies = _requested_almost_immediate_node_state_replies.load(std::memory_order_relaxed); + if (almost_immediate_replies) { + _requested_almost_immediate_node_state_replies.store(false, std::memory_order_relaxed); + } + framework::MilliSecTime time(almost_immediate_replies ? framework::MilliSecTime(0) : _component.getClock().getTimeInMillis()); sendGetNodeStateReplies(time); } @@ -637,14 +642,27 @@ StateManager::getNodeInfo() const return json.str(); } +void +StateManager::clear_controllers_observed_explicit_node_state_vector() +{ + std::lock_guard guard(_stateLock); + // Next GetNodeState request from any controller will be replied to instantly + _controllers_observed_explicit_node_state.clear(); +} + void StateManager::immediately_send_get_node_state_replies() { LOG(debug, "Immediately replying to all pending GetNodeState requests"); - { - std::lock_guard guard(_stateLock); - // Next GetNodeState request from any controller will be replied to instantly - _controllers_observed_explicit_node_state.clear(); - } + clear_controllers_observed_explicit_node_state_vector(); sendGetNodeStateReplies(); } +void +StateManager::request_almost_immediate_node_state_replies() +{ + clear_controllers_observed_explicit_node_state_vector(); + std::unique_lock guard(_threadLock); + _requested_almost_immediate_node_state_replies.store(true, std::memory_order_relaxed); + _threadCond.notify_all(); +} + } // storage diff --git a/storage/src/vespa/storage/storageserver/statemanager.h b/storage/src/vespa/storage/storageserver/statemanager.h index 1731998c14f..9f2853456e5 100644 --- a/storage/src/vespa/storage/storageserver/statemanager.h +++ b/storage/src/vespa/storage/storageserver/statemanager.h @@ -69,6 +69,7 @@ class StateManager : public NodeStateUpdater, bool _noThreadTestMode; bool _grabbedExternalLock; std::atomic<bool> _notifyingListeners; + std::atomic<bool> _requested_almost_immediate_node_state_replies; public: explicit StateManager(StorageComponentRegister&, metrics::MetricManager&, @@ -96,6 +97,7 @@ public: HostInfo& getHostInfo() { return *_hostInfo; } void immediately_send_get_node_state_replies() override; + void request_almost_immediate_node_state_replies() override; private: struct ExternalStateLock; @@ -145,6 +147,8 @@ private: std::string getNodeInfo() const; void run(framework::ThreadHandle&) override; + + void clear_controllers_observed_explicit_node_state_vector(); }; } // storage |