diff options
20 files changed, 78 insertions, 378 deletions
diff --git a/persistence/src/tests/spi/clusterstatetest.cpp b/persistence/src/tests/spi/clusterstatetest.cpp index ac67903244f..8a303c1a1ac 100644 --- a/persistence/src/tests/spi/clusterstatetest.cpp +++ b/persistence/src/tests/spi/clusterstatetest.cpp @@ -233,31 +233,4 @@ TEST(ClusterStateTest, can_infer_own_node_retired_state) EXPECT_TRUE(!node_marked_as_retired_in_state("distributor:3 storage:3 .1.s:r", d, 0)); } -namespace { - -bool -node_marked_as_maintenance_in_state(const std::string& stateStr, - const lib::Distribution& d, - uint16_t node, - bool maintenance_in_all_spaces) -{ - lib::ClusterState s(stateStr); - ClusterState state(s, node, d, maintenance_in_all_spaces); - return state.nodeMaintenance(); -} - -} - -// We want to track the maintenance state for the _node_, not just the _bucket space_. -TEST(ClusterStateTest, node_maintenance_state_is_set_independent_of_bucket_space_state_string) -{ - lib::Distribution d(lib::Distribution::getDefaultDistributionConfig(3, 3)); - - // Note: it doesn't actually matter what the cluster state string itself says here - EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3", d, 0, false)); - EXPECT_TRUE(node_marked_as_maintenance_in_state("distributor:3 storage:3", d, 0, true)); - EXPECT_TRUE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:d", d, 0, true)); - EXPECT_FALSE(node_marked_as_maintenance_in_state("distributor:3 storage:3 .0.s:m", d, 0, false)); -} - } diff --git a/persistence/src/vespa/persistence/spi/clusterstate.cpp b/persistence/src/vespa/persistence/spi/clusterstate.cpp index f82e6165fb8..4bc538996ca 100644 --- a/persistence/src/vespa/persistence/spi/clusterstate.cpp +++ b/persistence/src/vespa/persistence/spi/clusterstate.cpp @@ -14,12 +14,10 @@ namespace storage::spi { ClusterState::ClusterState(const lib::ClusterState& state, uint16_t nodeIndex, - const lib::Distribution& distribution, - bool maintenanceInAllSpaces) + const lib::Distribution& distribution) : _state(std::make_unique<lib::ClusterState>(state)), _distribution(std::make_unique<lib::Distribution>(distribution.serialize())), - _nodeIndex(nodeIndex), - _maintenanceInAllSpaces(maintenanceInAllSpaces) + _nodeIndex(nodeIndex) { } @@ -35,11 +33,14 @@ void ClusterState::deserialize(vespalib::nbostream& i) { _distribution = std::make_unique<lib::Distribution>(distribution); } +ClusterState::ClusterState(vespalib::nbostream& i) { + deserialize(i); +} + ClusterState::ClusterState(const ClusterState& other) { vespalib::nbostream o; other.serialize(o); deserialize(o); - _maintenanceInAllSpaces = other._maintenanceInAllSpaces; } ClusterState::~ClusterState() = default; @@ -67,32 +68,28 @@ ClusterState::shouldBeReady(const Bucket& b) const { return Trinary::False; } -bool ClusterState::clusterUp() const noexcept { +bool ClusterState::clusterUp() const { return _state && _state->getClusterState() == lib::State::UP; } -bool ClusterState::nodeHasStateOneOf(const char* states) const noexcept { +bool ClusterState::nodeHasStateOneOf(const char* states) const { return _state && _state->getNodeState(lib::Node(lib::NodeType::STORAGE, _nodeIndex)). getState().oneOf(states); } -bool ClusterState::nodeUp() const noexcept { +bool ClusterState::nodeUp() const { return nodeHasStateOneOf("uir"); } -bool ClusterState::nodeInitializing() const noexcept { +bool ClusterState::nodeInitializing() const { return nodeHasStateOneOf("i"); } -bool ClusterState::nodeRetired() const noexcept { +bool ClusterState::nodeRetired() const { return nodeHasStateOneOf("r"); } -bool ClusterState::nodeMaintenance() const noexcept { - return _maintenanceInAllSpaces; -} - void ClusterState::serialize(vespalib::nbostream& o) const { assert(_distribution); assert(_state); diff --git a/persistence/src/vespa/persistence/spi/clusterstate.h b/persistence/src/vespa/persistence/spi/clusterstate.h index bde7e5bdbf4..8e48758e243 100644 --- a/persistence/src/vespa/persistence/spi/clusterstate.h +++ b/persistence/src/vespa/persistence/spi/clusterstate.h @@ -23,9 +23,9 @@ public: ClusterState(const lib::ClusterState& state, uint16_t nodeIndex, - const lib::Distribution& distribution, - bool maintenanceInAllSpaces = false); + const lib::Distribution& distribution); + ClusterState(vespalib::nbostream& i); ClusterState(const ClusterState& other); ClusterState& operator=(const ClusterState& other) = delete; ~ClusterState(); @@ -45,32 +45,23 @@ public: * compared to the complete list of nodes, and deigns the system to be * unusable. */ - [[nodiscard]] bool clusterUp() const noexcept; + bool clusterUp() const; /** * Returns false if this node has been set in a state where it should not * receive external load. - * - * TODO rename to indicate bucket space affinity. */ - [[nodiscard]] bool nodeUp() const noexcept; + bool nodeUp() const; /** * Returns true iff this node is marked as Initializing in the cluster state. - * - * TODO remove, init no longer used internally. */ - [[nodiscard]] bool nodeInitializing() const noexcept; + bool nodeInitializing() const; /** * Returns true iff this node is marked as Retired in the cluster state. */ - [[nodiscard]] bool nodeRetired() const noexcept; - - /** - * Returns true iff this node is marked as Maintenance in all bucket space cluster states. - */ - [[nodiscard]] bool nodeMaintenance() const noexcept; + bool nodeRetired() const; /** * Returns a serialized form of this object. @@ -81,10 +72,9 @@ private: std::unique_ptr<lib::ClusterState> _state; std::unique_ptr<lib::Distribution> _distribution; uint16_t _nodeIndex; - bool _maintenanceInAllSpaces; void deserialize(vespalib::nbostream&); - bool nodeHasStateOneOf(const char* states) const noexcept; + bool nodeHasStateOneOf(const char* states) const; }; } diff --git a/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp b/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp index cadfa8cd72f..29748a2010c 100644 --- a/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp +++ b/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp @@ -140,11 +140,6 @@ struct Fixture setNodeUp(bool value) { _calc->setNodeUp(value); - _calc->setNodeMaintenance(false); - _handler.notifyClusterStateChanged(_calc); - } - void setNodeMaintenance(bool value) { - _calc->setNodeMaintenance(value); _handler.notifyClusterStateChanged(_calc); } }; @@ -228,7 +223,7 @@ TEST_F("require that unready bucket can be reported as active", Fixture) } -TEST_F("node going down (but not into maintenance state) deactivates all buckets", Fixture) +TEST_F("require that node being down deactivates buckets", Fixture) { f._handler.handleSetCurrentState(f._ready.bucket(2), BucketInfo::ACTIVE, f._genResult); @@ -257,42 +252,6 @@ TEST_F("node going down (but not into maintenance state) deactivates all buckets EXPECT_EQUAL(true, f._bucketInfo.getInfo().isActive()); } -TEST_F("node going into maintenance state does _not_ deactivate any buckets", Fixture) -{ - f._handler.handleSetCurrentState(f._ready.bucket(2), - BucketInfo::ACTIVE, f._genResult); - f.sync(); - f.setNodeMaintenance(true); - f.sync(); - f.handleGetBucketInfo(f._ready.bucket(2)); - EXPECT_TRUE(f._bucketInfo.getInfo().isActive()); -} - -TEST_F("node going from maintenance to up state deactivates all buckets", Fixture) -{ - f._handler.handleSetCurrentState(f._ready.bucket(2), - BucketInfo::ACTIVE, f._genResult); - f.sync(); - f.setNodeMaintenance(true); - f.sync(); - f.setNodeUp(true); - f.sync(); - f.handleGetBucketInfo(f._ready.bucket(2)); - EXPECT_FALSE(f._bucketInfo.getInfo().isActive()); -} - -TEST_F("node going from maintenance to down state deactivates all buckets", Fixture) -{ - f._handler.handleSetCurrentState(f._ready.bucket(2), - BucketInfo::ACTIVE, f._genResult); - f.sync(); - f.setNodeMaintenance(true); - f.sync(); - f.setNodeUp(false); - f.sync(); - f.handleGetBucketInfo(f._ready.bucket(2)); - EXPECT_FALSE(f._bucketInfo.getInfo().isActive()); -} TEST_MAIN() { diff --git a/searchcore/src/tests/proton/matchengine/matchengine.cpp b/searchcore/src/tests/proton/matchengine/matchengine.cpp index 34c36fd9a72..481a9f061be 100644 --- a/searchcore/src/tests/proton/matchengine/matchengine.cpp +++ b/searchcore/src/tests/proton/matchengine/matchengine.cpp @@ -17,7 +17,7 @@ class MySearchHandler : public ISearchHandler { std::string _name; std::string _reply; public: - explicit MySearchHandler(size_t numHits = 0) : + MySearchHandler(size_t numHits = 0) : _numHits(numHits), _name("my"), _reply("myreply") {} DocsumReply::UP getDocsums(const DocsumRequest &) override { @@ -91,7 +91,6 @@ assertSearchReply(MatchEngine & engine, const std::string & searchDocType, size_ LocalSearchClient client; engine.search(SearchRequest::Source(request), client); SearchReply::UP reply = client.getReply(10000); - ASSERT_TRUE(reply); return EXPECT_EQUAL(expHits, reply->hits.size()); } @@ -174,24 +173,11 @@ TEST("requireThatEmptySearchReplyIsReturnedWhenEngineIsClosed") LocalSearchClient client; SearchRequest::Source request(new SearchRequest()); SearchReply::UP reply = engine.search(std::move(request), client); - ASSERT_TRUE(reply); + EXPECT_TRUE(reply ); EXPECT_EQUAL(0u, reply->hits.size()); EXPECT_EQUAL(7u, reply->getDistributionKey()); } -namespace { - -constexpr const char* search_interface_offline_slime_str() noexcept { - return "{\n" - " \"status\": {\n" - " \"state\": \"OFFLINE\",\n" - " \"message\": \"Search interface is offline\"\n" - " }\n" - "}\n"; -} - -} - TEST("requireThatStateIsReported") { MatchEngine engine(1, 1, 7); @@ -199,44 +185,14 @@ TEST("requireThatStateIsReported") Slime slime; SlimeInserter inserter(slime); engine.get_state(inserter, false); - EXPECT_EQUAL(search_interface_offline_slime_str(), - slime.toString()); -} - -TEST("searches are executed when node is in maintenance mode") -{ - MatchEngine engine(1, 1, 7); - engine.setNodeMaintenance(true); - engine.putSearchHandler(DocTypeName("foo"), std::make_shared<MySearchHandler>(3)); - EXPECT_TRUE(assertSearchReply(engine, "foo", 3)); -} - -TEST("setNodeMaintenance(true) implies setNodeUp(false)") -{ - MatchEngine engine(1, 1, 7); - engine.setNodeUp(true); - engine.setNodeMaintenance(true); - EXPECT_FALSE(engine.isOnline()); -} - -TEST("setNodeMaintenance(false) does not imply setNodeUp(false)") -{ - MatchEngine engine(1, 1, 7); - engine.setNodeUp(true); - engine.setNodeMaintenance(false); - EXPECT_TRUE(engine.isOnline()); -} - -TEST("search interface is reported as offline when node is in maintenance mode") -{ - MatchEngine engine(1, 1, 7); - engine.setNodeMaintenance(true); - - Slime slime; - SlimeInserter inserter(slime); - engine.get_state(inserter, false); - EXPECT_EQUAL(search_interface_offline_slime_str(), - slime.toString()); + EXPECT_EQUAL( + "{\n" + " \"status\": {\n" + " \"state\": \"OFFLINE\",\n" + " \"message\": \"Search interface is offline\"\n" + " }\n" + "}\n", + slime.toString()); } TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.cpp b/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.cpp index 58dc473b85e..5ad4a7ed52b 100644 --- a/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.cpp +++ b/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.cpp @@ -50,8 +50,7 @@ MatchEngine::MatchEngine(size_t numThreads, size_t threadsPerSearch, uint32_t di _handlers(), _executor(std::max(size_t(1), numThreads / threadsPerSearch), 256_Ki, match_engine_executor), _threadBundlePool(std::max(size_t(1), threadsPerSearch)), - _nodeUp(false), - _nodeMaintenance(false) + _nodeUp(false) { } @@ -99,8 +98,7 @@ search::engine::SearchReply::UP MatchEngine::search(search::engine::SearchRequest::Source request, search::engine::SearchClient &client) { - // We continue to allow searches if the node is in Maintenance mode - if (_closed || (!_nodeUp && !_nodeMaintenance)) { + if (_closed || !_nodeUp) { auto ret = std::make_unique<search::engine::SearchReply>(); ret->setDistributionKey(_distributionKey); @@ -179,14 +177,6 @@ MatchEngine::setNodeUp(bool nodeUp) _nodeUp = nodeUp; } -void -MatchEngine::setNodeMaintenance(bool nodeMaintenance) -{ - _nodeMaintenance = nodeMaintenance; - if (nodeMaintenance) { - _nodeUp = false; - } -} StatusReport::UP MatchEngine::reportStatus() const diff --git a/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.h b/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.h index 7cc0c97048b..b4e32c45003 100644 --- a/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.h +++ b/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.h @@ -26,7 +26,6 @@ private: vespalib::ThreadStackExecutor _executor; vespalib::SimpleThreadBundle::Pool _threadBundlePool; bool _nodeUp; - bool _nodeMaintenance; public: /** @@ -132,13 +131,6 @@ public: */ void setNodeUp(bool nodeUp); - /** - * Set node into maintenance, based on info from cluster controller. Note that - * nodeMaintenance == true also implies setNodeUp(false), as the node is technically - * not in a Up state. - */ - void setNodeMaintenance(bool nodeMaintenance); - StatusReport::UP reportStatus() const; search::engine::SearchReply::UP search( diff --git a/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp b/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp index 0d9b2f6aaf6..c15be9336fe 100644 --- a/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp @@ -68,8 +68,7 @@ BucketHandler::BucketHandler(vespalib::Executor &executor) _executor(executor), _ready(nullptr), _changedHandlers(), - _nodeUp(false), - _nodeMaintenance(false) + _nodeUp(false) { LOG(spam, "BucketHandler::BucketHandler"); } @@ -144,32 +143,13 @@ BucketHandler::handlePopulateActiveBuckets(document::BucketId::List &buckets, })); } -namespace { -constexpr const char* bool_str(bool v) noexcept { - return v ? "true" : "false"; -} -} - void BucketHandler::notifyClusterStateChanged(const std::shared_ptr<IBucketStateCalculator> & newCalc) { bool oldNodeUp = _nodeUp; - bool oldNodeMaintenance = _nodeMaintenance; - _nodeUp = newCalc->nodeUp(); // Up, Retired or Initializing - _nodeMaintenance = newCalc->nodeMaintenance(); - LOG(spam, "notifyClusterStateChanged; up: %s -> %s, maintenance: %s -> %s", - bool_str(oldNodeUp), bool_str(_nodeUp), - bool_str(oldNodeMaintenance), bool_str(_nodeMaintenance)); - if (_nodeMaintenance) { - return; // Don't deactivate buckets in maintenance mode; let query traffic drain away naturally. - } - // We implicitly deactivate buckets in two edge cases: - // - Up -> Down (not maintenance; handled above), since the node can not be expected to offer - // any graceful query draining when set Down. - // - Maintenance -> !Maintenance, since we'd otherwise introduce a bunch of transient duplicate - // results into queries if we transition to an available state. - // The assumption is that the system has already activated buckets on other nodes in such a scenario. - if ((oldNodeUp && !_nodeUp) || oldNodeMaintenance) { + _nodeUp = newCalc->nodeUp(); + LOG(spam, "notifyClusterStateChanged: %s -> %s", oldNodeUp ? "up" : "down", _nodeUp ? "up" : "down"); + if (oldNodeUp && !_nodeUp) { deactivateAllActiveBuckets(); } } diff --git a/searchcore/src/vespa/searchcore/proton/server/buckethandler.h b/searchcore/src/vespa/searchcore/proton/server/buckethandler.h index 2344e080450..7f44d2ebd71 100644 --- a/searchcore/src/vespa/searchcore/proton/server/buckethandler.h +++ b/searchcore/src/vespa/searchcore/proton/server/buckethandler.h @@ -25,7 +25,6 @@ private: documentmetastore::IBucketHandler *_ready; std::vector<IBucketStateChangedHandler *> _changedHandlers; bool _nodeUp; - bool _nodeMaintenance; void performSetCurrentState(document::BucketId bucketId, storage::spi::BucketInfo::ActiveState newState, diff --git a/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp b/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp index 20cd4ced6b2..3d709bd19d1 100644 --- a/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp @@ -27,7 +27,6 @@ private: bool _nodeUp; bool _nodeInitializing; bool _nodeRetired; - bool _nodeMaintenance; public: ClusterStateAdapter(const ClusterState &calc) @@ -35,8 +34,7 @@ public: _clusterUp(_calc.clusterUp()), _nodeUp(_calc.nodeUp()), _nodeInitializing(_calc.nodeInitializing()), - _nodeRetired(_calc.nodeRetired()), - _nodeMaintenance(_calc.nodeMaintenance()) + _nodeRetired(_calc.nodeRetired()) { } vespalib::Trinary shouldBeReady(const document::Bucket &bucket) const override { @@ -46,7 +44,6 @@ public: bool nodeUp() const override { return _nodeUp; } bool nodeInitializing() const override { return _nodeInitializing; } bool nodeRetired() const override { return _nodeRetired; } - bool nodeMaintenance() const noexcept override { return _nodeMaintenance; } }; } @@ -56,12 +53,11 @@ ClusterStateHandler::performSetClusterState(const ClusterState *calc, IGenericRe { LOG(debug, "performSetClusterState(): " - "clusterUp(%s), nodeUp(%s), nodeInitializing(%s), nodeMaintenance(%s)" + "clusterUp(%s), nodeUp(%s), nodeInitializing(%s)" "changedHandlers.size() = %zu", (calc->clusterUp() ? "true" : "false"), (calc->nodeUp() ? "true" : "false"), (calc->nodeInitializing() ? "true" : "false"), - (calc->nodeMaintenance() ? "true" : "false"), _changedHandlers.size()); if (!_changedHandlers.empty()) { auto newCalc = std::make_shared<ClusterStateAdapter>(*calc); diff --git a/searchcore/src/vespa/searchcore/proton/server/ibucketstatecalculator.h b/searchcore/src/vespa/searchcore/proton/server/ibucketstatecalculator.h index 9534f346d1f..b60a5ad8175 100644 --- a/searchcore/src/vespa/searchcore/proton/server/ibucketstatecalculator.h +++ b/searchcore/src/vespa/searchcore/proton/server/ibucketstatecalculator.h @@ -15,7 +15,6 @@ struct IBucketStateCalculator virtual bool nodeUp() const = 0; virtual bool nodeInitializing() const = 0; virtual bool nodeRetired() const = 0; - virtual bool nodeMaintenance() const noexcept = 0; virtual ~IBucketStateCalculator() = default; }; diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.cpp b/searchcore/src/vespa/searchcore/proton/server/proton.cpp index 5b21242e397..275f9029107 100644 --- a/searchcore/src/vespa/searchcore/proton/server/proton.cpp +++ b/searchcore/src/vespa/searchcore/proton/server/proton.cpp @@ -865,11 +865,10 @@ Proton::setClusterState(BucketSpace bucketSpace, const storage::spi::ClusterStat // forward info sent by cluster controller to persistence engine // about whether node is supposed to be up or not. Match engine // needs to know this in order to stop serving queries. - bool nodeUpInBucketSpace(calc.nodeUp()); // TODO rename calculator function to imply bucket space affinity + bool nodeUpInBucketSpace(calc.nodeUp()); bool nodeRetired(calc.nodeRetired()); bool nodeUp = updateNodeUp(bucketSpace, nodeUpInBucketSpace); _matchEngine->setNodeUp(nodeUp); - _matchEngine->setNodeMaintenance(calc.nodeMaintenance()); // Note: _all_ bucket spaces in maintenance if (_memoryFlushConfigUpdater) { _memoryFlushConfigUpdater->setNodeRetired(nodeRetired); } diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.h b/searchcore/src/vespa/searchcore/proton/server/proton.h index 6fd31352051..6b0bef50cae 100644 --- a/searchcore/src/vespa/searchcore/proton/server/proton.h +++ b/searchcore/src/vespa/searchcore/proton/server/proton.h @@ -133,7 +133,6 @@ private: uint32_t getDistributionKey() const override { return _distributionKey; } BootstrapConfig::SP getActiveConfigSnapshot() const; std::shared_ptr<IDocumentDBReferenceRegistry> getDocumentDBReferenceRegistry() const override; - // Returns true if the node is up in _any_ bucket space bool updateNodeUp(BucketSpace bucketSpace, bool nodeUpInBucketSpace); void closeDocumentDBs(vespalib::ThreadStackExecutorBase & executor); public: diff --git a/searchcore/src/vespa/searchcore/proton/test/bucketstatecalculator.h b/searchcore/src/vespa/searchcore/proton/test/bucketstatecalculator.h index e218058f01e..a5a0185d787 100644 --- a/searchcore/src/vespa/searchcore/proton/test/bucketstatecalculator.h +++ b/searchcore/src/vespa/searchcore/proton/test/bucketstatecalculator.h @@ -20,7 +20,6 @@ private: bool _clusterUp; bool _nodeUp; bool _nodeRetired; - bool _nodeMaintenance; public: typedef std::shared_ptr<BucketStateCalculator> SP; @@ -29,8 +28,7 @@ public: _asked(), _clusterUp(true), _nodeUp(true), - _nodeRetired(false), - _nodeMaintenance(false) + _nodeRetired(false) { } BucketStateCalculator &addReady(const document::BucketId &bucket) { @@ -56,15 +54,6 @@ public: return *this; } - BucketStateCalculator& setNodeMaintenance(bool maintenance) noexcept { - _nodeMaintenance = maintenance; - if (maintenance) { - _nodeUp = false; - _nodeRetired = false; - } - return *this; - } - const BucketIdVector &asked() const noexcept { return _asked; } void resetAsked() { _asked.clear(); } @@ -78,7 +67,6 @@ public: bool nodeUp() const override { return _nodeUp; } bool nodeRetired() const override { return _nodeRetired; } bool nodeInitializing() const override { return false; } - bool nodeMaintenance() const noexcept override { return _nodeMaintenance; } }; } diff --git a/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp b/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp index 5978f71db2f..597bb4b07ff 100644 --- a/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp +++ b/storage/src/tests/persistence/filestorage/deactivatebucketstest.cpp @@ -14,46 +14,11 @@ using namespace ::testing; namespace storage { struct DeactivateBucketsTest : FileStorTestFixture { - std::unique_ptr<TestFileStorComponents> _c; - - [[nodiscard]] bool is_active(const document::BucketId&) const; - - void SetUp() override { - FileStorTestFixture::SetUp(); - _c = std::make_unique<TestFileStorComponents>(*this); - - std::string up_state("storage:2 distributor:2"); - _node->getStateUpdater().setClusterState( - std::make_shared<const lib::ClusterState>(up_state)); - - createBucket(test_bucket()); - - api::BucketInfo serviceLayerInfo(1, 2, 3, 4, 5, true, true); - { - StorBucketDatabase::WrappedEntry entry( - _node->getStorageBucketDatabase().get(test_bucket(), "foo", - StorBucketDatabase::CREATE_IF_NONEXISTING)); - entry->info = serviceLayerInfo; - entry.write(); - } - } - - void TearDown() override { - _c.reset(); - FileStorTestFixture::TearDown(); - } - - static document::BucketId test_bucket() noexcept { - return {8, 123}; - } - - static std::shared_ptr<const lib::ClusterState> state_of(const char* str) { - return std::make_shared<const lib::ClusterState>(str); - } + bool isActive(const document::BucketId&) const; }; bool -DeactivateBucketsTest::is_active(const document::BucketId& bucket) const +DeactivateBucketsTest::isActive(const document::BucketId& bucket) const { StorBucketDatabase::WrappedEntry entry( _node->getStorageBucketDatabase().get(bucket, "foo")); @@ -61,53 +26,31 @@ DeactivateBucketsTest::is_active(const document::BucketId& bucket) const return entry->info.isActive(); } -TEST_F(DeactivateBucketsTest, buckets_deactivated_when_node_marked_down) -{ - EXPECT_TRUE(is_active(test_bucket())); - _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:d distributor:2")); - // Buckets should have been deactivated in content layer - EXPECT_FALSE(is_active(test_bucket())); -} - -TEST_F(DeactivateBucketsTest, buckets_not_deactivated_when_node_marked_maintenance) -{ - EXPECT_TRUE(is_active(test_bucket())); - _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:m distributor:2")); - EXPECT_TRUE(is_active(test_bucket())); -} - -TEST_F(DeactivateBucketsTest, buckets_deactivated_when_node_goes_from_maintenance_to_up) -{ - EXPECT_TRUE(is_active(test_bucket())); - _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:m distributor:2")); - _node->getStateUpdater().setClusterState(state_of("storage:2 distributor:2")); - EXPECT_FALSE(is_active(test_bucket())); -} - -TEST_F(DeactivateBucketsTest, buckets_deactivated_when_node_goes_from_maintenance_to_down) -{ - EXPECT_TRUE(is_active(test_bucket())); - _node->getStateUpdater().setClusterState(state_of("storage:2 .1.s:m distributor:2")); - _node->getStateUpdater().setClusterState(state_of("storage:2 distributor:2")); - EXPECT_FALSE(is_active(test_bucket())); -} +TEST_F(DeactivateBucketsTest, buckets_in_database_deactivated_when_node_down_in_cluster_state) { + TestFileStorComponents c(*this); + // Must set state to up first, or down-edge case won't trigger. + std::string upState("storage:2 distributor:2"); + _node->getStateUpdater().setClusterState( + lib::ClusterState::CSP(new lib::ClusterState(upState))); + + document::BucketId bucket(8, 123); + + createBucket(bucket); + api::BucketInfo serviceLayerInfo(1, 2, 3, 4, 5, true, true); + { + StorBucketDatabase::WrappedEntry entry( + _node->getStorageBucketDatabase().get(bucket, "foo", + StorBucketDatabase::CREATE_IF_NONEXISTING)); + entry->info = serviceLayerInfo; + entry.write(); + } + EXPECT_TRUE(isActive(bucket)); + std::string downState("storage:2 .1.s:d distributor:2"); + _node->getStateUpdater().setClusterState( + lib::ClusterState::CSP(new lib::ClusterState(downState))); -// If we only have a subset of the bucket spaces in maintenance mode (i.e. global -// bucket merge enforcement), we treat this as the node being down from the perspective -// of default space bucket deactivation. -TEST_F(DeactivateBucketsTest, bucket_space_subset_in_maintenance_deactivates_buckets) -{ - EXPECT_TRUE(is_active(test_bucket())); - auto derived = lib::ClusterStateBundle::BucketSpaceStateMapping({ - {document::FixedBucketSpaces::default_space(), state_of("storage:2 .1.s:m distributor:2")}, - {document::FixedBucketSpaces::global_space(), state_of("storage:2 distributor:2")} - }); - _node->getStateUpdater().setClusterStateBundle( - std::make_shared<const lib::ClusterStateBundle>(*state_of("storage:2 .1.s:m distributor:2"), - std::move(derived))); - EXPECT_FALSE(is_active(test_bucket())); + // Buckets should have been deactivated in content layer + EXPECT_FALSE(isActive(bucket)); } -// TODO should also test SPI interaction - } // namespace storage diff --git a/storage/src/vespa/storage/common/content_bucket_space.cpp b/storage/src/vespa/storage/common/content_bucket_space.cpp index 1d78f77525a..56bab9c6edc 100644 --- a/storage/src/vespa/storage/common/content_bucket_space.cpp +++ b/storage/src/vespa/storage/common/content_bucket_space.cpp @@ -57,18 +57,4 @@ ContentBucketSpace::setNodeUpInLastNodeStateSeenByProvider(bool nodeUpInLastNode _nodeUpInLastNodeStateSeenByProvider = nodeUpInLastNodeStateSeenByProvider; } -bool -ContentBucketSpace::getNodeMaintenanceInLastNodeStateSeenByProvider() const -{ - std::lock_guard guard(_lock); - return _nodeMaintenanceInLastNodeStateSeenByProvider; -} - -void -ContentBucketSpace::setNodeMaintenanceInLastNodeStateSeenByProvider(bool nodeMaintenanceInLastNodeStateSeenByProvider) -{ - std::lock_guard guard(_lock); - _nodeMaintenanceInLastNodeStateSeenByProvider = nodeMaintenanceInLastNodeStateSeenByProvider; -} - } diff --git a/storage/src/vespa/storage/common/content_bucket_space.h b/storage/src/vespa/storage/common/content_bucket_space.h index 836cd6e15f8..63379d6b8ee 100644 --- a/storage/src/vespa/storage/common/content_bucket_space.h +++ b/storage/src/vespa/storage/common/content_bucket_space.h @@ -23,7 +23,6 @@ private: std::shared_ptr<const lib::ClusterState> _clusterState; std::shared_ptr<const lib::Distribution> _distribution; bool _nodeUpInLastNodeStateSeenByProvider; - bool _nodeMaintenanceInLastNodeStateSeenByProvider; public: using UP = std::unique_ptr<ContentBucketSpace>; @@ -37,8 +36,6 @@ public: std::shared_ptr<const lib::Distribution> getDistribution() const; bool getNodeUpInLastNodeStateSeenByProvider() const; void setNodeUpInLastNodeStateSeenByProvider(bool nodeUpInLastNodeStateSeenByProvider); - bool getNodeMaintenanceInLastNodeStateSeenByProvider() const; - void setNodeMaintenanceInLastNodeStateSeenByProvider(bool nodeMaintenanceInLastNodeStateSeenByProvider); }; } diff --git a/storage/src/vespa/storage/common/content_bucket_space_repo.h b/storage/src/vespa/storage/common/content_bucket_space_repo.h index 048c2c266f0..7038e9cb1aa 100644 --- a/storage/src/vespa/storage/common/content_bucket_space_repo.h +++ b/storage/src/vespa/storage/common/content_bucket_space_repo.h @@ -21,8 +21,8 @@ private: public: explicit ContentBucketSpaceRepo(const ContentBucketDbOptions&); ContentBucketSpace &get(document::BucketSpace bucketSpace) const; - BucketSpaceMap::const_iterator begin() const noexcept { return _map.begin(); } - BucketSpaceMap::const_iterator end() const noexcept { return _map.end(); } + BucketSpaceMap::const_iterator begin() const { return _map.begin(); } + BucketSpaceMap::const_iterator end() const { return _map.end(); } BucketSpaces getBucketSpaces() const; size_t getBucketMemoryUsage() const; diff --git a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp index fc87845315f..c32efb6aa66 100644 --- a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp +++ b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp @@ -23,7 +23,6 @@ #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/util/idestructorcallback.h> #include <vespa/vespalib/util/sequencedtaskexecutor.h> -#include <algorithm> #include <thread> #include <vespa/log/bufferedlogger.h> @@ -883,64 +882,28 @@ namespace { }; } -bool -FileStorManager::maintenance_in_all_spaces(const lib::Node& node) const noexcept -{ - return std::ranges::all_of(_component.getBucketSpaceRepo(), [&](const auto& elem) { - ContentBucketSpace& bucket_space = *elem.second; - auto derived_cluster_state = bucket_space.getClusterState(); - return derived_cluster_state->getNodeState(node).getState().oneOf("m"); - }); -} - -bool -FileStorManager::should_deactivate_buckets(const ContentBucketSpace& space, - bool node_up_in_space, - bool maintenance_in_all_spaces) noexcept -{ - // Important: this MUST match the semantics in proton::BucketHandler::notifyClusterStateChanged()! - // Otherwise, the content layer and proton will be out of sync in terms of bucket activation state. - if (maintenance_in_all_spaces) { - return false; - } - return ((space.getNodeUpInLastNodeStateSeenByProvider() && !node_up_in_space) - || space.getNodeMaintenanceInLastNodeStateSeenByProvider()); -} - -void -FileStorManager::maybe_log_received_cluster_state() -{ - if (LOG_WOULD_LOG(debug)) { - auto cluster_state_bundle = _component.getStateUpdater().getClusterStateBundle(); - auto baseline_state = cluster_state_bundle->getBaselineClusterState(); - LOG(debug, "FileStorManager received baseline cluster state '%s'", baseline_state->toString().c_str()); - } -} - void FileStorManager::updateState() { - maybe_log_received_cluster_state(); - const lib::Node node(_component.getNodeType(), _component.getIndex()); - const bool in_maintenance = maintenance_in_all_spaces(node); + auto clusterStateBundle = _component.getStateUpdater().getClusterStateBundle(); + lib::ClusterState::CSP state(clusterStateBundle->getBaselineClusterState()); + lib::Node node(_component.getNodeType(), _component.getIndex()); + LOG(debug, "FileStorManager received cluster state '%s'", state->toString().c_str()); for (const auto &elem : _component.getBucketSpaceRepo()) { BucketSpace bucketSpace(elem.first); - ContentBucketSpace& contentBucketSpace = *elem.second; + ContentBucketSpace &contentBucketSpace = *elem.second; auto derivedClusterState = contentBucketSpace.getClusterState(); - const bool node_up_in_space = derivedClusterState->getNodeState(node).getState().oneOf("uir"); - if (should_deactivate_buckets(contentBucketSpace, node_up_in_space, in_maintenance)) { - LOG(debug, "Received cluster state where this node is down; de-activating all buckets " - "in database for bucket space %s", bucketSpace.toString().c_str()); + bool nodeUp = derivedClusterState->getNodeState(node).getState().oneOf("uir"); + // If edge where we go down + if (contentBucketSpace.getNodeUpInLastNodeStateSeenByProvider() && !nodeUp) { + LOG(debug, "Received cluster state where this node is down; de-activating all buckets in database for bucket space %s", bucketSpace.toString().c_str()); Deactivator deactivator; contentBucketSpace.bucketDatabase().for_each_mutable_unordered( std::ref(deactivator), "FileStorManager::updateState"); } - contentBucketSpace.setNodeUpInLastNodeStateSeenByProvider(node_up_in_space); - contentBucketSpace.setNodeMaintenanceInLastNodeStateSeenByProvider(in_maintenance); - spi::ClusterState spiState(*derivedClusterState, _component.getIndex(), - *contentBucketSpace.getDistribution(), - in_maintenance); + contentBucketSpace.setNodeUpInLastNodeStateSeenByProvider(nodeUp); + spi::ClusterState spiState(*derivedClusterState, _component.getIndex(), *contentBucketSpace.getDistribution()); _provider->setClusterState(bucketSpace, spiState); } } diff --git a/storage/src/vespa/storage/persistence/filestorage/filestormanager.h b/storage/src/vespa/storage/persistence/filestorage/filestormanager.h index b7450de13d8..76cb31f32d4 100644 --- a/storage/src/vespa/storage/persistence/filestorage/filestormanager.h +++ b/storage/src/vespa/storage/persistence/filestorage/filestormanager.h @@ -39,7 +39,6 @@ namespace api { } namespace spi { struct PersistenceProvider; } -class ContentBucketSpace; struct FileStorManagerTest; class ReadBucketList; class BucketOwnershipNotifier; @@ -171,11 +170,6 @@ private: void onFlush(bool downwards) override; void reportHtmlStatus(std::ostream&, const framework::HttpUrlPath&) const override; void storageDistributionChanged() override; - [[nodiscard]] static bool should_deactivate_buckets(const ContentBucketSpace& space, - bool node_up_in_space, - bool maintenance_in_all_spaces) noexcept; - [[nodiscard]] bool maintenance_in_all_spaces(const lib::Node& node) const noexcept; - void maybe_log_received_cluster_state(); void updateState(); void propagateClusterStates(); void update_reported_state_after_db_init(); |