aboutsummaryrefslogtreecommitdiffstats
path: root/searchcore
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahooinc.com>2021-11-23 15:46:19 +0000
committerTor Brede Vekterli <vekterli@yahooinc.com>2021-11-24 10:13:50 +0000
commit7fe561bae2574f6ce70fe3255c57ff7308310b9a (patch)
treebdd8d0c9ad1f1f2cbfe302a8c96661401ff28c16 /searchcore
parenta428b8f2754eec0b3748451b7aca79bc00d0a04e (diff)
Handle case where bucket spaces have differing maintenance state for a node
Only skip deactivating buckets if the entire _node_ is marked as maintenance state, i.e. the node has maintenance state across all bucket spaces provided in the bundle. Otherwise treat the state transition as if the node goes down, deactivating all buckets. Also ensure that the bucket deactivation logic above the SPI is identical to that within Proton. This avoids bucket DBs getting out of sync between the two.
Diffstat (limited to 'searchcore')
-rw-r--r--searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp13
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp18
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp3
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/proton.cpp4
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/proton.h1
5 files changed, 31 insertions, 8 deletions
diff --git a/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp b/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp
index 82410d28610..cadfa8cd72f 100644
--- a/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp
+++ b/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp
@@ -281,6 +281,19 @@ TEST_F("node going from maintenance to up state deactivates all buckets", Fixtur
EXPECT_FALSE(f._bucketInfo.getInfo().isActive());
}
+TEST_F("node going from maintenance to down state deactivates all buckets", Fixture)
+{
+ f._handler.handleSetCurrentState(f._ready.bucket(2),
+ BucketInfo::ACTIVE, f._genResult);
+ f.sync();
+ f.setNodeMaintenance(true);
+ f.sync();
+ f.setNodeUp(false);
+ f.sync();
+ f.handleGetBucketInfo(f._ready.bucket(2));
+ EXPECT_FALSE(f._bucketInfo.getInfo().isActive());
+}
+
TEST_MAIN()
{
TEST_RUN_ALL();
diff --git a/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp b/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp
index d329d9ce27c..0d9b2f6aaf6 100644
--- a/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp
@@ -144,6 +144,12 @@ BucketHandler::handlePopulateActiveBuckets(document::BucketId::List &buckets,
}));
}
+namespace {
+constexpr const char* bool_str(bool v) noexcept {
+ return v ? "true" : "false";
+}
+}
+
void
BucketHandler::notifyClusterStateChanged(const std::shared_ptr<IBucketStateCalculator> & newCalc)
{
@@ -151,17 +157,19 @@ BucketHandler::notifyClusterStateChanged(const std::shared_ptr<IBucketStateCalcu
bool oldNodeMaintenance = _nodeMaintenance;
_nodeUp = newCalc->nodeUp(); // Up, Retired or Initializing
_nodeMaintenance = newCalc->nodeMaintenance();
- LOG(spam, "notifyClusterStateChanged: %s -> %s", oldNodeUp ? "up" : "down", _nodeUp ? "up" : "down");
+ LOG(spam, "notifyClusterStateChanged; up: %s -> %s, maintenance: %s -> %s",
+ bool_str(oldNodeUp), bool_str(_nodeUp),
+ bool_str(oldNodeMaintenance), bool_str(_nodeMaintenance));
if (_nodeMaintenance) {
return; // Don't deactivate buckets in maintenance mode; let query traffic drain away naturally.
}
// We implicitly deactivate buckets in two edge cases:
// - Up -> Down (not maintenance; handled above), since the node can not be expected to offer
// any graceful query draining when set Down.
- // - Maintenance -> Up, since we'd otherwise introduce a bunch of transient duplicate results
- // into queries. The assumption is that the system has already activated buckets on other
- // nodes in such a scenario.
- if ((oldNodeUp && !_nodeUp) || (oldNodeMaintenance && _nodeUp)) {
+ // - Maintenance -> !Maintenance, since we'd otherwise introduce a bunch of transient duplicate
+ // results into queries if we transition to an available state.
+ // The assumption is that the system has already activated buckets on other nodes in such a scenario.
+ if ((oldNodeUp && !_nodeUp) || oldNodeMaintenance) {
deactivateAllActiveBuckets();
}
}
diff --git a/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp b/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp
index 137b2596eeb..20cd4ced6b2 100644
--- a/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp
@@ -56,11 +56,12 @@ ClusterStateHandler::performSetClusterState(const ClusterState *calc, IGenericRe
{
LOG(debug,
"performSetClusterState(): "
- "clusterUp(%s), nodeUp(%s), nodeInitializing(%s)"
+ "clusterUp(%s), nodeUp(%s), nodeInitializing(%s), nodeMaintenance(%s)"
"changedHandlers.size() = %zu",
(calc->clusterUp() ? "true" : "false"),
(calc->nodeUp() ? "true" : "false"),
(calc->nodeInitializing() ? "true" : "false"),
+ (calc->nodeMaintenance() ? "true" : "false"),
_changedHandlers.size());
if (!_changedHandlers.empty()) {
auto newCalc = std::make_shared<ClusterStateAdapter>(*calc);
diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.cpp b/searchcore/src/vespa/searchcore/proton/server/proton.cpp
index 621f3acd072..5b21242e397 100644
--- a/searchcore/src/vespa/searchcore/proton/server/proton.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/proton.cpp
@@ -865,11 +865,11 @@ Proton::setClusterState(BucketSpace bucketSpace, const storage::spi::ClusterStat
// forward info sent by cluster controller to persistence engine
// about whether node is supposed to be up or not. Match engine
// needs to know this in order to stop serving queries.
- bool nodeUpInBucketSpace(calc.nodeUp());
+ bool nodeUpInBucketSpace(calc.nodeUp()); // TODO rename calculator function to imply bucket space affinity
bool nodeRetired(calc.nodeRetired());
bool nodeUp = updateNodeUp(bucketSpace, nodeUpInBucketSpace);
_matchEngine->setNodeUp(nodeUp);
- _matchEngine->setNodeMaintenance(calc.nodeMaintenance());
+ _matchEngine->setNodeMaintenance(calc.nodeMaintenance()); // Note: _all_ bucket spaces in maintenance
if (_memoryFlushConfigUpdater) {
_memoryFlushConfigUpdater->setNodeRetired(nodeRetired);
}
diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.h b/searchcore/src/vespa/searchcore/proton/server/proton.h
index 6b0bef50cae..6fd31352051 100644
--- a/searchcore/src/vespa/searchcore/proton/server/proton.h
+++ b/searchcore/src/vespa/searchcore/proton/server/proton.h
@@ -133,6 +133,7 @@ private:
uint32_t getDistributionKey() const override { return _distributionKey; }
BootstrapConfig::SP getActiveConfigSnapshot() const;
std::shared_ptr<IDocumentDBReferenceRegistry> getDocumentDBReferenceRegistry() const override;
+ // Returns true if the node is up in _any_ bucket space
bool updateNodeUp(BucketSpace bucketSpace, bool nodeUpInBucketSpace);
void closeDocumentDBs(vespalib::ThreadStackExecutorBase & executor);
public: