summaryrefslogtreecommitdiffstats
path: root/searchcore
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2021-11-23 21:12:52 +0100
committerGitHub <noreply@github.com>2021-11-23 21:12:52 +0100
commit6e7385e7858ee5491f028c7012d9928ea340d678 (patch)
tree20706e75771d68979fa5b6219f49791ed4dd4c8f /searchcore
parenta2f9a7b7d6afcd9e9567e53e5a0f489bddaf3cb4 (diff)
parent3d31f8967fc8970835b14a615f393ec4acda3394 (diff)
Merge pull request #20156 from vespa-engine/vekterli/allow-searches-when-node-is-in-maintenance-mode
Continue serving search queries when in Maintenance node state [run-systemtest]
Diffstat (limited to 'searchcore')
-rw-r--r--searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp43
-rw-r--r--searchcore/src/tests/proton/matchengine/matchengine.cpp64
-rw-r--r--searchcore/src/vespa/searchcore/proton/matchengine/matchengine.cpp14
-rw-r--r--searchcore/src/vespa/searchcore/proton/matchengine/matchengine.h8
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp28
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/buckethandler.h1
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp8
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/ibucketstatecalculator.h1
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/proton.cpp3
-rw-r--r--searchcore/src/vespa/searchcore/proton/server/proton.h1
-rw-r--r--searchcore/src/vespa/searchcore/proton/test/bucketstatecalculator.h14
11 files changed, 164 insertions, 21 deletions
diff --git a/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp b/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp
index 29748a2010c..cadfa8cd72f 100644
--- a/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp
+++ b/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp
@@ -140,6 +140,11 @@ struct Fixture
setNodeUp(bool value)
{
_calc->setNodeUp(value);
+ _calc->setNodeMaintenance(false);
+ _handler.notifyClusterStateChanged(_calc);
+ }
+ void setNodeMaintenance(bool value) {
+ _calc->setNodeMaintenance(value);
_handler.notifyClusterStateChanged(_calc);
}
};
@@ -223,7 +228,7 @@ TEST_F("require that unready bucket can be reported as active", Fixture)
}
-TEST_F("require that node being down deactivates buckets", Fixture)
+TEST_F("node going down (but not into maintenance state) deactivates all buckets", Fixture)
{
f._handler.handleSetCurrentState(f._ready.bucket(2),
BucketInfo::ACTIVE, f._genResult);
@@ -252,6 +257,42 @@ TEST_F("require that node being down deactivates buckets", Fixture)
EXPECT_EQUAL(true, f._bucketInfo.getInfo().isActive());
}
+TEST_F("node going into maintenance state does _not_ deactivate any buckets", Fixture)
+{
+ f._handler.handleSetCurrentState(f._ready.bucket(2),
+ BucketInfo::ACTIVE, f._genResult);
+ f.sync();
+ f.setNodeMaintenance(true);
+ f.sync();
+ f.handleGetBucketInfo(f._ready.bucket(2));
+ EXPECT_TRUE(f._bucketInfo.getInfo().isActive());
+}
+
+TEST_F("node going from maintenance to up state deactivates all buckets", Fixture)
+{
+ f._handler.handleSetCurrentState(f._ready.bucket(2),
+ BucketInfo::ACTIVE, f._genResult);
+ f.sync();
+ f.setNodeMaintenance(true);
+ f.sync();
+ f.setNodeUp(true);
+ f.sync();
+ f.handleGetBucketInfo(f._ready.bucket(2));
+ EXPECT_FALSE(f._bucketInfo.getInfo().isActive());
+}
+
+TEST_F("node going from maintenance to down state deactivates all buckets", Fixture)
+{
+ f._handler.handleSetCurrentState(f._ready.bucket(2),
+ BucketInfo::ACTIVE, f._genResult);
+ f.sync();
+ f.setNodeMaintenance(true);
+ f.sync();
+ f.setNodeUp(false);
+ f.sync();
+ f.handleGetBucketInfo(f._ready.bucket(2));
+ EXPECT_FALSE(f._bucketInfo.getInfo().isActive());
+}
TEST_MAIN()
{
diff --git a/searchcore/src/tests/proton/matchengine/matchengine.cpp b/searchcore/src/tests/proton/matchengine/matchengine.cpp
index 481a9f061be..34c36fd9a72 100644
--- a/searchcore/src/tests/proton/matchengine/matchengine.cpp
+++ b/searchcore/src/tests/proton/matchengine/matchengine.cpp
@@ -17,7 +17,7 @@ class MySearchHandler : public ISearchHandler {
std::string _name;
std::string _reply;
public:
- MySearchHandler(size_t numHits = 0) :
+ explicit MySearchHandler(size_t numHits = 0) :
_numHits(numHits), _name("my"), _reply("myreply")
{}
DocsumReply::UP getDocsums(const DocsumRequest &) override {
@@ -91,6 +91,7 @@ assertSearchReply(MatchEngine & engine, const std::string & searchDocType, size_
LocalSearchClient client;
engine.search(SearchRequest::Source(request), client);
SearchReply::UP reply = client.getReply(10000);
+ ASSERT_TRUE(reply);
return EXPECT_EQUAL(expHits, reply->hits.size());
}
@@ -173,11 +174,24 @@ TEST("requireThatEmptySearchReplyIsReturnedWhenEngineIsClosed")
LocalSearchClient client;
SearchRequest::Source request(new SearchRequest());
SearchReply::UP reply = engine.search(std::move(request), client);
- EXPECT_TRUE(reply );
+ ASSERT_TRUE(reply);
EXPECT_EQUAL(0u, reply->hits.size());
EXPECT_EQUAL(7u, reply->getDistributionKey());
}
+namespace {
+
+constexpr const char* search_interface_offline_slime_str() noexcept {
+ return "{\n"
+ " \"status\": {\n"
+ " \"state\": \"OFFLINE\",\n"
+ " \"message\": \"Search interface is offline\"\n"
+ " }\n"
+ "}\n";
+}
+
+}
+
TEST("requireThatStateIsReported")
{
MatchEngine engine(1, 1, 7);
@@ -185,14 +199,44 @@ TEST("requireThatStateIsReported")
Slime slime;
SlimeInserter inserter(slime);
engine.get_state(inserter, false);
- EXPECT_EQUAL(
- "{\n"
- " \"status\": {\n"
- " \"state\": \"OFFLINE\",\n"
- " \"message\": \"Search interface is offline\"\n"
- " }\n"
- "}\n",
- slime.toString());
+ EXPECT_EQUAL(search_interface_offline_slime_str(),
+ slime.toString());
+}
+
+TEST("searches are executed when node is in maintenance mode")
+{
+ MatchEngine engine(1, 1, 7);
+ engine.setNodeMaintenance(true);
+ engine.putSearchHandler(DocTypeName("foo"), std::make_shared<MySearchHandler>(3));
+ EXPECT_TRUE(assertSearchReply(engine, "foo", 3));
+}
+
+TEST("setNodeMaintenance(true) implies setNodeUp(false)")
+{
+ MatchEngine engine(1, 1, 7);
+ engine.setNodeUp(true);
+ engine.setNodeMaintenance(true);
+ EXPECT_FALSE(engine.isOnline());
+}
+
+TEST("setNodeMaintenance(false) does not imply setNodeUp(false)")
+{
+ MatchEngine engine(1, 1, 7);
+ engine.setNodeUp(true);
+ engine.setNodeMaintenance(false);
+ EXPECT_TRUE(engine.isOnline());
+}
+
+TEST("search interface is reported as offline when node is in maintenance mode")
+{
+ MatchEngine engine(1, 1, 7);
+ engine.setNodeMaintenance(true);
+
+ Slime slime;
+ SlimeInserter inserter(slime);
+ engine.get_state(inserter, false);
+ EXPECT_EQUAL(search_interface_offline_slime_str(),
+ slime.toString());
}
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.cpp b/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.cpp
index 5ad4a7ed52b..58dc473b85e 100644
--- a/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.cpp
@@ -50,7 +50,8 @@ MatchEngine::MatchEngine(size_t numThreads, size_t threadsPerSearch, uint32_t di
_handlers(),
_executor(std::max(size_t(1), numThreads / threadsPerSearch), 256_Ki, match_engine_executor),
_threadBundlePool(std::max(size_t(1), threadsPerSearch)),
- _nodeUp(false)
+ _nodeUp(false),
+ _nodeMaintenance(false)
{
}
@@ -98,7 +99,8 @@ search::engine::SearchReply::UP
MatchEngine::search(search::engine::SearchRequest::Source request,
search::engine::SearchClient &client)
{
- if (_closed || !_nodeUp) {
+ // We continue to allow searches if the node is in Maintenance mode
+ if (_closed || (!_nodeUp && !_nodeMaintenance)) {
auto ret = std::make_unique<search::engine::SearchReply>();
ret->setDistributionKey(_distributionKey);
@@ -177,6 +179,14 @@ MatchEngine::setNodeUp(bool nodeUp)
_nodeUp = nodeUp;
}
+void
+MatchEngine::setNodeMaintenance(bool nodeMaintenance)
+{
+ _nodeMaintenance = nodeMaintenance;
+ if (nodeMaintenance) {
+ _nodeUp = false;
+ }
+}
StatusReport::UP
MatchEngine::reportStatus() const
diff --git a/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.h b/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.h
index b4e32c45003..7cc0c97048b 100644
--- a/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.h
+++ b/searchcore/src/vespa/searchcore/proton/matchengine/matchengine.h
@@ -26,6 +26,7 @@ private:
vespalib::ThreadStackExecutor _executor;
vespalib::SimpleThreadBundle::Pool _threadBundlePool;
bool _nodeUp;
+ bool _nodeMaintenance;
public:
/**
@@ -131,6 +132,13 @@ public:
*/
void setNodeUp(bool nodeUp);
+ /**
+ * Set node into maintenance, based on info from cluster controller. Note that
+ * nodeMaintenance == true also implies setNodeUp(false), as the node is technically
+ * not in a Up state.
+ */
+ void setNodeMaintenance(bool nodeMaintenance);
+
StatusReport::UP reportStatus() const;
search::engine::SearchReply::UP search(
diff --git a/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp b/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp
index c15be9336fe..0d9b2f6aaf6 100644
--- a/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/buckethandler.cpp
@@ -68,7 +68,8 @@ BucketHandler::BucketHandler(vespalib::Executor &executor)
_executor(executor),
_ready(nullptr),
_changedHandlers(),
- _nodeUp(false)
+ _nodeUp(false),
+ _nodeMaintenance(false)
{
LOG(spam, "BucketHandler::BucketHandler");
}
@@ -143,13 +144,32 @@ BucketHandler::handlePopulateActiveBuckets(document::BucketId::List &buckets,
}));
}
+namespace {
+constexpr const char* bool_str(bool v) noexcept {
+ return v ? "true" : "false";
+}
+}
+
void
BucketHandler::notifyClusterStateChanged(const std::shared_ptr<IBucketStateCalculator> & newCalc)
{
bool oldNodeUp = _nodeUp;
- _nodeUp = newCalc->nodeUp();
- LOG(spam, "notifyClusterStateChanged: %s -> %s", oldNodeUp ? "up" : "down", _nodeUp ? "up" : "down");
- if (oldNodeUp && !_nodeUp) {
+ bool oldNodeMaintenance = _nodeMaintenance;
+ _nodeUp = newCalc->nodeUp(); // Up, Retired or Initializing
+ _nodeMaintenance = newCalc->nodeMaintenance();
+ LOG(spam, "notifyClusterStateChanged; up: %s -> %s, maintenance: %s -> %s",
+ bool_str(oldNodeUp), bool_str(_nodeUp),
+ bool_str(oldNodeMaintenance), bool_str(_nodeMaintenance));
+ if (_nodeMaintenance) {
+ return; // Don't deactivate buckets in maintenance mode; let query traffic drain away naturally.
+ }
+ // We implicitly deactivate buckets in two edge cases:
+ // - Up -> Down (not maintenance; handled above), since the node can not be expected to offer
+ // any graceful query draining when set Down.
+ // - Maintenance -> !Maintenance, since we'd otherwise introduce a bunch of transient duplicate
+ // results into queries if we transition to an available state.
+ // The assumption is that the system has already activated buckets on other nodes in such a scenario.
+ if ((oldNodeUp && !_nodeUp) || oldNodeMaintenance) {
deactivateAllActiveBuckets();
}
}
diff --git a/searchcore/src/vespa/searchcore/proton/server/buckethandler.h b/searchcore/src/vespa/searchcore/proton/server/buckethandler.h
index 7f44d2ebd71..2344e080450 100644
--- a/searchcore/src/vespa/searchcore/proton/server/buckethandler.h
+++ b/searchcore/src/vespa/searchcore/proton/server/buckethandler.h
@@ -25,6 +25,7 @@ private:
documentmetastore::IBucketHandler *_ready;
std::vector<IBucketStateChangedHandler *> _changedHandlers;
bool _nodeUp;
+ bool _nodeMaintenance;
void performSetCurrentState(document::BucketId bucketId,
storage::spi::BucketInfo::ActiveState newState,
diff --git a/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp b/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp
index 3d709bd19d1..20cd4ced6b2 100644
--- a/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/clusterstatehandler.cpp
@@ -27,6 +27,7 @@ private:
bool _nodeUp;
bool _nodeInitializing;
bool _nodeRetired;
+ bool _nodeMaintenance;
public:
ClusterStateAdapter(const ClusterState &calc)
@@ -34,7 +35,8 @@ public:
_clusterUp(_calc.clusterUp()),
_nodeUp(_calc.nodeUp()),
_nodeInitializing(_calc.nodeInitializing()),
- _nodeRetired(_calc.nodeRetired())
+ _nodeRetired(_calc.nodeRetired()),
+ _nodeMaintenance(_calc.nodeMaintenance())
{
}
vespalib::Trinary shouldBeReady(const document::Bucket &bucket) const override {
@@ -44,6 +46,7 @@ public:
bool nodeUp() const override { return _nodeUp; }
bool nodeInitializing() const override { return _nodeInitializing; }
bool nodeRetired() const override { return _nodeRetired; }
+ bool nodeMaintenance() const noexcept override { return _nodeMaintenance; }
};
}
@@ -53,11 +56,12 @@ ClusterStateHandler::performSetClusterState(const ClusterState *calc, IGenericRe
{
LOG(debug,
"performSetClusterState(): "
- "clusterUp(%s), nodeUp(%s), nodeInitializing(%s)"
+ "clusterUp(%s), nodeUp(%s), nodeInitializing(%s), nodeMaintenance(%s)"
"changedHandlers.size() = %zu",
(calc->clusterUp() ? "true" : "false"),
(calc->nodeUp() ? "true" : "false"),
(calc->nodeInitializing() ? "true" : "false"),
+ (calc->nodeMaintenance() ? "true" : "false"),
_changedHandlers.size());
if (!_changedHandlers.empty()) {
auto newCalc = std::make_shared<ClusterStateAdapter>(*calc);
diff --git a/searchcore/src/vespa/searchcore/proton/server/ibucketstatecalculator.h b/searchcore/src/vespa/searchcore/proton/server/ibucketstatecalculator.h
index b60a5ad8175..9534f346d1f 100644
--- a/searchcore/src/vespa/searchcore/proton/server/ibucketstatecalculator.h
+++ b/searchcore/src/vespa/searchcore/proton/server/ibucketstatecalculator.h
@@ -15,6 +15,7 @@ struct IBucketStateCalculator
virtual bool nodeUp() const = 0;
virtual bool nodeInitializing() const = 0;
virtual bool nodeRetired() const = 0;
+ virtual bool nodeMaintenance() const noexcept = 0;
virtual ~IBucketStateCalculator() = default;
};
diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.cpp b/searchcore/src/vespa/searchcore/proton/server/proton.cpp
index 275f9029107..5b21242e397 100644
--- a/searchcore/src/vespa/searchcore/proton/server/proton.cpp
+++ b/searchcore/src/vespa/searchcore/proton/server/proton.cpp
@@ -865,10 +865,11 @@ Proton::setClusterState(BucketSpace bucketSpace, const storage::spi::ClusterStat
// forward info sent by cluster controller to persistence engine
// about whether node is supposed to be up or not. Match engine
// needs to know this in order to stop serving queries.
- bool nodeUpInBucketSpace(calc.nodeUp());
+ bool nodeUpInBucketSpace(calc.nodeUp()); // TODO rename calculator function to imply bucket space affinity
bool nodeRetired(calc.nodeRetired());
bool nodeUp = updateNodeUp(bucketSpace, nodeUpInBucketSpace);
_matchEngine->setNodeUp(nodeUp);
+ _matchEngine->setNodeMaintenance(calc.nodeMaintenance()); // Note: _all_ bucket spaces in maintenance
if (_memoryFlushConfigUpdater) {
_memoryFlushConfigUpdater->setNodeRetired(nodeRetired);
}
diff --git a/searchcore/src/vespa/searchcore/proton/server/proton.h b/searchcore/src/vespa/searchcore/proton/server/proton.h
index 6b0bef50cae..6fd31352051 100644
--- a/searchcore/src/vespa/searchcore/proton/server/proton.h
+++ b/searchcore/src/vespa/searchcore/proton/server/proton.h
@@ -133,6 +133,7 @@ private:
uint32_t getDistributionKey() const override { return _distributionKey; }
BootstrapConfig::SP getActiveConfigSnapshot() const;
std::shared_ptr<IDocumentDBReferenceRegistry> getDocumentDBReferenceRegistry() const override;
+ // Returns true if the node is up in _any_ bucket space
bool updateNodeUp(BucketSpace bucketSpace, bool nodeUpInBucketSpace);
void closeDocumentDBs(vespalib::ThreadStackExecutorBase & executor);
public:
diff --git a/searchcore/src/vespa/searchcore/proton/test/bucketstatecalculator.h b/searchcore/src/vespa/searchcore/proton/test/bucketstatecalculator.h
index a5a0185d787..e218058f01e 100644
--- a/searchcore/src/vespa/searchcore/proton/test/bucketstatecalculator.h
+++ b/searchcore/src/vespa/searchcore/proton/test/bucketstatecalculator.h
@@ -20,6 +20,7 @@ private:
bool _clusterUp;
bool _nodeUp;
bool _nodeRetired;
+ bool _nodeMaintenance;
public:
typedef std::shared_ptr<BucketStateCalculator> SP;
@@ -28,7 +29,8 @@ public:
_asked(),
_clusterUp(true),
_nodeUp(true),
- _nodeRetired(false)
+ _nodeRetired(false),
+ _nodeMaintenance(false)
{
}
BucketStateCalculator &addReady(const document::BucketId &bucket) {
@@ -54,6 +56,15 @@ public:
return *this;
}
+ BucketStateCalculator& setNodeMaintenance(bool maintenance) noexcept {
+ _nodeMaintenance = maintenance;
+ if (maintenance) {
+ _nodeUp = false;
+ _nodeRetired = false;
+ }
+ return *this;
+ }
+
const BucketIdVector &asked() const noexcept { return _asked; }
void resetAsked() { _asked.clear(); }
@@ -67,6 +78,7 @@ public:
bool nodeUp() const override { return _nodeUp; }
bool nodeRetired() const override { return _nodeRetired; }
bool nodeInitializing() const override { return false; }
+ bool nodeMaintenance() const noexcept override { return _nodeMaintenance; }
};
}