diff options
author | Tor Brede Vekterli <vekterli@yahooinc.com> | 2021-11-22 14:06:42 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@yahooinc.com> | 2021-11-22 14:53:50 +0000 |
commit | 85af738b99995cc04e87e45983a3c333f1274728 (patch) | |
tree | c28326ece9c04719a0bb441ee3476fdacdeabeb9 /searchcore/src/tests | |
parent | 4cd9450a93aee9b30b3f441250733bf4ee165cda (diff) |
Continue serving search queries when in Maintenance node state
Previously, entering maintenance state would implicitly deactivate
all buckets on the searchnode and cause empty responses to be returned
for searches.
However, container query dispatch uses async health pings to decide
which nodes to route queries to, so it would be possible for a node to
still be used for queries for a few seconds until the ping discovered
that the node should not be used. In the case of multiple groups without
multiple ready replicas within the group, this would cause transient
coverage loss since the dispatcher would not realize it should route
queries to other groups instead.
With this commit, maintenance edge behavior is changed as follows:
- Buckets are _not_ deactivated when going from an available state
to the maintenance state. However, they _are_ deactivate when going
from maintenance state to an available state in order to avoid transient
query duplicates immediately after the change.
- Searches are executed as normal instead of returning empty replies
when the node is in maintenance state.
The following behavior is intentionally _not_ changed:
- The search interface is still marked as offline when in maintenance
state, as this signals that the node should be taken out of rotation.
In particular, it's critical that the RPC health ping response is
explicitly tagged as having zero active docs when the search interface
is offline, even though many buckets may now actually be active.
Otherwise, queries would not be gracefully drained from the node.
Diffstat (limited to 'searchcore/src/tests')
-rw-r--r-- | searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp | 30 | ||||
-rw-r--r-- | searchcore/src/tests/proton/matchengine/matchengine.cpp | 64 |
2 files changed, 83 insertions, 11 deletions
diff --git a/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp b/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp index 29748a2010c..82410d28610 100644 --- a/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp +++ b/searchcore/src/tests/proton/documentdb/buckethandler/buckethandler_test.cpp @@ -140,6 +140,11 @@ struct Fixture setNodeUp(bool value) { _calc->setNodeUp(value); + _calc->setNodeMaintenance(false); + _handler.notifyClusterStateChanged(_calc); + } + void setNodeMaintenance(bool value) { + _calc->setNodeMaintenance(value); _handler.notifyClusterStateChanged(_calc); } }; @@ -223,7 +228,7 @@ TEST_F("require that unready bucket can be reported as active", Fixture) } -TEST_F("require that node being down deactivates buckets", Fixture) +TEST_F("node going down (but not into maintenance state) deactivates all buckets", Fixture) { f._handler.handleSetCurrentState(f._ready.bucket(2), BucketInfo::ACTIVE, f._genResult); @@ -252,6 +257,29 @@ TEST_F("require that node being down deactivates buckets", Fixture) EXPECT_EQUAL(true, f._bucketInfo.getInfo().isActive()); } +TEST_F("node going into maintenance state does _not_ deactivate any buckets", Fixture) +{ + f._handler.handleSetCurrentState(f._ready.bucket(2), + BucketInfo::ACTIVE, f._genResult); + f.sync(); + f.setNodeMaintenance(true); + f.sync(); + f.handleGetBucketInfo(f._ready.bucket(2)); + EXPECT_TRUE(f._bucketInfo.getInfo().isActive()); +} + +TEST_F("node going from maintenance to up state deactivates all buckets", Fixture) +{ + f._handler.handleSetCurrentState(f._ready.bucket(2), + BucketInfo::ACTIVE, f._genResult); + f.sync(); + f.setNodeMaintenance(true); + f.sync(); + f.setNodeUp(true); + f.sync(); + f.handleGetBucketInfo(f._ready.bucket(2)); + EXPECT_FALSE(f._bucketInfo.getInfo().isActive()); +} TEST_MAIN() { diff --git a/searchcore/src/tests/proton/matchengine/matchengine.cpp b/searchcore/src/tests/proton/matchengine/matchengine.cpp index 481a9f061be..34c36fd9a72 100644 --- a/searchcore/src/tests/proton/matchengine/matchengine.cpp +++ b/searchcore/src/tests/proton/matchengine/matchengine.cpp @@ -17,7 +17,7 @@ class MySearchHandler : public ISearchHandler { std::string _name; std::string _reply; public: - MySearchHandler(size_t numHits = 0) : + explicit MySearchHandler(size_t numHits = 0) : _numHits(numHits), _name("my"), _reply("myreply") {} DocsumReply::UP getDocsums(const DocsumRequest &) override { @@ -91,6 +91,7 @@ assertSearchReply(MatchEngine & engine, const std::string & searchDocType, size_ LocalSearchClient client; engine.search(SearchRequest::Source(request), client); SearchReply::UP reply = client.getReply(10000); + ASSERT_TRUE(reply); return EXPECT_EQUAL(expHits, reply->hits.size()); } @@ -173,11 +174,24 @@ TEST("requireThatEmptySearchReplyIsReturnedWhenEngineIsClosed") LocalSearchClient client; SearchRequest::Source request(new SearchRequest()); SearchReply::UP reply = engine.search(std::move(request), client); - EXPECT_TRUE(reply ); + ASSERT_TRUE(reply); EXPECT_EQUAL(0u, reply->hits.size()); EXPECT_EQUAL(7u, reply->getDistributionKey()); } +namespace { + +constexpr const char* search_interface_offline_slime_str() noexcept { + return "{\n" + " \"status\": {\n" + " \"state\": \"OFFLINE\",\n" + " \"message\": \"Search interface is offline\"\n" + " }\n" + "}\n"; +} + +} + TEST("requireThatStateIsReported") { MatchEngine engine(1, 1, 7); @@ -185,14 +199,44 @@ TEST("requireThatStateIsReported") Slime slime; SlimeInserter inserter(slime); engine.get_state(inserter, false); - EXPECT_EQUAL( - "{\n" - " \"status\": {\n" - " \"state\": \"OFFLINE\",\n" - " \"message\": \"Search interface is offline\"\n" - " }\n" - "}\n", - slime.toString()); + EXPECT_EQUAL(search_interface_offline_slime_str(), + slime.toString()); +} + +TEST("searches are executed when node is in maintenance mode") +{ + MatchEngine engine(1, 1, 7); + engine.setNodeMaintenance(true); + engine.putSearchHandler(DocTypeName("foo"), std::make_shared<MySearchHandler>(3)); + EXPECT_TRUE(assertSearchReply(engine, "foo", 3)); +} + +TEST("setNodeMaintenance(true) implies setNodeUp(false)") +{ + MatchEngine engine(1, 1, 7); + engine.setNodeUp(true); + engine.setNodeMaintenance(true); + EXPECT_FALSE(engine.isOnline()); +} + +TEST("setNodeMaintenance(false) does not imply setNodeUp(false)") +{ + MatchEngine engine(1, 1, 7); + engine.setNodeUp(true); + engine.setNodeMaintenance(false); + EXPECT_TRUE(engine.isOnline()); +} + +TEST("search interface is reported as offline when node is in maintenance mode") +{ + MatchEngine engine(1, 1, 7); + engine.setNodeMaintenance(true); + + Slime slime; + SlimeInserter inserter(slime); + engine.get_state(inserter, false); + EXPECT_EQUAL(search_interface_offline_slime_str(), + slime.toString()); } TEST_MAIN() { TEST_RUN_ALL(); } |