summaryrefslogtreecommitdiffstats
path: root/storage
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahooinc.com>2022-09-15 14:30:28 +0000
committerTor Brede Vekterli <vekterli@yahooinc.com>2022-09-15 14:32:01 +0000
commit6913409d5fe94c86bfe238daebf9fd1b16072e49 (patch)
treed39072c993b3f3019de70773d7f6e71bd5ba40a8 /storage
parent947b8c17af4ed513ce22615ddd595527985df46a (diff)
Support activation of replicas even if ideal node in maintenance
Adds an internal feature support flag which communicates that the content node will not implicitly index a non-ideal replica marked explicitly as active. Activation during maintenance is only performed iff a content node has this flag set.
Diffstat (limited to 'storage')
-rw-r--r--storage/src/tests/distributor/statecheckerstest.cpp36
-rw-r--r--storage/src/vespa/storage/distributor/statecheckers.cpp14
2 files changed, 43 insertions, 7 deletions
diff --git a/storage/src/tests/distributor/statecheckerstest.cpp b/storage/src/tests/distributor/statecheckerstest.cpp
index 94f913a3325..f94b0ae1bcd 100644
--- a/storage/src/tests/distributor/statecheckerstest.cpp
+++ b/storage/src/tests/distributor/statecheckerstest.cpp
@@ -284,6 +284,14 @@ struct StateCheckersTest : Test, DistributorStripeTestUtil {
bool includePriority = false);
std::string testBucketStatePerGroup(const std::string& bucketInfo,
bool includePriority = false);
+
+ void do_test_bucket_activation();
+
+ void set_node_supports_no_implicit_index_on_activation(uint16_t node, bool supported) {
+ NodeSupportedFeatures nsf;
+ nsf.no_implicit_indexing_of_active_buckets = supported;
+ set_node_supported_features(node, nsf);
+ }
};
StateCheckersTest::CheckerParams::CheckerParams() = default;
@@ -997,7 +1005,7 @@ std::string StateCheckersTest::testBucketState(
includePriority);
}
-TEST_F(StateCheckersTest, bucket_state) {
+void StateCheckersTest::do_test_bucket_activation() {
setup_stripe(2, 100, "distributor:1 storage:4");
{
@@ -1035,11 +1043,13 @@ TEST_F(StateCheckersTest, bucket_state) {
EXPECT_EQ("[Setting node 0 as active: copy has 3 docs]",
testBucketState("0=2/3/4"));
+ // TODO remove this
// A replica with more documents should be preferred over one with fewer.
EXPECT_EQ("[Setting node 3 as active: copy has 6 docs and ideal state priority 1]"
"[Setting node 1 as inactive]",
testBucketState("1=2/3/4/u/a,3=5/6/7/t"));
+ // TODO remove this
// Replica 2 has most documents and should be activated
EXPECT_EQ("[Setting node 2 as active: copy has 9 docs]",
testBucketState("1=2/3/4,3=5/6/7/,2=8/9/10/t"));
@@ -1087,14 +1097,25 @@ TEST_F(StateCheckersTest, bucket_state) {
testBucketState("2=8/9/10/u/i/r,1=2/3/4/u/a/r,3=5/6/7/u/i/r"));
}
+TEST_F(StateCheckersTest, bucket_activation_behaves_as_expected_with_implicit_index_on_active) {
+ set_node_supports_no_implicit_index_on_activation(2, false);
+ do_test_bucket_activation();
+}
+
+TEST_F(StateCheckersTest, bucket_activation_behaves_as_expected_without_implicit_index_on_active) {
+ set_node_supports_no_implicit_index_on_activation(2, true);
+ do_test_bucket_activation();
+}
+
/**
* Users assume that setting nodes into maintenance will not cause extra load
* on the cluster, but activating non-ready copies because the active copy went
* into maintenance violates that assumption. See bug 6833209 for context and
* details.
*/
-TEST_F(StateCheckersTest, do_not_activate_non_ready_copies_when_ideal_node_in_maintenance) {
+TEST_F(StateCheckersTest, do_not_activate_non_ready_copies_when_ideal_node_in_maintenance_if_active_implicitly_indexes) {
setup_stripe(2, 100, "distributor:1 storage:4 .1.s:m");
+ set_node_supports_no_implicit_index_on_activation(2, false);
// Ideal node 1 is in maintenance and no ready copy available.
EXPECT_EQ("NO OPERATIONS GENERATED",
testBucketState("2=8/9/10/t/i/u,3=5/6/7"));
@@ -1103,6 +1124,17 @@ TEST_F(StateCheckersTest, do_not_activate_non_ready_copies_when_ideal_node_in_ma
testBucketState("2=8/9/10/u/i/r,3=5/6/7/u/i/u"));
}
+TEST_F(StateCheckersTest, activate_non_ready_copies_when_ideal_node_in_maintenance_if_active_does_not_implicitly_index) {
+ setup_stripe(2, 100, "distributor:1 storage:4 .1.s:m");
+ set_node_supports_no_implicit_index_on_activation(2, true);
+ // Ideal node 1 is in maintenance and no ready copy available.
+ EXPECT_EQ("[Setting node 2 as active: copy has 9 docs]", // TODO ideal state pri instead
+ testBucketState("2=8/9/10/t/i/u,3=5/6/7"));
+ // But we should activate another copy iff there's another ready copy.
+ EXPECT_EQ("[Setting node 2 as active: copy is ready with 9 docs]",
+ testBucketState("2=8/9/10/u/i/r,3=5/6/7/u/i/u"));
+}
+
/**
* We really do not want to activate buckets when they are inconsistent.
* See bug 6395693 for a set of reasons why.
diff --git a/storage/src/vespa/storage/distributor/statecheckers.cpp b/storage/src/vespa/storage/distributor/statecheckers.cpp
index 71b2da1359a..aaf65f1a6fe 100644
--- a/storage/src/vespa/storage/distributor/statecheckers.cpp
+++ b/storage/src/vespa/storage/distributor/statecheckers.cpp
@@ -2,6 +2,7 @@
#include "statecheckers.h"
#include "activecopy.h"
+#include "node_supported_features_repo.h"
#include <vespa/document/bucket/fixed_bucket_spaces.h>
#include <vespa/storage/distributor/operations/idealstate/splitoperation.h>
#include <vespa/storage/distributor/operations/idealstate/joinoperation.h>
@@ -1038,15 +1039,18 @@ BucketStateStateChecker::shouldSkipActivationDueToMaintenance(
const StateChecker::Context& c) const
{
for (uint32_t i = 0; i < activeNodes.size(); ++i) {
- const BucketCopy* cp(c.entry->getNode(activeNodes[i]._nodeIndex));
+ const auto node_index = activeNodes[i]._nodeIndex;
+ const BucketCopy* cp(c.entry->getNode(node_index));
if (!cp || cp->active()) {
continue;
}
if (!cp->ready()) {
- // If copy is not ready, we don't want to activate it if a node
- // is set in maintenance. Doing so would imply that we want proton
- // to start background indexing.
- return containsMaintenanceNode(c.idealState, c);
+ if (!c.op_ctx.node_supported_features_repo().node_supported_features(node_index).no_implicit_indexing_of_active_buckets) {
+ // If copy is not ready, we don't want to activate it if a node
+ // is set in maintenance. Doing so would imply that we want proton
+ // to start background indexing.
+ return containsMaintenanceNode(c.idealState, c);
+ } // else: activation does not imply indexing, so we can safely do it at any time.
}
}
return false;