summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@verizonmedia.com>2019-02-21 15:10:07 +0000
committerTor Brede Vekterli <vekterli@verizonmedia.com>2019-02-21 16:42:34 +0000
commit33ac2ac4e5975db8a3132d2b10950eaf0a4cc877 (patch)
tree6acb813d22b6dfcd6f794fccd18b6c7ec8fcc691
parent67d81887caf35a6c71fae85e7a3e14446c0d8278 (diff)
Add workarounds for legacy global distribution hash handling
This addresses a regression introduced as part of #8479, which in turn was intended to serve as a fix for issue #8475. This regression would stall cluster state convergence when a subset of nodes contained the fix and another subset did not. With the workarounds present, nodes gracefully handle the case where different distribution hashes are expected for the global bucket space. `BucketManager` will now fall back to comparing the new incoming hash to that of the legacy derived distribution config if it mismatches. `PendingClusterState` will try to send a subset of bucket info requests with legacy hash format for the global bucket space iff there has been at least 1 rejected request. All these workarounds will be removed on Vespa 8.
-rw-r--r--storage/src/tests/bucketdb/bucketmanagertest.cpp61
-rw-r--r--storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp66
-rw-r--r--storage/src/tests/distributor/bucketdbupdatertest.cpp56
-rw-r--r--storage/src/vespa/storage/bucketdb/bucketmanager.cpp17
-rw-r--r--storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp49
-rw-r--r--storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h5
-rw-r--r--storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp3
-rw-r--r--storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.h10
-rw-r--r--storage/src/vespa/storage/distributor/pendingclusterstate.cpp31
9 files changed, 277 insertions, 21 deletions
diff --git a/storage/src/tests/bucketdb/bucketmanagertest.cpp b/storage/src/tests/bucketdb/bucketmanagertest.cpp
index 54b3bf4b8d0..09fe310e97e 100644
--- a/storage/src/tests/bucketdb/bucketmanagertest.cpp
+++ b/storage/src/tests/bucketdb/bucketmanagertest.cpp
@@ -8,6 +8,7 @@
#include <vespa/document/update/documentupdate.h>
#include <vespa/document/repo/documenttyperepo.h>
#include <vespa/storage/bucketdb/bucketmanager.h>
+#include <vespa/storage/common/global_bucket_space_distribution_converter.h>
#include <vespa/storage/persistence/filestorage/filestormanager.h>
#include <vespa/storageapi/message/persistence.h>
#include <vespa/storageapi/message/state.h>
@@ -84,6 +85,7 @@ public:
CPPUNIT_TEST(testConflictSetOnlyClearedAfterAllBucketRequestsDone);
CPPUNIT_TEST(testRejectRequestWithMismatchingDistributionHash);
CPPUNIT_TEST(testDbNotIteratedWhenAllRequestsRejected);
+ CPPUNIT_TEST(fall_back_to_legacy_global_distribution_hash_on_mismatch);
// FIXME(vekterli): test is not deterministic and enjoys failing
// sporadically when running under Valgrind. See bug 5932891.
@@ -154,6 +156,7 @@ public:
void testConflictSetOnlyClearedAfterAllBucketRequestsDone();
void testRejectRequestWithMismatchingDistributionHash();
void testDbNotIteratedWhenAllRequestsRejected();
+ void fall_back_to_legacy_global_distribution_hash_on_mismatch();
public:
static constexpr uint32_t DIR_SPREAD = 3;
@@ -785,6 +788,10 @@ public:
return std::make_shared<api::RequestBucketInfoCommand>(makeBucketSpace(), 0, _state, hash);
}
+ auto createFullFetchCommandWithHash(document::BucketSpace space, vespalib::stringref hash) const {
+ return std::make_shared<api::RequestBucketInfoCommand>(space, 0, _state, hash);
+ }
+
auto acquireBucketLockAndSendInfoRequest(const document::BucketId& bucket) {
auto guard = acquireBucketLock(bucket);
// Send down processing command which will block.
@@ -850,6 +857,45 @@ public:
_self._top->getRepliesOnce();
}
+ // TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+ std::unique_ptr<lib::Distribution> default_grouped_distribution() {
+ return std::make_unique<lib::Distribution>(
+ GlobalBucketSpaceDistributionConverter::string_to_config(vespalib::string(
+R"(redundancy 2
+group[3]
+group[0].name "invalid"
+group[0].index "invalid"
+group[0].partitions 1|*
+group[0].nodes[0]
+group[1].name rack0
+group[1].index 0
+group[1].nodes[3]
+group[1].nodes[0].index 0
+group[1].nodes[1].index 1
+group[1].nodes[2].index 2
+group[2].name rack1
+group[2].index 1
+group[2].nodes[3]
+group[2].nodes[0].index 3
+group[2].nodes[1].index 4
+group[2].nodes[2].index 5
+)")));
+ }
+
+ std::shared_ptr<lib::Distribution> derived_global_grouped_distribution(bool use_legacy) {
+ auto default_distr = default_grouped_distribution();
+ return GlobalBucketSpaceDistributionConverter::convert_to_global(*default_distr, use_legacy);
+ }
+
+ void set_grouped_distribution_configs() {
+ auto default_distr = default_grouped_distribution();
+ _self._node->getComponentRegister().getBucketSpaceRepo()
+ .get(document::FixedBucketSpaces::default_space()).setDistribution(std::move(default_distr));
+ auto global_distr = derived_global_grouped_distribution(false);
+ _self._node->getComponentRegister().getBucketSpaceRepo()
+ .get(document::FixedBucketSpaces::global_space()).setDistribution(std::move(global_distr));
+ }
+
private:
BucketManagerTest& _self;
lib::ClusterState _state;
@@ -1358,4 +1404,19 @@ BucketManagerTest::testDbNotIteratedWhenAllRequestsRejected()
auto replies = fixture.awaitAndGetReplies(1);
}
+// TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+void BucketManagerTest::fall_back_to_legacy_global_distribution_hash_on_mismatch() {
+ ConcurrentOperationFixture f(*this);
+
+ f.set_grouped_distribution_configs();
+
+ auto legacy_hash = f.derived_global_grouped_distribution(true)->getNodeGraph().getDistributionConfigHash();
+
+ auto infoCmd = f.createFullFetchCommandWithHash(document::FixedBucketSpaces::global_space(), legacy_hash);
+ _top->sendDown(infoCmd);
+ auto replies = f.awaitAndGetReplies(1);
+ auto& reply = dynamic_cast<api::RequestBucketInfoReply&>(*replies[0]);
+ CPPUNIT_ASSERT_EQUAL(api::ReturnCode::OK, reply.getResult().getResult()); // _not_ REJECTED
+}
+
} // storage
diff --git a/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp b/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp
index 5afea9cd3cd..d75f2ac6459 100644
--- a/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp
+++ b/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp
@@ -17,6 +17,7 @@ struct GlobalBucketSpaceDistributionConverterTest : public CppUnit::TestFixture
CPPUNIT_TEST(config_retired_state_is_propagated);
CPPUNIT_TEST(group_capacities_are_propagated);
CPPUNIT_TEST(global_distribution_has_same_owner_distributors_as_default);
+ CPPUNIT_TEST(can_generate_config_with_legacy_partition_spec);
CPPUNIT_TEST_SUITE_END();
void can_transform_flat_cluster_config();
@@ -27,6 +28,7 @@ struct GlobalBucketSpaceDistributionConverterTest : public CppUnit::TestFixture
void config_retired_state_is_propagated();
void group_capacities_are_propagated();
void global_distribution_has_same_owner_distributors_as_default();
+ void can_generate_config_with_legacy_partition_spec();
};
CPPUNIT_TEST_SUITE_REGISTRATION(GlobalBucketSpaceDistributionConverterTest);
@@ -35,9 +37,9 @@ using DistributionConfig = vespa::config::content::StorDistributionConfig;
namespace {
-vespalib::string default_to_global_config(const vespalib::string& default_config) {
+vespalib::string default_to_global_config(const vespalib::string& default_config, bool legacy_mode = false) {
auto default_cfg = GlobalBucketSpaceDistributionConverter::string_to_config(default_config);
- auto as_global = GlobalBucketSpaceDistributionConverter::convert_to_global(*default_cfg);
+ auto as_global = GlobalBucketSpaceDistributionConverter::convert_to_global(*default_cfg, legacy_mode);
return GlobalBucketSpaceDistributionConverter::config_to_string(*as_global);
}
@@ -377,4 +379,64 @@ group[2].nodes[1].index 2
}
}
+// By "legacy" read "broken", but we need to be able to generate it to support rolling upgrades properly.
+// TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+void GlobalBucketSpaceDistributionConverterTest::can_generate_config_with_legacy_partition_spec() {
+ vespalib::string default_config(
+R"(redundancy 2
+group[3]
+group[0].name "invalid"
+group[0].index "invalid"
+group[0].partitions 1|*
+group[0].nodes[0]
+group[1].name rack0
+group[1].index 0
+group[1].nodes[3]
+group[1].nodes[0].index 0
+group[1].nodes[1].index 1
+group[1].nodes[2].index 2
+group[2].name rack1
+group[2].index 1
+group[2].nodes[3]
+group[2].nodes[0].index 3
+group[2].nodes[1].index 4
+group[2].nodes[2].index 5
+)");
+
+ vespalib::string expected_global_config(
+R"(redundancy 6
+initial_redundancy 0
+ensure_primary_persisted true
+ready_copies 6
+active_per_leaf_group true
+distributor_auto_ownership_transfer_on_whole_group_down true
+group[0].index "invalid"
+group[0].name "invalid"
+group[0].capacity 1
+group[0].partitions "3|3|*"
+group[1].index "0"
+group[1].name "rack0"
+group[1].capacity 1
+group[1].partitions ""
+group[1].nodes[0].index 0
+group[1].nodes[0].retired false
+group[1].nodes[1].index 1
+group[1].nodes[1].retired false
+group[1].nodes[2].index 2
+group[1].nodes[2].retired false
+group[2].index "1"
+group[2].name "rack1"
+group[2].capacity 1
+group[2].partitions ""
+group[2].nodes[0].index 3
+group[2].nodes[0].retired false
+group[2].nodes[1].index 4
+group[2].nodes[1].retired false
+group[2].nodes[2].index 5
+group[2].nodes[2].retired false
+disk_distribution MODULO_BID
+)");
+ CPPUNIT_ASSERT_EQUAL(expected_global_config, default_to_global_config(default_config, true));
+}
+
} \ No newline at end of file
diff --git a/storage/src/tests/distributor/bucketdbupdatertest.cpp b/storage/src/tests/distributor/bucketdbupdatertest.cpp
index 53f80854bef..b2d554c1e42 100644
--- a/storage/src/tests/distributor/bucketdbupdatertest.cpp
+++ b/storage/src/tests/distributor/bucketdbupdatertest.cpp
@@ -111,6 +111,7 @@ class BucketDBUpdaterTest : public CppUnit::TestFixture,
CPPUNIT_TEST(identity_update_of_diverging_untrusted_replicas_does_not_mark_any_as_trusted);
CPPUNIT_TEST(adding_diverging_replica_to_existing_trusted_does_not_remove_trusted);
CPPUNIT_TEST(batch_update_from_distributor_change_does_not_mark_diverging_replicas_as_trusted);
+ CPPUNIT_TEST(global_distribution_hash_falls_back_to_legacy_format_upon_request_rejection);
CPPUNIT_TEST_SUITE_END();
public:
@@ -175,6 +176,7 @@ protected:
void identity_update_of_diverging_untrusted_replicas_does_not_mark_any_as_trusted();
void adding_diverging_replica_to_existing_trusted_does_not_remove_trusted();
void batch_update_from_distributor_change_does_not_mark_diverging_replicas_as_trusted();
+ void global_distribution_hash_falls_back_to_legacy_format_upon_request_rejection();
auto &defaultDistributorBucketSpace() { return getBucketSpaceRepo().get(makeBucketSpace()); }
@@ -505,7 +507,7 @@ public:
std::make_shared<lib::Distribution>(distConfig));
}
- std::string getDistConfig6Nodes3Groups() const {
+ std::string getDistConfig6Nodes2Groups() const {
return ("redundancy 2\n"
"group[3]\n"
"group[0].name \"invalid\"\n"
@@ -692,7 +694,7 @@ BucketDBUpdaterTest::testDistributorChange()
void
BucketDBUpdaterTest::testDistributorChangeWithGrouping()
{
- std::string distConfig(getDistConfig6Nodes3Groups());
+ std::string distConfig(getDistConfig6Nodes2Groups());
setDistribution(distConfig);
int numBuckets = 100;
@@ -2073,7 +2075,7 @@ BucketDBUpdaterTest::testClusterStateAlwaysSendsFullFetchWhenDistributionChangeP
setAndEnableClusterState(stateBefore, expectedMsgs, dummyBucketsToReturn);
}
_sender.clear();
- std::string distConfig(getDistConfig6Nodes3Groups());
+ std::string distConfig(getDistConfig6Nodes2Groups());
setDistribution(distConfig);
sortSentMessagesByIndex(_sender);
CPPUNIT_ASSERT_EQUAL(messageCount(6), _sender.commands.size());
@@ -2549,4 +2551,52 @@ void BucketDBUpdaterTest::batch_update_from_distributor_change_does_not_mark_div
"0:5/1/2/3|1:5/7/8/9", true));
}
+// TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+void BucketDBUpdaterTest::global_distribution_hash_falls_back_to_legacy_format_upon_request_rejection() {
+ std::string distConfig(getDistConfig6Nodes2Groups());
+ setDistribution(distConfig);
+
+ const vespalib::string current_hash = "(0d*|*(0;0;1;2)(1;3;4;5))";
+ const vespalib::string legacy_hash = "(0d3|3|*(0;0;1;2)(1;3;4;5))";
+
+ setSystemState(lib::ClusterState("distributor:6 storage:6"));
+ CPPUNIT_ASSERT_EQUAL(messageCount(6), _sender.commands.size());
+
+ api::RequestBucketInfoCommand* global_req = nullptr;
+ for (auto& cmd : _sender.commands) {
+ auto& req_cmd = dynamic_cast<api::RequestBucketInfoCommand&>(*cmd);
+ if (req_cmd.getBucketSpace() == document::FixedBucketSpaces::global_space()) {
+ global_req = &req_cmd;
+ break;
+ }
+ }
+ CPPUNIT_ASSERT(global_req != nullptr);
+ CPPUNIT_ASSERT_EQUAL(current_hash, global_req->getDistributionHash());
+
+ auto reply = std::make_shared<api::RequestBucketInfoReply>(*global_req);
+ reply->setResult(api::ReturnCode::REJECTED);
+ getBucketDBUpdater().onRequestBucketInfoReply(reply);
+
+ getClock().addSecondsToTime(10);
+ getBucketDBUpdater().resendDelayedMessages();
+
+ // Should now be a resent request with legacy distribution hash
+ CPPUNIT_ASSERT_EQUAL(messageCount(6) + 1, _sender.commands.size());
+ auto& legacy_req = dynamic_cast<api::RequestBucketInfoCommand&>(*_sender.commands.back());
+ CPPUNIT_ASSERT_EQUAL(legacy_hash, legacy_req.getDistributionHash());
+
+ // Now if we reject it _again_ we should cycle back to the current hash
+ // in case it wasn't a hash-based rejection after all. And the circle of life continues.
+ reply = std::make_shared<api::RequestBucketInfoReply>(legacy_req);
+ reply->setResult(api::ReturnCode::REJECTED);
+ getBucketDBUpdater().onRequestBucketInfoReply(reply);
+
+ getClock().addSecondsToTime(10);
+ getBucketDBUpdater().resendDelayedMessages();
+
+ CPPUNIT_ASSERT_EQUAL(messageCount(6) + 2, _sender.commands.size());
+ auto& new_current_req = dynamic_cast<api::RequestBucketInfoCommand&>(*_sender.commands.back());
+ CPPUNIT_ASSERT_EQUAL(current_hash, new_current_req.getDistributionHash());
+}
+
}
diff --git a/storage/src/vespa/storage/bucketdb/bucketmanager.cpp b/storage/src/vespa/storage/bucketdb/bucketmanager.cpp
index 41de215d877..a1c1742edb5 100644
--- a/storage/src/vespa/storage/bucketdb/bucketmanager.cpp
+++ b/storage/src/vespa/storage/bucketdb/bucketmanager.cpp
@@ -6,6 +6,7 @@
#include <iomanip>
#include <vespa/storage/common/content_bucket_space_repo.h>
#include <vespa/storage/common/nodestateupdater.h>
+#include <vespa/storage/common/global_bucket_space_distribution_converter.h>
#include <vespa/vdslib/state/cluster_state_bundle.h>
#include <vespa/storage/storageutil/distributorstatecache.h>
#include <vespa/storageframework/generic/status/htmlstatusreporter.h>
@@ -577,7 +578,21 @@ BucketManager::processRequestBucketInfoCommands(document::BucketSpace bucketSpac
<< " differs from this state.";
} else if (!their_hash.empty() && their_hash != our_hash) {
// Empty hash indicates request from 4.2 protocol or earlier
- error << "Distribution config has changed since request.";
+ // TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+ bool matches_legacy_hash = false;
+ if (bucketSpace == document::FixedBucketSpaces::global_space()) {
+ const auto default_distr =_component.getBucketSpaceRepo()
+ .get(document::FixedBucketSpaces::default_space()).getDistribution();
+ // Convert in legacy distribution mode, which will accept old 'hash' structure.
+ const auto legacy_global_distr = GlobalBucketSpaceDistributionConverter::convert_to_global(
+ *default_distr, true/*use legacy mode*/);
+ const auto legacy_hash = legacy_global_distr->getNodeGraph().getDistributionConfigHash();
+ LOG(debug, "Falling back to comparing against legacy distribution hash: %s", legacy_hash.c_str());
+ matches_legacy_hash = (their_hash == legacy_hash);
+ }
+ if (!matches_legacy_hash) {
+ error << "Distribution config has changed since request.";
+ }
}
if (error.str().empty()) {
std::pair<std::set<uint16_t>::iterator, bool> result(
diff --git a/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp
index 534644458bc..cbcaeef8fdf 100644
--- a/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp
+++ b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp
@@ -59,6 +59,21 @@ vespalib::string sub_groups_to_partition_spec(const Group& parent) {
return spec.str();
}
+// Allow generating legacy (broken) partition specs that may be used transiently
+// during rolling upgrades from a pre-fix version to a post-fix version.
+// TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+vespalib::string sub_groups_to_legacy_partition_spec(const Group& parent) {
+ vespalib::asciistream partitions;
+ // In case of a flat cluster config, this ends up with a partition spec of '*',
+ // which is fine. It basically means "put all replicas in this group", which
+ // happens to be exactly what we want.
+ for (auto& child : parent.sub_groups) {
+ partitions << child.second->nested_leaf_count << '|';
+ }
+ partitions << '*';
+ return partitions.str();
+}
+
bool is_leaf_group(const DistributionConfigBuilder::Group& g) noexcept {
return !g.nodes.empty();
}
@@ -87,19 +102,31 @@ void insert_new_group_into_tree(
void build_transformed_root_group(DistributionConfigBuilder& builder,
const DistributionConfigBuilder::Group& config_source_root,
- const Group& parsed_root) {
+ const Group& parsed_root,
+ bool legacy_mode) {
DistributionConfigBuilder::Group new_root(config_source_root);
- new_root.partitions = sub_groups_to_partition_spec(parsed_root);
+ if (!legacy_mode) {
+ new_root.partitions = sub_groups_to_partition_spec(parsed_root);
+ } else {
+ // TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+ new_root.partitions = sub_groups_to_legacy_partition_spec(parsed_root);
+ }
builder.group.emplace_back(std::move(new_root));
}
void build_transformed_non_root_group(DistributionConfigBuilder& builder,
const DistributionConfigBuilder::Group& config_source_group,
- const Group& parsed_root) {
+ const Group& parsed_root,
+ bool legacy_mode) {
DistributionConfigBuilder::Group new_group(config_source_group);
if (!is_leaf_group(config_source_group)) { // Partition specs only apply to inner nodes
const auto& g = find_non_root_group_by_index(config_source_group.index, parsed_root);
- new_group.partitions = sub_groups_to_partition_spec(g);
+ if (!legacy_mode) {
+ new_group.partitions = sub_groups_to_partition_spec(g);
+ } else {
+ // TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+ new_group.partitions = sub_groups_to_legacy_partition_spec(g);
+ }
}
builder.group.emplace_back(std::move(new_group));
}
@@ -135,16 +162,16 @@ std::unique_ptr<Group> create_group_tree_from_config(const DistributionConfig& s
* transitively, its parents again etc) have already been processed. This directly
* implies that the root group is always the first group present in the config.
*/
-void build_global_groups(DistributionConfigBuilder& builder, const DistributionConfig& source) {
+void build_global_groups(DistributionConfigBuilder& builder, const DistributionConfig& source, bool legacy_mode) {
assert(!source.group.empty()); // TODO gracefully handle empty config?
auto root = create_group_tree_from_config(source);
auto g_iter = source.group.begin();
const auto g_end = source.group.end();
- build_transformed_root_group(builder, *g_iter, *root);
+ build_transformed_root_group(builder, *g_iter, *root, legacy_mode);
++g_iter;
for (; g_iter != g_end; ++g_iter) {
- build_transformed_non_root_group(builder, *g_iter, *root);
+ build_transformed_non_root_group(builder, *g_iter, *root, legacy_mode);
}
builder.redundancy = root->nested_leaf_count;
@@ -154,17 +181,17 @@ void build_global_groups(DistributionConfigBuilder& builder, const DistributionC
} // anon ns
std::shared_ptr<DistributionConfig>
-GlobalBucketSpaceDistributionConverter::convert_to_global(const DistributionConfig& source) {
+GlobalBucketSpaceDistributionConverter::convert_to_global(const DistributionConfig& source, bool legacy_mode) {
DistributionConfigBuilder builder;
set_distribution_invariant_config_fields(builder, source);
- build_global_groups(builder, source);
+ build_global_groups(builder, source, legacy_mode);
return std::make_shared<DistributionConfig>(builder);
}
std::shared_ptr<lib::Distribution>
-GlobalBucketSpaceDistributionConverter::convert_to_global(const lib::Distribution& distr) {
+GlobalBucketSpaceDistributionConverter::convert_to_global(const lib::Distribution& distr, bool legacy_mode) {
const auto src_config = distr.serialize();
- auto global_config = convert_to_global(*string_to_config(src_config));
+ auto global_config = convert_to_global(*string_to_config(src_config), legacy_mode);
return std::make_shared<lib::Distribution>(*global_config);
}
diff --git a/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h
index d135f56a5c1..b2be65dad42 100644
--- a/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h
+++ b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h
@@ -10,8 +10,9 @@ namespace storage {
struct GlobalBucketSpaceDistributionConverter {
using DistributionConfig = vespa::config::content::StorDistributionConfig;
- static std::shared_ptr<DistributionConfig> convert_to_global(const DistributionConfig&);
- static std::shared_ptr<lib::Distribution> convert_to_global(const lib::Distribution&);
+ // TODO remove legacy_mode flags on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+ static std::shared_ptr<DistributionConfig> convert_to_global(const DistributionConfig&, bool legacy_mode = false);
+ static std::shared_ptr<lib::Distribution> convert_to_global(const lib::Distribution&, bool legacy_mode = false);
// Helper functions which may be of use outside this class
static std::unique_ptr<DistributionConfig> string_to_config(const vespalib::string&);
diff --git a/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp
index 2071558628e..c295be19a0b 100644
--- a/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp
+++ b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp
@@ -35,7 +35,8 @@ PendingBucketSpaceDbTransition::PendingBucketSpaceDbTransition(const PendingClus
_pendingClusterState(pendingClusterState),
_distributorBucketSpace(distributorBucketSpace),
_distributorIndex(_clusterInfo->getDistributorIndex()),
- _bucketOwnershipTransfer(distributionChanged)
+ _bucketOwnershipTransfer(distributionChanged),
+ _rejectedRequests()
{
if (distributorChanged()) {
_bucketOwnershipTransfer = true;
diff --git a/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.h b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.h
index 903f9b762fb..7eb2974eb52 100644
--- a/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.h
+++ b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.h
@@ -4,6 +4,7 @@
#include "pending_bucket_space_db_transition_entry.h"
#include "outdated_nodes.h"
#include <vespa/storage/bucketdb/bucketdatabase.h>
+#include <unordered_map>
namespace storage::api { class RequestBucketInfoReply; }
namespace storage::lib { class ClusterState; class State; }
@@ -48,6 +49,7 @@ private:
DistributorBucketSpace &_distributorBucketSpace;
uint16_t _distributorIndex;
bool _bucketOwnershipTransfer;
+ std::unordered_map<uint16_t, size_t> _rejectedRequests;
// BucketDataBase::MutableEntryProcessor API
bool process(BucketDatabase::Entry& e) override;
@@ -111,6 +113,14 @@ public:
// Methods used by unit tests.
const EntryList& results() const { return _entries; }
void addNodeInfo(const document::BucketId& id, const BucketCopy& copy);
+
+ void incrementRequestRejections(uint16_t node) {
+ _rejectedRequests[node]++;
+ }
+ size_t rejectedRequests(uint16_t node) const {
+ auto iter = _rejectedRequests.find(node);
+ return ((iter != _rejectedRequests.end()) ? iter->second : 0);
+ }
};
}
diff --git a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp
index 1996ae9d2af..5f74a82c28a 100644
--- a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp
+++ b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp
@@ -7,6 +7,7 @@
#include "distributor_bucket_space.h"
#include <vespa/storageframework/defaultimplementation/clock/realclock.h>
#include <vespa/storage/common/bucketoperationlogger.h>
+#include <vespa/storage/common/global_bucket_space_distribution_converter.h>
#include <vespa/document/bucket/fixed_bucket_spaces.h>
#include <vespa/vespalib/util/xmlstream.hpp>
#include <climits>
@@ -185,7 +186,30 @@ PendingClusterState::requestNode(BucketSpaceAndNode bucketSpaceAndNode)
{
const auto &distributorBucketSpace(_bucketSpaceRepo.get(bucketSpaceAndNode.bucketSpace));
const auto &distribution(distributorBucketSpace.getDistribution());
- vespalib::string distributionHash(distribution.getNodeGraph().getDistributionConfigHash());
+ vespalib::string distributionHash;
+ // TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+ bool sendLegacyHash = false;
+ if (bucketSpaceAndNode.bucketSpace == document::FixedBucketSpaces::global_space()) {
+ auto transitionIter = _pendingTransitions.find(bucketSpaceAndNode.bucketSpace);
+ assert(transitionIter != _pendingTransitions.end());
+ // First request cannot have been rejected yet and will thus be sent with non-legacy hash.
+ // Subsequent requests will be sent 50-50. This is because a request may be rejected due to
+ // other reasons than just the hash mismatching, so if we don't cycle back to the non-legacy
+ // hash we risk never converging.
+ sendLegacyHash = ((transitionIter->second->rejectedRequests(bucketSpaceAndNode.node) % 2) == 1);
+ }
+ if (!sendLegacyHash) {
+ distributionHash = distribution.getNodeGraph().getDistributionConfigHash();
+ } else {
+ const auto& defaultSpace = _bucketSpaceRepo.get(document::FixedBucketSpaces::default_space());
+ // Generate legacy distribution hash explicitly.
+ auto legacyGlobalDistr = GlobalBucketSpaceDistributionConverter::convert_to_global(
+ defaultSpace.getDistribution(), true/*use legacy mode*/);
+ distributionHash = legacyGlobalDistr->getNodeGraph().getDistributionConfigHash();
+ LOG(debug, "Falling back to sending legacy hash to node %u: %s",
+ bucketSpaceAndNode.node, distributionHash.c_str());
+ }
+
LOG(debug,
"Requesting bucket info for bucket space %" PRIu64 " node %d with cluster state '%s' "
"and distribution hash '%s'",
@@ -238,6 +262,11 @@ PendingClusterState::onRequestBucketInfoReply(const std::shared_ptr<api::Request
resendTime += framework::MilliSecTime(100);
_delayedRequests.emplace_back(resendTime, bucketSpaceAndNode);
_sentMessages.erase(iter);
+ if (result.getResult() == api::ReturnCode::REJECTED) {
+ auto transitionIter = _pendingTransitions.find(bucketSpaceAndNode.bucketSpace);
+ assert(transitionIter != _pendingTransitions.end());
+ transitionIter->second->incrementRequestRejections(bucketSpaceAndNode.node);
+ }
return true;
}