summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeir Storli <geirst@verizonmedia.com>2019-02-22 10:18:11 +0100
committerGitHub <noreply@github.com>2019-02-22 10:18:11 +0100
commit8bd7b6534fab28d507629fca1c109fff65585c40 (patch)
tree30b18b46227bb346db41b34b155fb1b11c4a8beb
parent21a0951fb8906d60a6c2f565f6aac40087e986fe (diff)
parent33ac2ac4e5975db8a3132d2b10950eaf0a4cc877 (diff)
Merge pull request #8580 from vespa-engine/vekterli/add-workarounds-for-distribution-hash-mismatch-convergence-issue
Add workarounds for legacy global distribution hash handling
-rw-r--r--storage/src/tests/bucketdb/bucketmanagertest.cpp61
-rw-r--r--storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp66
-rw-r--r--storage/src/tests/distributor/bucketdbupdatertest.cpp56
-rw-r--r--storage/src/vespa/storage/bucketdb/bucketmanager.cpp17
-rw-r--r--storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp49
-rw-r--r--storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h5
-rw-r--r--storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp3
-rw-r--r--storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.h10
-rw-r--r--storage/src/vespa/storage/distributor/pendingclusterstate.cpp31
9 files changed, 277 insertions, 21 deletions
diff --git a/storage/src/tests/bucketdb/bucketmanagertest.cpp b/storage/src/tests/bucketdb/bucketmanagertest.cpp
index 54b3bf4b8d0..09fe310e97e 100644
--- a/storage/src/tests/bucketdb/bucketmanagertest.cpp
+++ b/storage/src/tests/bucketdb/bucketmanagertest.cpp
@@ -8,6 +8,7 @@
#include <vespa/document/update/documentupdate.h>
#include <vespa/document/repo/documenttyperepo.h>
#include <vespa/storage/bucketdb/bucketmanager.h>
+#include <vespa/storage/common/global_bucket_space_distribution_converter.h>
#include <vespa/storage/persistence/filestorage/filestormanager.h>
#include <vespa/storageapi/message/persistence.h>
#include <vespa/storageapi/message/state.h>
@@ -84,6 +85,7 @@ public:
CPPUNIT_TEST(testConflictSetOnlyClearedAfterAllBucketRequestsDone);
CPPUNIT_TEST(testRejectRequestWithMismatchingDistributionHash);
CPPUNIT_TEST(testDbNotIteratedWhenAllRequestsRejected);
+ CPPUNIT_TEST(fall_back_to_legacy_global_distribution_hash_on_mismatch);
// FIXME(vekterli): test is not deterministic and enjoys failing
// sporadically when running under Valgrind. See bug 5932891.
@@ -154,6 +156,7 @@ public:
void testConflictSetOnlyClearedAfterAllBucketRequestsDone();
void testRejectRequestWithMismatchingDistributionHash();
void testDbNotIteratedWhenAllRequestsRejected();
+ void fall_back_to_legacy_global_distribution_hash_on_mismatch();
public:
static constexpr uint32_t DIR_SPREAD = 3;
@@ -785,6 +788,10 @@ public:
return std::make_shared<api::RequestBucketInfoCommand>(makeBucketSpace(), 0, _state, hash);
}
+ auto createFullFetchCommandWithHash(document::BucketSpace space, vespalib::stringref hash) const {
+ return std::make_shared<api::RequestBucketInfoCommand>(space, 0, _state, hash);
+ }
+
auto acquireBucketLockAndSendInfoRequest(const document::BucketId& bucket) {
auto guard = acquireBucketLock(bucket);
// Send down processing command which will block.
@@ -850,6 +857,45 @@ public:
_self._top->getRepliesOnce();
}
+ // TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+ std::unique_ptr<lib::Distribution> default_grouped_distribution() {
+ return std::make_unique<lib::Distribution>(
+ GlobalBucketSpaceDistributionConverter::string_to_config(vespalib::string(
+R"(redundancy 2
+group[3]
+group[0].name "invalid"
+group[0].index "invalid"
+group[0].partitions 1|*
+group[0].nodes[0]
+group[1].name rack0
+group[1].index 0
+group[1].nodes[3]
+group[1].nodes[0].index 0
+group[1].nodes[1].index 1
+group[1].nodes[2].index 2
+group[2].name rack1
+group[2].index 1
+group[2].nodes[3]
+group[2].nodes[0].index 3
+group[2].nodes[1].index 4
+group[2].nodes[2].index 5
+)")));
+ }
+
+ std::shared_ptr<lib::Distribution> derived_global_grouped_distribution(bool use_legacy) {
+ auto default_distr = default_grouped_distribution();
+ return GlobalBucketSpaceDistributionConverter::convert_to_global(*default_distr, use_legacy);
+ }
+
+ void set_grouped_distribution_configs() {
+ auto default_distr = default_grouped_distribution();
+ _self._node->getComponentRegister().getBucketSpaceRepo()
+ .get(document::FixedBucketSpaces::default_space()).setDistribution(std::move(default_distr));
+ auto global_distr = derived_global_grouped_distribution(false);
+ _self._node->getComponentRegister().getBucketSpaceRepo()
+ .get(document::FixedBucketSpaces::global_space()).setDistribution(std::move(global_distr));
+ }
+
private:
BucketManagerTest& _self;
lib::ClusterState _state;
@@ -1358,4 +1404,19 @@ BucketManagerTest::testDbNotIteratedWhenAllRequestsRejected()
auto replies = fixture.awaitAndGetReplies(1);
}
+// TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+void BucketManagerTest::fall_back_to_legacy_global_distribution_hash_on_mismatch() {
+ ConcurrentOperationFixture f(*this);
+
+ f.set_grouped_distribution_configs();
+
+ auto legacy_hash = f.derived_global_grouped_distribution(true)->getNodeGraph().getDistributionConfigHash();
+
+ auto infoCmd = f.createFullFetchCommandWithHash(document::FixedBucketSpaces::global_space(), legacy_hash);
+ _top->sendDown(infoCmd);
+ auto replies = f.awaitAndGetReplies(1);
+ auto& reply = dynamic_cast<api::RequestBucketInfoReply&>(*replies[0]);
+ CPPUNIT_ASSERT_EQUAL(api::ReturnCode::OK, reply.getResult().getResult()); // _not_ REJECTED
+}
+
} // storage
diff --git a/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp b/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp
index 5afea9cd3cd..d75f2ac6459 100644
--- a/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp
+++ b/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp
@@ -17,6 +17,7 @@ struct GlobalBucketSpaceDistributionConverterTest : public CppUnit::TestFixture
CPPUNIT_TEST(config_retired_state_is_propagated);
CPPUNIT_TEST(group_capacities_are_propagated);
CPPUNIT_TEST(global_distribution_has_same_owner_distributors_as_default);
+ CPPUNIT_TEST(can_generate_config_with_legacy_partition_spec);
CPPUNIT_TEST_SUITE_END();
void can_transform_flat_cluster_config();
@@ -27,6 +28,7 @@ struct GlobalBucketSpaceDistributionConverterTest : public CppUnit::TestFixture
void config_retired_state_is_propagated();
void group_capacities_are_propagated();
void global_distribution_has_same_owner_distributors_as_default();
+ void can_generate_config_with_legacy_partition_spec();
};
CPPUNIT_TEST_SUITE_REGISTRATION(GlobalBucketSpaceDistributionConverterTest);
@@ -35,9 +37,9 @@ using DistributionConfig = vespa::config::content::StorDistributionConfig;
namespace {
-vespalib::string default_to_global_config(const vespalib::string& default_config) {
+vespalib::string default_to_global_config(const vespalib::string& default_config, bool legacy_mode = false) {
auto default_cfg = GlobalBucketSpaceDistributionConverter::string_to_config(default_config);
- auto as_global = GlobalBucketSpaceDistributionConverter::convert_to_global(*default_cfg);
+ auto as_global = GlobalBucketSpaceDistributionConverter::convert_to_global(*default_cfg, legacy_mode);
return GlobalBucketSpaceDistributionConverter::config_to_string(*as_global);
}
@@ -377,4 +379,64 @@ group[2].nodes[1].index 2
}
}
+// By "legacy" read "broken", but we need to be able to generate it to support rolling upgrades properly.
+// TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+void GlobalBucketSpaceDistributionConverterTest::can_generate_config_with_legacy_partition_spec() {
+ vespalib::string default_config(
+R"(redundancy 2
+group[3]
+group[0].name "invalid"
+group[0].index "invalid"
+group[0].partitions 1|*
+group[0].nodes[0]
+group[1].name rack0
+group[1].index 0
+group[1].nodes[3]
+group[1].nodes[0].index 0
+group[1].nodes[1].index 1
+group[1].nodes[2].index 2
+group[2].name rack1
+group[2].index 1
+group[2].nodes[3]
+group[2].nodes[0].index 3
+group[2].nodes[1].index 4
+group[2].nodes[2].index 5
+)");
+
+ vespalib::string expected_global_config(
+R"(redundancy 6
+initial_redundancy 0
+ensure_primary_persisted true
+ready_copies 6
+active_per_leaf_group true
+distributor_auto_ownership_transfer_on_whole_group_down true
+group[0].index "invalid"
+group[0].name "invalid"
+group[0].capacity 1
+group[0].partitions "3|3|*"
+group[1].index "0"
+group[1].name "rack0"
+group[1].capacity 1
+group[1].partitions ""
+group[1].nodes[0].index 0
+group[1].nodes[0].retired false
+group[1].nodes[1].index 1
+group[1].nodes[1].retired false
+group[1].nodes[2].index 2
+group[1].nodes[2].retired false
+group[2].index "1"
+group[2].name "rack1"
+group[2].capacity 1
+group[2].partitions ""
+group[2].nodes[0].index 3
+group[2].nodes[0].retired false
+group[2].nodes[1].index 4
+group[2].nodes[1].retired false
+group[2].nodes[2].index 5
+group[2].nodes[2].retired false
+disk_distribution MODULO_BID
+)");
+ CPPUNIT_ASSERT_EQUAL(expected_global_config, default_to_global_config(default_config, true));
+}
+
} \ No newline at end of file
diff --git a/storage/src/tests/distributor/bucketdbupdatertest.cpp b/storage/src/tests/distributor/bucketdbupdatertest.cpp
index 53f80854bef..b2d554c1e42 100644
--- a/storage/src/tests/distributor/bucketdbupdatertest.cpp
+++ b/storage/src/tests/distributor/bucketdbupdatertest.cpp
@@ -111,6 +111,7 @@ class BucketDBUpdaterTest : public CppUnit::TestFixture,
CPPUNIT_TEST(identity_update_of_diverging_untrusted_replicas_does_not_mark_any_as_trusted);
CPPUNIT_TEST(adding_diverging_replica_to_existing_trusted_does_not_remove_trusted);
CPPUNIT_TEST(batch_update_from_distributor_change_does_not_mark_diverging_replicas_as_trusted);
+ CPPUNIT_TEST(global_distribution_hash_falls_back_to_legacy_format_upon_request_rejection);
CPPUNIT_TEST_SUITE_END();
public:
@@ -175,6 +176,7 @@ protected:
void identity_update_of_diverging_untrusted_replicas_does_not_mark_any_as_trusted();
void adding_diverging_replica_to_existing_trusted_does_not_remove_trusted();
void batch_update_from_distributor_change_does_not_mark_diverging_replicas_as_trusted();
+ void global_distribution_hash_falls_back_to_legacy_format_upon_request_rejection();
auto &defaultDistributorBucketSpace() { return getBucketSpaceRepo().get(makeBucketSpace()); }
@@ -505,7 +507,7 @@ public:
std::make_shared<lib::Distribution>(distConfig));
}
- std::string getDistConfig6Nodes3Groups() const {
+ std::string getDistConfig6Nodes2Groups() const {
return ("redundancy 2\n"
"group[3]\n"
"group[0].name \"invalid\"\n"
@@ -692,7 +694,7 @@ BucketDBUpdaterTest::testDistributorChange()
void
BucketDBUpdaterTest::testDistributorChangeWithGrouping()
{
- std::string distConfig(getDistConfig6Nodes3Groups());
+ std::string distConfig(getDistConfig6Nodes2Groups());
setDistribution(distConfig);
int numBuckets = 100;
@@ -2073,7 +2075,7 @@ BucketDBUpdaterTest::testClusterStateAlwaysSendsFullFetchWhenDistributionChangeP
setAndEnableClusterState(stateBefore, expectedMsgs, dummyBucketsToReturn);
}
_sender.clear();
- std::string distConfig(getDistConfig6Nodes3Groups());
+ std::string distConfig(getDistConfig6Nodes2Groups());
setDistribution(distConfig);
sortSentMessagesByIndex(_sender);
CPPUNIT_ASSERT_EQUAL(messageCount(6), _sender.commands.size());
@@ -2549,4 +2551,52 @@ void BucketDBUpdaterTest::batch_update_from_distributor_change_does_not_mark_div
"0:5/1/2/3|1:5/7/8/9", true));
}
+// TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+void BucketDBUpdaterTest::global_distribution_hash_falls_back_to_legacy_format_upon_request_rejection() {
+ std::string distConfig(getDistConfig6Nodes2Groups());
+ setDistribution(distConfig);
+
+ const vespalib::string current_hash = "(0d*|*(0;0;1;2)(1;3;4;5))";
+ const vespalib::string legacy_hash = "(0d3|3|*(0;0;1;2)(1;3;4;5))";
+
+ setSystemState(lib::ClusterState("distributor:6 storage:6"));
+ CPPUNIT_ASSERT_EQUAL(messageCount(6), _sender.commands.size());
+
+ api::RequestBucketInfoCommand* global_req = nullptr;
+ for (auto& cmd : _sender.commands) {
+ auto& req_cmd = dynamic_cast<api::RequestBucketInfoCommand&>(*cmd);
+ if (req_cmd.getBucketSpace() == document::FixedBucketSpaces::global_space()) {
+ global_req = &req_cmd;
+ break;
+ }
+ }
+ CPPUNIT_ASSERT(global_req != nullptr);
+ CPPUNIT_ASSERT_EQUAL(current_hash, global_req->getDistributionHash());
+
+ auto reply = std::make_shared<api::RequestBucketInfoReply>(*global_req);
+ reply->setResult(api::ReturnCode::REJECTED);
+ getBucketDBUpdater().onRequestBucketInfoReply(reply);
+
+ getClock().addSecondsToTime(10);
+ getBucketDBUpdater().resendDelayedMessages();
+
+ // Should now be a resent request with legacy distribution hash
+ CPPUNIT_ASSERT_EQUAL(messageCount(6) + 1, _sender.commands.size());
+ auto& legacy_req = dynamic_cast<api::RequestBucketInfoCommand&>(*_sender.commands.back());
+ CPPUNIT_ASSERT_EQUAL(legacy_hash, legacy_req.getDistributionHash());
+
+ // Now if we reject it _again_ we should cycle back to the current hash
+ // in case it wasn't a hash-based rejection after all. And the circle of life continues.
+ reply = std::make_shared<api::RequestBucketInfoReply>(legacy_req);
+ reply->setResult(api::ReturnCode::REJECTED);
+ getBucketDBUpdater().onRequestBucketInfoReply(reply);
+
+ getClock().addSecondsToTime(10);
+ getBucketDBUpdater().resendDelayedMessages();
+
+ CPPUNIT_ASSERT_EQUAL(messageCount(6) + 2, _sender.commands.size());
+ auto& new_current_req = dynamic_cast<api::RequestBucketInfoCommand&>(*_sender.commands.back());
+ CPPUNIT_ASSERT_EQUAL(current_hash, new_current_req.getDistributionHash());
+}
+
}
diff --git a/storage/src/vespa/storage/bucketdb/bucketmanager.cpp b/storage/src/vespa/storage/bucketdb/bucketmanager.cpp
index 41de215d877..a1c1742edb5 100644
--- a/storage/src/vespa/storage/bucketdb/bucketmanager.cpp
+++ b/storage/src/vespa/storage/bucketdb/bucketmanager.cpp
@@ -6,6 +6,7 @@
#include <iomanip>
#include <vespa/storage/common/content_bucket_space_repo.h>
#include <vespa/storage/common/nodestateupdater.h>
+#include <vespa/storage/common/global_bucket_space_distribution_converter.h>
#include <vespa/vdslib/state/cluster_state_bundle.h>
#include <vespa/storage/storageutil/distributorstatecache.h>
#include <vespa/storageframework/generic/status/htmlstatusreporter.h>
@@ -577,7 +578,21 @@ BucketManager::processRequestBucketInfoCommands(document::BucketSpace bucketSpac
<< " differs from this state.";
} else if (!their_hash.empty() && their_hash != our_hash) {
// Empty hash indicates request from 4.2 protocol or earlier
- error << "Distribution config has changed since request.";
+ // TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+ bool matches_legacy_hash = false;
+ if (bucketSpace == document::FixedBucketSpaces::global_space()) {
+ const auto default_distr =_component.getBucketSpaceRepo()
+ .get(document::FixedBucketSpaces::default_space()).getDistribution();
+ // Convert in legacy distribution mode, which will accept old 'hash' structure.
+ const auto legacy_global_distr = GlobalBucketSpaceDistributionConverter::convert_to_global(
+ *default_distr, true/*use legacy mode*/);
+ const auto legacy_hash = legacy_global_distr->getNodeGraph().getDistributionConfigHash();
+ LOG(debug, "Falling back to comparing against legacy distribution hash: %s", legacy_hash.c_str());
+ matches_legacy_hash = (their_hash == legacy_hash);
+ }
+ if (!matches_legacy_hash) {
+ error << "Distribution config has changed since request.";
+ }
}
if (error.str().empty()) {
std::pair<std::set<uint16_t>::iterator, bool> result(
diff --git a/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp
index 534644458bc..cbcaeef8fdf 100644
--- a/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp
+++ b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp
@@ -59,6 +59,21 @@ vespalib::string sub_groups_to_partition_spec(const Group& parent) {
return spec.str();
}
+// Allow generating legacy (broken) partition specs that may be used transiently
+// during rolling upgrades from a pre-fix version to a post-fix version.
+// TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+vespalib::string sub_groups_to_legacy_partition_spec(const Group& parent) {
+ vespalib::asciistream partitions;
+ // In case of a flat cluster config, this ends up with a partition spec of '*',
+ // which is fine. It basically means "put all replicas in this group", which
+ // happens to be exactly what we want.
+ for (auto& child : parent.sub_groups) {
+ partitions << child.second->nested_leaf_count << '|';
+ }
+ partitions << '*';
+ return partitions.str();
+}
+
bool is_leaf_group(const DistributionConfigBuilder::Group& g) noexcept {
return !g.nodes.empty();
}
@@ -87,19 +102,31 @@ void insert_new_group_into_tree(
void build_transformed_root_group(DistributionConfigBuilder& builder,
const DistributionConfigBuilder::Group& config_source_root,
- const Group& parsed_root) {
+ const Group& parsed_root,
+ bool legacy_mode) {
DistributionConfigBuilder::Group new_root(config_source_root);
- new_root.partitions = sub_groups_to_partition_spec(parsed_root);
+ if (!legacy_mode) {
+ new_root.partitions = sub_groups_to_partition_spec(parsed_root);
+ } else {
+ // TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+ new_root.partitions = sub_groups_to_legacy_partition_spec(parsed_root);
+ }
builder.group.emplace_back(std::move(new_root));
}
void build_transformed_non_root_group(DistributionConfigBuilder& builder,
const DistributionConfigBuilder::Group& config_source_group,
- const Group& parsed_root) {
+ const Group& parsed_root,
+ bool legacy_mode) {
DistributionConfigBuilder::Group new_group(config_source_group);
if (!is_leaf_group(config_source_group)) { // Partition specs only apply to inner nodes
const auto& g = find_non_root_group_by_index(config_source_group.index, parsed_root);
- new_group.partitions = sub_groups_to_partition_spec(g);
+ if (!legacy_mode) {
+ new_group.partitions = sub_groups_to_partition_spec(g);
+ } else {
+ // TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+ new_group.partitions = sub_groups_to_legacy_partition_spec(g);
+ }
}
builder.group.emplace_back(std::move(new_group));
}
@@ -135,16 +162,16 @@ std::unique_ptr<Group> create_group_tree_from_config(const DistributionConfig& s
* transitively, its parents again etc) have already been processed. This directly
* implies that the root group is always the first group present in the config.
*/
-void build_global_groups(DistributionConfigBuilder& builder, const DistributionConfig& source) {
+void build_global_groups(DistributionConfigBuilder& builder, const DistributionConfig& source, bool legacy_mode) {
assert(!source.group.empty()); // TODO gracefully handle empty config?
auto root = create_group_tree_from_config(source);
auto g_iter = source.group.begin();
const auto g_end = source.group.end();
- build_transformed_root_group(builder, *g_iter, *root);
+ build_transformed_root_group(builder, *g_iter, *root, legacy_mode);
++g_iter;
for (; g_iter != g_end; ++g_iter) {
- build_transformed_non_root_group(builder, *g_iter, *root);
+ build_transformed_non_root_group(builder, *g_iter, *root, legacy_mode);
}
builder.redundancy = root->nested_leaf_count;
@@ -154,17 +181,17 @@ void build_global_groups(DistributionConfigBuilder& builder, const DistributionC
} // anon ns
std::shared_ptr<DistributionConfig>
-GlobalBucketSpaceDistributionConverter::convert_to_global(const DistributionConfig& source) {
+GlobalBucketSpaceDistributionConverter::convert_to_global(const DistributionConfig& source, bool legacy_mode) {
DistributionConfigBuilder builder;
set_distribution_invariant_config_fields(builder, source);
- build_global_groups(builder, source);
+ build_global_groups(builder, source, legacy_mode);
return std::make_shared<DistributionConfig>(builder);
}
std::shared_ptr<lib::Distribution>
-GlobalBucketSpaceDistributionConverter::convert_to_global(const lib::Distribution& distr) {
+GlobalBucketSpaceDistributionConverter::convert_to_global(const lib::Distribution& distr, bool legacy_mode) {
const auto src_config = distr.serialize();
- auto global_config = convert_to_global(*string_to_config(src_config));
+ auto global_config = convert_to_global(*string_to_config(src_config), legacy_mode);
return std::make_shared<lib::Distribution>(*global_config);
}
diff --git a/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h
index d135f56a5c1..b2be65dad42 100644
--- a/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h
+++ b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h
@@ -10,8 +10,9 @@ namespace storage {
struct GlobalBucketSpaceDistributionConverter {
using DistributionConfig = vespa::config::content::StorDistributionConfig;
- static std::shared_ptr<DistributionConfig> convert_to_global(const DistributionConfig&);
- static std::shared_ptr<lib::Distribution> convert_to_global(const lib::Distribution&);
+ // TODO remove legacy_mode flags on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+ static std::shared_ptr<DistributionConfig> convert_to_global(const DistributionConfig&, bool legacy_mode = false);
+ static std::shared_ptr<lib::Distribution> convert_to_global(const lib::Distribution&, bool legacy_mode = false);
// Helper functions which may be of use outside this class
static std::unique_ptr<DistributionConfig> string_to_config(const vespalib::string&);
diff --git a/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp
index 2071558628e..c295be19a0b 100644
--- a/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp
+++ b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp
@@ -35,7 +35,8 @@ PendingBucketSpaceDbTransition::PendingBucketSpaceDbTransition(const PendingClus
_pendingClusterState(pendingClusterState),
_distributorBucketSpace(distributorBucketSpace),
_distributorIndex(_clusterInfo->getDistributorIndex()),
- _bucketOwnershipTransfer(distributionChanged)
+ _bucketOwnershipTransfer(distributionChanged),
+ _rejectedRequests()
{
if (distributorChanged()) {
_bucketOwnershipTransfer = true;
diff --git a/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.h b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.h
index 903f9b762fb..7eb2974eb52 100644
--- a/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.h
+++ b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.h
@@ -4,6 +4,7 @@
#include "pending_bucket_space_db_transition_entry.h"
#include "outdated_nodes.h"
#include <vespa/storage/bucketdb/bucketdatabase.h>
+#include <unordered_map>
namespace storage::api { class RequestBucketInfoReply; }
namespace storage::lib { class ClusterState; class State; }
@@ -48,6 +49,7 @@ private:
DistributorBucketSpace &_distributorBucketSpace;
uint16_t _distributorIndex;
bool _bucketOwnershipTransfer;
+ std::unordered_map<uint16_t, size_t> _rejectedRequests;
// BucketDataBase::MutableEntryProcessor API
bool process(BucketDatabase::Entry& e) override;
@@ -111,6 +113,14 @@ public:
// Methods used by unit tests.
const EntryList& results() const { return _entries; }
void addNodeInfo(const document::BucketId& id, const BucketCopy& copy);
+
+ void incrementRequestRejections(uint16_t node) {
+ _rejectedRequests[node]++;
+ }
+ size_t rejectedRequests(uint16_t node) const {
+ auto iter = _rejectedRequests.find(node);
+ return ((iter != _rejectedRequests.end()) ? iter->second : 0);
+ }
};
}
diff --git a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp
index 1996ae9d2af..5f74a82c28a 100644
--- a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp
+++ b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp
@@ -7,6 +7,7 @@
#include "distributor_bucket_space.h"
#include <vespa/storageframework/defaultimplementation/clock/realclock.h>
#include <vespa/storage/common/bucketoperationlogger.h>
+#include <vespa/storage/common/global_bucket_space_distribution_converter.h>
#include <vespa/document/bucket/fixed_bucket_spaces.h>
#include <vespa/vespalib/util/xmlstream.hpp>
#include <climits>
@@ -185,7 +186,30 @@ PendingClusterState::requestNode(BucketSpaceAndNode bucketSpaceAndNode)
{
const auto &distributorBucketSpace(_bucketSpaceRepo.get(bucketSpaceAndNode.bucketSpace));
const auto &distribution(distributorBucketSpace.getDistribution());
- vespalib::string distributionHash(distribution.getNodeGraph().getDistributionConfigHash());
+ vespalib::string distributionHash;
+ // TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475
+ bool sendLegacyHash = false;
+ if (bucketSpaceAndNode.bucketSpace == document::FixedBucketSpaces::global_space()) {
+ auto transitionIter = _pendingTransitions.find(bucketSpaceAndNode.bucketSpace);
+ assert(transitionIter != _pendingTransitions.end());
+ // First request cannot have been rejected yet and will thus be sent with non-legacy hash.
+ // Subsequent requests will be sent 50-50. This is because a request may be rejected due to
+ // other reasons than just the hash mismatching, so if we don't cycle back to the non-legacy
+ // hash we risk never converging.
+ sendLegacyHash = ((transitionIter->second->rejectedRequests(bucketSpaceAndNode.node) % 2) == 1);
+ }
+ if (!sendLegacyHash) {
+ distributionHash = distribution.getNodeGraph().getDistributionConfigHash();
+ } else {
+ const auto& defaultSpace = _bucketSpaceRepo.get(document::FixedBucketSpaces::default_space());
+ // Generate legacy distribution hash explicitly.
+ auto legacyGlobalDistr = GlobalBucketSpaceDistributionConverter::convert_to_global(
+ defaultSpace.getDistribution(), true/*use legacy mode*/);
+ distributionHash = legacyGlobalDistr->getNodeGraph().getDistributionConfigHash();
+ LOG(debug, "Falling back to sending legacy hash to node %u: %s",
+ bucketSpaceAndNode.node, distributionHash.c_str());
+ }
+
LOG(debug,
"Requesting bucket info for bucket space %" PRIu64 " node %d with cluster state '%s' "
"and distribution hash '%s'",
@@ -238,6 +262,11 @@ PendingClusterState::onRequestBucketInfoReply(const std::shared_ptr<api::Request
resendTime += framework::MilliSecTime(100);
_delayedRequests.emplace_back(resendTime, bucketSpaceAndNode);
_sentMessages.erase(iter);
+ if (result.getResult() == api::ReturnCode::REJECTED) {
+ auto transitionIter = _pendingTransitions.find(bucketSpaceAndNode.bucketSpace);
+ assert(transitionIter != _pendingTransitions.end());
+ transitionIter->second->incrementRequestRejections(bucketSpaceAndNode.node);
+ }
return true;
}