diff options
author | Geir Storli <geirst@verizonmedia.com> | 2019-02-22 10:18:11 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-02-22 10:18:11 +0100 |
commit | 8bd7b6534fab28d507629fca1c109fff65585c40 (patch) | |
tree | 30b18b46227bb346db41b34b155fb1b11c4a8beb | |
parent | 21a0951fb8906d60a6c2f565f6aac40087e986fe (diff) | |
parent | 33ac2ac4e5975db8a3132d2b10950eaf0a4cc877 (diff) |
Merge pull request #8580 from vespa-engine/vekterli/add-workarounds-for-distribution-hash-mismatch-convergence-issue
Add workarounds for legacy global distribution hash handling
9 files changed, 277 insertions, 21 deletions
diff --git a/storage/src/tests/bucketdb/bucketmanagertest.cpp b/storage/src/tests/bucketdb/bucketmanagertest.cpp index 54b3bf4b8d0..09fe310e97e 100644 --- a/storage/src/tests/bucketdb/bucketmanagertest.cpp +++ b/storage/src/tests/bucketdb/bucketmanagertest.cpp @@ -8,6 +8,7 @@ #include <vespa/document/update/documentupdate.h> #include <vespa/document/repo/documenttyperepo.h> #include <vespa/storage/bucketdb/bucketmanager.h> +#include <vespa/storage/common/global_bucket_space_distribution_converter.h> #include <vespa/storage/persistence/filestorage/filestormanager.h> #include <vespa/storageapi/message/persistence.h> #include <vespa/storageapi/message/state.h> @@ -84,6 +85,7 @@ public: CPPUNIT_TEST(testConflictSetOnlyClearedAfterAllBucketRequestsDone); CPPUNIT_TEST(testRejectRequestWithMismatchingDistributionHash); CPPUNIT_TEST(testDbNotIteratedWhenAllRequestsRejected); + CPPUNIT_TEST(fall_back_to_legacy_global_distribution_hash_on_mismatch); // FIXME(vekterli): test is not deterministic and enjoys failing // sporadically when running under Valgrind. See bug 5932891. @@ -154,6 +156,7 @@ public: void testConflictSetOnlyClearedAfterAllBucketRequestsDone(); void testRejectRequestWithMismatchingDistributionHash(); void testDbNotIteratedWhenAllRequestsRejected(); + void fall_back_to_legacy_global_distribution_hash_on_mismatch(); public: static constexpr uint32_t DIR_SPREAD = 3; @@ -785,6 +788,10 @@ public: return std::make_shared<api::RequestBucketInfoCommand>(makeBucketSpace(), 0, _state, hash); } + auto createFullFetchCommandWithHash(document::BucketSpace space, vespalib::stringref hash) const { + return std::make_shared<api::RequestBucketInfoCommand>(space, 0, _state, hash); + } + auto acquireBucketLockAndSendInfoRequest(const document::BucketId& bucket) { auto guard = acquireBucketLock(bucket); // Send down processing command which will block. @@ -850,6 +857,45 @@ public: _self._top->getRepliesOnce(); } + // TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475 + std::unique_ptr<lib::Distribution> default_grouped_distribution() { + return std::make_unique<lib::Distribution>( + GlobalBucketSpaceDistributionConverter::string_to_config(vespalib::string( +R"(redundancy 2 +group[3] +group[0].name "invalid" +group[0].index "invalid" +group[0].partitions 1|* +group[0].nodes[0] +group[1].name rack0 +group[1].index 0 +group[1].nodes[3] +group[1].nodes[0].index 0 +group[1].nodes[1].index 1 +group[1].nodes[2].index 2 +group[2].name rack1 +group[2].index 1 +group[2].nodes[3] +group[2].nodes[0].index 3 +group[2].nodes[1].index 4 +group[2].nodes[2].index 5 +)"))); + } + + std::shared_ptr<lib::Distribution> derived_global_grouped_distribution(bool use_legacy) { + auto default_distr = default_grouped_distribution(); + return GlobalBucketSpaceDistributionConverter::convert_to_global(*default_distr, use_legacy); + } + + void set_grouped_distribution_configs() { + auto default_distr = default_grouped_distribution(); + _self._node->getComponentRegister().getBucketSpaceRepo() + .get(document::FixedBucketSpaces::default_space()).setDistribution(std::move(default_distr)); + auto global_distr = derived_global_grouped_distribution(false); + _self._node->getComponentRegister().getBucketSpaceRepo() + .get(document::FixedBucketSpaces::global_space()).setDistribution(std::move(global_distr)); + } + private: BucketManagerTest& _self; lib::ClusterState _state; @@ -1358,4 +1404,19 @@ BucketManagerTest::testDbNotIteratedWhenAllRequestsRejected() auto replies = fixture.awaitAndGetReplies(1); } +// TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475 +void BucketManagerTest::fall_back_to_legacy_global_distribution_hash_on_mismatch() { + ConcurrentOperationFixture f(*this); + + f.set_grouped_distribution_configs(); + + auto legacy_hash = f.derived_global_grouped_distribution(true)->getNodeGraph().getDistributionConfigHash(); + + auto infoCmd = f.createFullFetchCommandWithHash(document::FixedBucketSpaces::global_space(), legacy_hash); + _top->sendDown(infoCmd); + auto replies = f.awaitAndGetReplies(1); + auto& reply = dynamic_cast<api::RequestBucketInfoReply&>(*replies[0]); + CPPUNIT_ASSERT_EQUAL(api::ReturnCode::OK, reply.getResult().getResult()); // _not_ REJECTED +} + } // storage diff --git a/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp b/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp index 5afea9cd3cd..d75f2ac6459 100644 --- a/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp +++ b/storage/src/tests/common/global_bucket_space_distribution_converter_test.cpp @@ -17,6 +17,7 @@ struct GlobalBucketSpaceDistributionConverterTest : public CppUnit::TestFixture CPPUNIT_TEST(config_retired_state_is_propagated); CPPUNIT_TEST(group_capacities_are_propagated); CPPUNIT_TEST(global_distribution_has_same_owner_distributors_as_default); + CPPUNIT_TEST(can_generate_config_with_legacy_partition_spec); CPPUNIT_TEST_SUITE_END(); void can_transform_flat_cluster_config(); @@ -27,6 +28,7 @@ struct GlobalBucketSpaceDistributionConverterTest : public CppUnit::TestFixture void config_retired_state_is_propagated(); void group_capacities_are_propagated(); void global_distribution_has_same_owner_distributors_as_default(); + void can_generate_config_with_legacy_partition_spec(); }; CPPUNIT_TEST_SUITE_REGISTRATION(GlobalBucketSpaceDistributionConverterTest); @@ -35,9 +37,9 @@ using DistributionConfig = vespa::config::content::StorDistributionConfig; namespace { -vespalib::string default_to_global_config(const vespalib::string& default_config) { +vespalib::string default_to_global_config(const vespalib::string& default_config, bool legacy_mode = false) { auto default_cfg = GlobalBucketSpaceDistributionConverter::string_to_config(default_config); - auto as_global = GlobalBucketSpaceDistributionConverter::convert_to_global(*default_cfg); + auto as_global = GlobalBucketSpaceDistributionConverter::convert_to_global(*default_cfg, legacy_mode); return GlobalBucketSpaceDistributionConverter::config_to_string(*as_global); } @@ -377,4 +379,64 @@ group[2].nodes[1].index 2 } } +// By "legacy" read "broken", but we need to be able to generate it to support rolling upgrades properly. +// TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475 +void GlobalBucketSpaceDistributionConverterTest::can_generate_config_with_legacy_partition_spec() { + vespalib::string default_config( +R"(redundancy 2 +group[3] +group[0].name "invalid" +group[0].index "invalid" +group[0].partitions 1|* +group[0].nodes[0] +group[1].name rack0 +group[1].index 0 +group[1].nodes[3] +group[1].nodes[0].index 0 +group[1].nodes[1].index 1 +group[1].nodes[2].index 2 +group[2].name rack1 +group[2].index 1 +group[2].nodes[3] +group[2].nodes[0].index 3 +group[2].nodes[1].index 4 +group[2].nodes[2].index 5 +)"); + + vespalib::string expected_global_config( +R"(redundancy 6 +initial_redundancy 0 +ensure_primary_persisted true +ready_copies 6 +active_per_leaf_group true +distributor_auto_ownership_transfer_on_whole_group_down true +group[0].index "invalid" +group[0].name "invalid" +group[0].capacity 1 +group[0].partitions "3|3|*" +group[1].index "0" +group[1].name "rack0" +group[1].capacity 1 +group[1].partitions "" +group[1].nodes[0].index 0 +group[1].nodes[0].retired false +group[1].nodes[1].index 1 +group[1].nodes[1].retired false +group[1].nodes[2].index 2 +group[1].nodes[2].retired false +group[2].index "1" +group[2].name "rack1" +group[2].capacity 1 +group[2].partitions "" +group[2].nodes[0].index 3 +group[2].nodes[0].retired false +group[2].nodes[1].index 4 +group[2].nodes[1].retired false +group[2].nodes[2].index 5 +group[2].nodes[2].retired false +disk_distribution MODULO_BID +)"); + CPPUNIT_ASSERT_EQUAL(expected_global_config, default_to_global_config(default_config, true)); +} + }
\ No newline at end of file diff --git a/storage/src/tests/distributor/bucketdbupdatertest.cpp b/storage/src/tests/distributor/bucketdbupdatertest.cpp index 53f80854bef..b2d554c1e42 100644 --- a/storage/src/tests/distributor/bucketdbupdatertest.cpp +++ b/storage/src/tests/distributor/bucketdbupdatertest.cpp @@ -111,6 +111,7 @@ class BucketDBUpdaterTest : public CppUnit::TestFixture, CPPUNIT_TEST(identity_update_of_diverging_untrusted_replicas_does_not_mark_any_as_trusted); CPPUNIT_TEST(adding_diverging_replica_to_existing_trusted_does_not_remove_trusted); CPPUNIT_TEST(batch_update_from_distributor_change_does_not_mark_diverging_replicas_as_trusted); + CPPUNIT_TEST(global_distribution_hash_falls_back_to_legacy_format_upon_request_rejection); CPPUNIT_TEST_SUITE_END(); public: @@ -175,6 +176,7 @@ protected: void identity_update_of_diverging_untrusted_replicas_does_not_mark_any_as_trusted(); void adding_diverging_replica_to_existing_trusted_does_not_remove_trusted(); void batch_update_from_distributor_change_does_not_mark_diverging_replicas_as_trusted(); + void global_distribution_hash_falls_back_to_legacy_format_upon_request_rejection(); auto &defaultDistributorBucketSpace() { return getBucketSpaceRepo().get(makeBucketSpace()); } @@ -505,7 +507,7 @@ public: std::make_shared<lib::Distribution>(distConfig)); } - std::string getDistConfig6Nodes3Groups() const { + std::string getDistConfig6Nodes2Groups() const { return ("redundancy 2\n" "group[3]\n" "group[0].name \"invalid\"\n" @@ -692,7 +694,7 @@ BucketDBUpdaterTest::testDistributorChange() void BucketDBUpdaterTest::testDistributorChangeWithGrouping() { - std::string distConfig(getDistConfig6Nodes3Groups()); + std::string distConfig(getDistConfig6Nodes2Groups()); setDistribution(distConfig); int numBuckets = 100; @@ -2073,7 +2075,7 @@ BucketDBUpdaterTest::testClusterStateAlwaysSendsFullFetchWhenDistributionChangeP setAndEnableClusterState(stateBefore, expectedMsgs, dummyBucketsToReturn); } _sender.clear(); - std::string distConfig(getDistConfig6Nodes3Groups()); + std::string distConfig(getDistConfig6Nodes2Groups()); setDistribution(distConfig); sortSentMessagesByIndex(_sender); CPPUNIT_ASSERT_EQUAL(messageCount(6), _sender.commands.size()); @@ -2549,4 +2551,52 @@ void BucketDBUpdaterTest::batch_update_from_distributor_change_does_not_mark_div "0:5/1/2/3|1:5/7/8/9", true)); } +// TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475 +void BucketDBUpdaterTest::global_distribution_hash_falls_back_to_legacy_format_upon_request_rejection() { + std::string distConfig(getDistConfig6Nodes2Groups()); + setDistribution(distConfig); + + const vespalib::string current_hash = "(0d*|*(0;0;1;2)(1;3;4;5))"; + const vespalib::string legacy_hash = "(0d3|3|*(0;0;1;2)(1;3;4;5))"; + + setSystemState(lib::ClusterState("distributor:6 storage:6")); + CPPUNIT_ASSERT_EQUAL(messageCount(6), _sender.commands.size()); + + api::RequestBucketInfoCommand* global_req = nullptr; + for (auto& cmd : _sender.commands) { + auto& req_cmd = dynamic_cast<api::RequestBucketInfoCommand&>(*cmd); + if (req_cmd.getBucketSpace() == document::FixedBucketSpaces::global_space()) { + global_req = &req_cmd; + break; + } + } + CPPUNIT_ASSERT(global_req != nullptr); + CPPUNIT_ASSERT_EQUAL(current_hash, global_req->getDistributionHash()); + + auto reply = std::make_shared<api::RequestBucketInfoReply>(*global_req); + reply->setResult(api::ReturnCode::REJECTED); + getBucketDBUpdater().onRequestBucketInfoReply(reply); + + getClock().addSecondsToTime(10); + getBucketDBUpdater().resendDelayedMessages(); + + // Should now be a resent request with legacy distribution hash + CPPUNIT_ASSERT_EQUAL(messageCount(6) + 1, _sender.commands.size()); + auto& legacy_req = dynamic_cast<api::RequestBucketInfoCommand&>(*_sender.commands.back()); + CPPUNIT_ASSERT_EQUAL(legacy_hash, legacy_req.getDistributionHash()); + + // Now if we reject it _again_ we should cycle back to the current hash + // in case it wasn't a hash-based rejection after all. And the circle of life continues. + reply = std::make_shared<api::RequestBucketInfoReply>(legacy_req); + reply->setResult(api::ReturnCode::REJECTED); + getBucketDBUpdater().onRequestBucketInfoReply(reply); + + getClock().addSecondsToTime(10); + getBucketDBUpdater().resendDelayedMessages(); + + CPPUNIT_ASSERT_EQUAL(messageCount(6) + 2, _sender.commands.size()); + auto& new_current_req = dynamic_cast<api::RequestBucketInfoCommand&>(*_sender.commands.back()); + CPPUNIT_ASSERT_EQUAL(current_hash, new_current_req.getDistributionHash()); +} + } diff --git a/storage/src/vespa/storage/bucketdb/bucketmanager.cpp b/storage/src/vespa/storage/bucketdb/bucketmanager.cpp index 41de215d877..a1c1742edb5 100644 --- a/storage/src/vespa/storage/bucketdb/bucketmanager.cpp +++ b/storage/src/vespa/storage/bucketdb/bucketmanager.cpp @@ -6,6 +6,7 @@ #include <iomanip> #include <vespa/storage/common/content_bucket_space_repo.h> #include <vespa/storage/common/nodestateupdater.h> +#include <vespa/storage/common/global_bucket_space_distribution_converter.h> #include <vespa/vdslib/state/cluster_state_bundle.h> #include <vespa/storage/storageutil/distributorstatecache.h> #include <vespa/storageframework/generic/status/htmlstatusreporter.h> @@ -577,7 +578,21 @@ BucketManager::processRequestBucketInfoCommands(document::BucketSpace bucketSpac << " differs from this state."; } else if (!their_hash.empty() && their_hash != our_hash) { // Empty hash indicates request from 4.2 protocol or earlier - error << "Distribution config has changed since request."; + // TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475 + bool matches_legacy_hash = false; + if (bucketSpace == document::FixedBucketSpaces::global_space()) { + const auto default_distr =_component.getBucketSpaceRepo() + .get(document::FixedBucketSpaces::default_space()).getDistribution(); + // Convert in legacy distribution mode, which will accept old 'hash' structure. + const auto legacy_global_distr = GlobalBucketSpaceDistributionConverter::convert_to_global( + *default_distr, true/*use legacy mode*/); + const auto legacy_hash = legacy_global_distr->getNodeGraph().getDistributionConfigHash(); + LOG(debug, "Falling back to comparing against legacy distribution hash: %s", legacy_hash.c_str()); + matches_legacy_hash = (their_hash == legacy_hash); + } + if (!matches_legacy_hash) { + error << "Distribution config has changed since request."; + } } if (error.str().empty()) { std::pair<std::set<uint16_t>::iterator, bool> result( diff --git a/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp index 534644458bc..cbcaeef8fdf 100644 --- a/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp +++ b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.cpp @@ -59,6 +59,21 @@ vespalib::string sub_groups_to_partition_spec(const Group& parent) { return spec.str(); } +// Allow generating legacy (broken) partition specs that may be used transiently +// during rolling upgrades from a pre-fix version to a post-fix version. +// TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475 +vespalib::string sub_groups_to_legacy_partition_spec(const Group& parent) { + vespalib::asciistream partitions; + // In case of a flat cluster config, this ends up with a partition spec of '*', + // which is fine. It basically means "put all replicas in this group", which + // happens to be exactly what we want. + for (auto& child : parent.sub_groups) { + partitions << child.second->nested_leaf_count << '|'; + } + partitions << '*'; + return partitions.str(); +} + bool is_leaf_group(const DistributionConfigBuilder::Group& g) noexcept { return !g.nodes.empty(); } @@ -87,19 +102,31 @@ void insert_new_group_into_tree( void build_transformed_root_group(DistributionConfigBuilder& builder, const DistributionConfigBuilder::Group& config_source_root, - const Group& parsed_root) { + const Group& parsed_root, + bool legacy_mode) { DistributionConfigBuilder::Group new_root(config_source_root); - new_root.partitions = sub_groups_to_partition_spec(parsed_root); + if (!legacy_mode) { + new_root.partitions = sub_groups_to_partition_spec(parsed_root); + } else { + // TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475 + new_root.partitions = sub_groups_to_legacy_partition_spec(parsed_root); + } builder.group.emplace_back(std::move(new_root)); } void build_transformed_non_root_group(DistributionConfigBuilder& builder, const DistributionConfigBuilder::Group& config_source_group, - const Group& parsed_root) { + const Group& parsed_root, + bool legacy_mode) { DistributionConfigBuilder::Group new_group(config_source_group); if (!is_leaf_group(config_source_group)) { // Partition specs only apply to inner nodes const auto& g = find_non_root_group_by_index(config_source_group.index, parsed_root); - new_group.partitions = sub_groups_to_partition_spec(g); + if (!legacy_mode) { + new_group.partitions = sub_groups_to_partition_spec(g); + } else { + // TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475 + new_group.partitions = sub_groups_to_legacy_partition_spec(g); + } } builder.group.emplace_back(std::move(new_group)); } @@ -135,16 +162,16 @@ std::unique_ptr<Group> create_group_tree_from_config(const DistributionConfig& s * transitively, its parents again etc) have already been processed. This directly * implies that the root group is always the first group present in the config. */ -void build_global_groups(DistributionConfigBuilder& builder, const DistributionConfig& source) { +void build_global_groups(DistributionConfigBuilder& builder, const DistributionConfig& source, bool legacy_mode) { assert(!source.group.empty()); // TODO gracefully handle empty config? auto root = create_group_tree_from_config(source); auto g_iter = source.group.begin(); const auto g_end = source.group.end(); - build_transformed_root_group(builder, *g_iter, *root); + build_transformed_root_group(builder, *g_iter, *root, legacy_mode); ++g_iter; for (; g_iter != g_end; ++g_iter) { - build_transformed_non_root_group(builder, *g_iter, *root); + build_transformed_non_root_group(builder, *g_iter, *root, legacy_mode); } builder.redundancy = root->nested_leaf_count; @@ -154,17 +181,17 @@ void build_global_groups(DistributionConfigBuilder& builder, const DistributionC } // anon ns std::shared_ptr<DistributionConfig> -GlobalBucketSpaceDistributionConverter::convert_to_global(const DistributionConfig& source) { +GlobalBucketSpaceDistributionConverter::convert_to_global(const DistributionConfig& source, bool legacy_mode) { DistributionConfigBuilder builder; set_distribution_invariant_config_fields(builder, source); - build_global_groups(builder, source); + build_global_groups(builder, source, legacy_mode); return std::make_shared<DistributionConfig>(builder); } std::shared_ptr<lib::Distribution> -GlobalBucketSpaceDistributionConverter::convert_to_global(const lib::Distribution& distr) { +GlobalBucketSpaceDistributionConverter::convert_to_global(const lib::Distribution& distr, bool legacy_mode) { const auto src_config = distr.serialize(); - auto global_config = convert_to_global(*string_to_config(src_config)); + auto global_config = convert_to_global(*string_to_config(src_config), legacy_mode); return std::make_shared<lib::Distribution>(*global_config); } diff --git a/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h index d135f56a5c1..b2be65dad42 100644 --- a/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h +++ b/storage/src/vespa/storage/common/global_bucket_space_distribution_converter.h @@ -10,8 +10,9 @@ namespace storage { struct GlobalBucketSpaceDistributionConverter { using DistributionConfig = vespa::config::content::StorDistributionConfig; - static std::shared_ptr<DistributionConfig> convert_to_global(const DistributionConfig&); - static std::shared_ptr<lib::Distribution> convert_to_global(const lib::Distribution&); + // TODO remove legacy_mode flags on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475 + static std::shared_ptr<DistributionConfig> convert_to_global(const DistributionConfig&, bool legacy_mode = false); + static std::shared_ptr<lib::Distribution> convert_to_global(const lib::Distribution&, bool legacy_mode = false); // Helper functions which may be of use outside this class static std::unique_ptr<DistributionConfig> string_to_config(const vespalib::string&); diff --git a/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp index 2071558628e..c295be19a0b 100644 --- a/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp +++ b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp @@ -35,7 +35,8 @@ PendingBucketSpaceDbTransition::PendingBucketSpaceDbTransition(const PendingClus _pendingClusterState(pendingClusterState), _distributorBucketSpace(distributorBucketSpace), _distributorIndex(_clusterInfo->getDistributorIndex()), - _bucketOwnershipTransfer(distributionChanged) + _bucketOwnershipTransfer(distributionChanged), + _rejectedRequests() { if (distributorChanged()) { _bucketOwnershipTransfer = true; diff --git a/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.h b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.h index 903f9b762fb..7eb2974eb52 100644 --- a/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.h +++ b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.h @@ -4,6 +4,7 @@ #include "pending_bucket_space_db_transition_entry.h" #include "outdated_nodes.h" #include <vespa/storage/bucketdb/bucketdatabase.h> +#include <unordered_map> namespace storage::api { class RequestBucketInfoReply; } namespace storage::lib { class ClusterState; class State; } @@ -48,6 +49,7 @@ private: DistributorBucketSpace &_distributorBucketSpace; uint16_t _distributorIndex; bool _bucketOwnershipTransfer; + std::unordered_map<uint16_t, size_t> _rejectedRequests; // BucketDataBase::MutableEntryProcessor API bool process(BucketDatabase::Entry& e) override; @@ -111,6 +113,14 @@ public: // Methods used by unit tests. const EntryList& results() const { return _entries; } void addNodeInfo(const document::BucketId& id, const BucketCopy& copy); + + void incrementRequestRejections(uint16_t node) { + _rejectedRequests[node]++; + } + size_t rejectedRequests(uint16_t node) const { + auto iter = _rejectedRequests.find(node); + return ((iter != _rejectedRequests.end()) ? iter->second : 0); + } }; } diff --git a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp index 1996ae9d2af..5f74a82c28a 100644 --- a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp +++ b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp @@ -7,6 +7,7 @@ #include "distributor_bucket_space.h" #include <vespa/storageframework/defaultimplementation/clock/realclock.h> #include <vespa/storage/common/bucketoperationlogger.h> +#include <vespa/storage/common/global_bucket_space_distribution_converter.h> #include <vespa/document/bucket/fixed_bucket_spaces.h> #include <vespa/vespalib/util/xmlstream.hpp> #include <climits> @@ -185,7 +186,30 @@ PendingClusterState::requestNode(BucketSpaceAndNode bucketSpaceAndNode) { const auto &distributorBucketSpace(_bucketSpaceRepo.get(bucketSpaceAndNode.bucketSpace)); const auto &distribution(distributorBucketSpace.getDistribution()); - vespalib::string distributionHash(distribution.getNodeGraph().getDistributionConfigHash()); + vespalib::string distributionHash; + // TODO remove on Vespa 8 - this is a workaround for https://github.com/vespa-engine/vespa/issues/8475 + bool sendLegacyHash = false; + if (bucketSpaceAndNode.bucketSpace == document::FixedBucketSpaces::global_space()) { + auto transitionIter = _pendingTransitions.find(bucketSpaceAndNode.bucketSpace); + assert(transitionIter != _pendingTransitions.end()); + // First request cannot have been rejected yet and will thus be sent with non-legacy hash. + // Subsequent requests will be sent 50-50. This is because a request may be rejected due to + // other reasons than just the hash mismatching, so if we don't cycle back to the non-legacy + // hash we risk never converging. + sendLegacyHash = ((transitionIter->second->rejectedRequests(bucketSpaceAndNode.node) % 2) == 1); + } + if (!sendLegacyHash) { + distributionHash = distribution.getNodeGraph().getDistributionConfigHash(); + } else { + const auto& defaultSpace = _bucketSpaceRepo.get(document::FixedBucketSpaces::default_space()); + // Generate legacy distribution hash explicitly. + auto legacyGlobalDistr = GlobalBucketSpaceDistributionConverter::convert_to_global( + defaultSpace.getDistribution(), true/*use legacy mode*/); + distributionHash = legacyGlobalDistr->getNodeGraph().getDistributionConfigHash(); + LOG(debug, "Falling back to sending legacy hash to node %u: %s", + bucketSpaceAndNode.node, distributionHash.c_str()); + } + LOG(debug, "Requesting bucket info for bucket space %" PRIu64 " node %d with cluster state '%s' " "and distribution hash '%s'", @@ -238,6 +262,11 @@ PendingClusterState::onRequestBucketInfoReply(const std::shared_ptr<api::Request resendTime += framework::MilliSecTime(100); _delayedRequests.emplace_back(resendTime, bucketSpaceAndNode); _sentMessages.erase(iter); + if (result.getResult() == api::ReturnCode::REJECTED) { + auto transitionIter = _pendingTransitions.find(bucketSpaceAndNode.bucketSpace); + assert(transitionIter != _pendingTransitions.end()); + transitionIter->second->incrementRequestRejections(bucketSpaceAndNode.node); + } return true; } |