diff options
Diffstat (limited to 'storage/src')
104 files changed, 2054 insertions, 1623 deletions
diff --git a/storage/src/tests/distributor/bucketdbupdatertest.cpp b/storage/src/tests/distributor/bucketdbupdatertest.cpp index d1a54c04359..b9e33ea8d26 100644 --- a/storage/src/tests/distributor/bucketdbupdatertest.cpp +++ b/storage/src/tests/distributor/bucketdbupdatertest.cpp @@ -4,18 +4,23 @@ #include <iomanip> #include <vespa/storageapi/message/persistence.h> #include <vespa/storage/distributor/bucketdbupdater.h> +#include <vespa/storage/distributor/pending_bucket_space_db_transition.h> +#include <vespa/storage/distributor/outdated_nodes_map.h> #include <vespa/vespalib/io/fileutil.h> #include <vespa/storageframework/defaultimplementation/clock/realclock.h> #include <vespa/storage/storageutil/distributorstatecache.h> #include <tests/distributor/distributortestutil.h> #include <vespa/document/test/make_document_bucket.h> +#include <vespa/document/test/make_bucket_space.h> #include <vespa/storage/distributor/simpleclusterinformation.h> #include <vespa/storage/distributor/distributor.h> +#include <vespa/storage/distributor/distributor_bucket_space.h> #include <vespa/vespalib/text/stringtokenizer.h> using namespace storage::api; using namespace storage::lib; using document::test::makeDocumentBucket; +using document::test::makeBucketSpace; namespace storage { namespace distributor { @@ -141,19 +146,21 @@ protected: void adding_diverging_replica_to_existing_trusted_does_not_remove_trusted(); void batch_update_from_distributor_change_does_not_mark_diverging_replicas_as_trusted(); + auto &defaultDistributorBucketSpace() { return getBucketSpaceRepo().get(makeBucketSpace()); } + bool bucketExistsThatHasNode(int bucketCount, uint16_t node) const; ClusterInformation::CSP createClusterInfo(const std::string& clusterState) { ClusterInformation::CSP clusterInfo( new SimpleClusterInformation( getBucketDBUpdater().getDistributorComponent().getIndex(), - getBucketDBUpdater().getDistributorComponent().getDistribution(), lib::ClusterState(clusterState), "ui")); return clusterInfo; } public: + using OutdatedNodesMap = dbtransition::OutdatedNodesMap; void setUp() override { createLinks(); }; @@ -181,8 +188,7 @@ public: } std::vector<uint16_t> nodes; - getBucketDBUpdater().getDistributorComponent() - .getDistribution().getIdealNodes( + defaultDistributorBucketSpace().getDistribution().getIdealNodes( lib::NodeType::STORAGE, state, document::BucketId(16, i), @@ -243,7 +249,7 @@ public: } std::vector<uint16_t> nodes; - getBucketDBUpdater().getDistributorComponent().getDistribution().getIdealNodes( + defaultDistributorBucketSpace().getDistribution().getIdealNodes( lib::NodeType::STORAGE, state, document::BucketId(id), @@ -536,9 +542,9 @@ public: ClusterInformation::CSP clusterInfo( owner.createClusterInfo(oldClusterState)); - std::unordered_set<uint16_t> outdatedNodes; + OutdatedNodesMap outdatedNodesMap; state = PendingClusterState::createForClusterStateChange( - clock, clusterInfo, sender, cmd, outdatedNodes, + clock, clusterInfo, sender, owner.getBucketSpaceRepo(), cmd, outdatedNodesMap, api::Timestamp(1)); } @@ -549,9 +555,8 @@ public: ClusterInformation::CSP clusterInfo( owner.createClusterInfo(oldClusterState)); - std::unordered_set<uint16_t> outdatedNodes; state = PendingClusterState::createForDistributionChange( - clock, clusterInfo, sender, api::Timestamp(1)); + clock, clusterInfo, sender, owner.getBucketSpaceRepo(), api::Timestamp(1)); } }; @@ -582,7 +587,7 @@ BucketDBUpdaterTest::testNormalUsage() // Ensure distribution hash is set correctly CPPUNIT_ASSERT_EQUAL( - getBucketDBUpdater().getDistributorComponent().getDistribution() + defaultDistributorBucketSpace().getDistribution() .getNodeGraph().getDistributionConfigHash(), dynamic_cast<const RequestBucketInfoCommand&>( *_sender.commands[0]).getDistributionHash()); @@ -876,7 +881,7 @@ BucketDBUpdaterTest::testInitializingWhileRecheck() CPPUNIT_ASSERT_EQUAL(size_t(2), _sender.commands.size()); CPPUNIT_ASSERT_EQUAL(size_t(0), _senderDown.commands.size()); - getBucketDBUpdater().recheckBucketInfo(1, document::BucketId(16, 3)); + getBucketDBUpdater().recheckBucketInfo(1, makeDocumentBucket(document::BucketId(16, 3))); for (int i=0; i<2; i++) { fakeBucketReply(systemState, @@ -913,8 +918,7 @@ BucketDBUpdaterTest::testBitChange() int cnt=0; for (int i=0; cnt < 2; i++) { - lib::Distribution distribution = getBucketDBUpdater().getDistributorComponent() - .getDistribution(); + lib::Distribution distribution = defaultDistributorBucketSpace().getDistribution(); std::vector<uint16_t> distributors; if (distribution.getIdealDistributorNode( lib::ClusterState("redundancy:1 bits:14 storage:1 distributor:2"), @@ -1006,7 +1010,7 @@ BucketDBUpdaterTest::testRecheckNodeWithFailure() _sender.clear(); - getBucketDBUpdater().recheckBucketInfo(1, document::BucketId(16, 3)); + getBucketDBUpdater().recheckBucketInfo(1, makeDocumentBucket(document::BucketId(16, 3))); CPPUNIT_ASSERT_EQUAL(size_t(1), _sender.commands.size()); @@ -1056,7 +1060,7 @@ BucketDBUpdaterTest::testRecheckNode() _sender.clear(); - getBucketDBUpdater().recheckBucketInfo(1, document::BucketId(16, 3)); + getBucketDBUpdater().recheckBucketInfo(1, makeDocumentBucket(document::BucketId(16, 3))); CPPUNIT_ASSERT_EQUAL(size_t(1), _sender.commands.size()); @@ -1475,7 +1479,7 @@ BucketDBUpdaterTest::getSentNodesDistributionChanged( ClusterInformation::CSP clusterInfo(createClusterInfo(oldClusterState)); std::unique_ptr<PendingClusterState> state( PendingClusterState::createForDistributionChange( - clock, clusterInfo, sender, api::Timestamp(1))); + clock, clusterInfo, sender, getBucketSpaceRepo(), api::Timestamp(1))); sortSentMessagesByIndex(sender); @@ -1637,10 +1641,10 @@ BucketDBUpdaterTest::testPendingClusterStateReceive() framework::defaultimplementation::FakeClock clock; ClusterInformation::CSP clusterInfo(createClusterInfo("cluster:d")); - std::unordered_set<uint16_t> outdatedNodes; + OutdatedNodesMap outdatedNodesMap; std::unique_ptr<PendingClusterState> state( PendingClusterState::createForClusterStateChange( - clock, clusterInfo, sender, cmd, outdatedNodes, + clock, clusterInfo, sender, getBucketSpaceRepo(), cmd, outdatedNodesMap, api::Timestamp(1))); CPPUNIT_ASSERT_EQUAL(3, (int)sender.commands.size()); @@ -1668,7 +1672,8 @@ BucketDBUpdaterTest::testPendingClusterStateReceive() state->done()); } - CPPUNIT_ASSERT_EQUAL(3, (int)state->results().size()); + auto &pendingTransition = state->getPendingBucketSpaceDbTransition(makeBucketSpace()); + CPPUNIT_ASSERT_EQUAL(3, (int)pendingTransition.results().size()); } void @@ -1721,13 +1726,14 @@ parseInputData(const std::string& data, uint16_t node = atoi(tok2[0].c_str()); state.setNodeReplied(node); + auto &pendingTransition = state.getPendingBucketSpaceDbTransition(makeBucketSpace()); vespalib::StringTokenizer tok3(tok2[1], ","); for (uint32_t j = 0; j < tok3.size(); j++) { if (includeBucketInfo) { vespalib::StringTokenizer tok4(tok3[j], "/"); - state.addNodeInfo( + pendingTransition.addNodeInfo( document::BucketId(16, atoi(tok4[0].c_str())), BucketCopy( timestamp, @@ -1739,7 +1745,7 @@ parseInputData(const std::string& data, atoi(tok4[2].c_str()), atoi(tok4[3].c_str())))); } else { - state.addNodeInfo( + pendingTransition.addNodeInfo( document::BucketId(16, atoi(tok3[j].c_str())), BucketCopy(timestamp, node, @@ -1793,7 +1799,7 @@ BucketDBUpdaterTest::mergeBucketLists( framework::MilliSecTimer timer(clock); MessageSenderStub sender; - std::unordered_set<uint16_t> outdatedNodes; + OutdatedNodesMap outdatedNodesMap; { auto cmd(std::make_shared<api::SetSystemStateCommand>(oldState)); @@ -1803,11 +1809,11 @@ BucketDBUpdaterTest::mergeBucketLists( ClusterInformation::CSP clusterInfo(createClusterInfo("cluster:d")); std::unique_ptr<PendingClusterState> state( PendingClusterState::createForClusterStateChange( - clock, clusterInfo, sender, cmd, outdatedNodes, + clock, clusterInfo, sender, getBucketSpaceRepo(), cmd, outdatedNodesMap, beforeTime)); parseInputData(existingData, beforeTime, *state, includeBucketInfo); - state->mergeInto(getBucketDBUpdater().getDistributorComponent().getBucketDatabase()); + state->mergeIntoBucketDatabases(); } BucketDumper dumper_tmp(true); @@ -1822,19 +1828,17 @@ BucketDBUpdaterTest::mergeBucketLists( ClusterInformation::CSP clusterInfo(createClusterInfo(oldState.toString())); std::unique_ptr<PendingClusterState> state( PendingClusterState::createForClusterStateChange( - clock, clusterInfo, sender, cmd, outdatedNodes, + clock, clusterInfo, sender, getBucketSpaceRepo(), cmd, outdatedNodesMap, afterTime)); parseInputData(newData, afterTime, *state, includeBucketInfo); - state->mergeInto(getBucketDBUpdater().getDistributorComponent() - .getBucketDatabase()); + state->mergeIntoBucketDatabases(); } BucketDumper dumper(includeBucketInfo); - getBucketDBUpdater().getDistributorComponent() - .getBucketDatabase().forEach(dumper); - getBucketDBUpdater().getDistributorComponent() - .getBucketDatabase().clear(); + auto &bucketDb(defaultDistributorBucketSpace().getBucketDatabase()); + bucketDb.forEach(dumper); + bucketDb.clear(); return dumper.ost.str(); } @@ -1949,7 +1953,7 @@ BucketDBUpdaterTest::testNoDbResurrectionForBucketNotOwnedInCurrentState() } _sender.clear(); - getBucketDBUpdater().recheckBucketInfo(0, bucket); + getBucketDBUpdater().recheckBucketInfo(0, makeDocumentBucket(bucket)); CPPUNIT_ASSERT_EQUAL(size_t(1), _sender.commands.size()); std::shared_ptr<api::RequestBucketInfoCommand> rbi( @@ -1981,7 +1985,7 @@ BucketDBUpdaterTest::testNoDbResurrectionForBucketNotOwnedInPendingState() } _sender.clear(); - getBucketDBUpdater().recheckBucketInfo(0, bucket); + getBucketDBUpdater().recheckBucketInfo(0, makeDocumentBucket(bucket)); CPPUNIT_ASSERT_EQUAL(size_t(1), _sender.commands.size()); std::shared_ptr<api::RequestBucketInfoCommand> rbi( @@ -1994,7 +1998,7 @@ BucketDBUpdaterTest::testNoDbResurrectionForBucketNotOwnedInPendingState() CPPUNIT_ASSERT(getBucketDBUpdater().getDistributorComponent() .ownsBucketInCurrentState(makeDocumentBucket(bucket))); CPPUNIT_ASSERT(!getBucketDBUpdater() - .checkOwnershipInPendingState(bucket).isOwned()); + .checkOwnershipInPendingState(makeDocumentBucket(bucket)).isOwned()); sendFakeReplyForSingleBucketRequest(*rbi); diff --git a/storage/src/tests/distributor/distributortest.cpp b/storage/src/tests/distributor/distributortest.cpp index cbc78157911..1640af0f871 100644 --- a/storage/src/tests/distributor/distributortest.cpp +++ b/storage/src/tests/distributor/distributortest.cpp @@ -556,7 +556,7 @@ Distributor_Test::testNoDbResurrectionForBucketNotOwnedInPendingState() document::BucketId nonOwnedBucket(16, 3); CPPUNIT_ASSERT(!getBucketDBUpdater() - .checkOwnershipInPendingState(nonOwnedBucket).isOwned()); + .checkOwnershipInPendingState(makeDocumentBucket(nonOwnedBucket)).isOwned()); CPPUNIT_ASSERT(!getBucketDBUpdater().getDistributorComponent() .checkOwnershipInPendingAndCurrentState(makeDocumentBucket(nonOwnedBucket)) .isOwned()); diff --git a/storage/src/tests/distributor/distributortestutil.cpp b/storage/src/tests/distributor/distributortestutil.cpp index 5deb31f8579..6f5abc02512 100644 --- a/storage/src/tests/distributor/distributortestutil.cpp +++ b/storage/src/tests/distributor/distributortestutil.cpp @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "distributortestutil.h" #include <vespa/storage/distributor/distributor.h> +#include <vespa/storage/distributor/distributor_bucket_space.h> #include <vespa/config-stor-distribution.h> #include <vespa/vespalib/text/stringtokenizer.h> #include <vespa/document/test/make_document_bucket.h> @@ -358,6 +359,16 @@ DistributorTestUtil::getBucketDatabase() const { return _distributor->getDefaultBucketSpace().getBucketDatabase(); } +DistributorBucketSpaceRepo & +DistributorTestUtil::getBucketSpaceRepo() { + return _distributor->getBucketSpaceRepo(); +} + +const DistributorBucketSpaceRepo & +DistributorTestUtil::getBucketSpaceRepo() const { + return _distributor->getBucketSpaceRepo(); +} + const lib::Distribution& DistributorTestUtil::getDistribution() const { return _distributor->getDefaultBucketSpace().getDistribution(); diff --git a/storage/src/tests/distributor/distributortestutil.h b/storage/src/tests/distributor/distributortestutil.h index 4f09c11ac03..19da0483165 100644 --- a/storage/src/tests/distributor/distributortestutil.h +++ b/storage/src/tests/distributor/distributortestutil.h @@ -20,6 +20,7 @@ namespace distributor { class BucketDBUpdater; class Distributor; class DistributorBucketSpace; +class DistributorBucketSpaceRepo; class IdealStateManager; class ExternalOperationHandler; class Operation; @@ -125,6 +126,8 @@ public: DistributorBucketSpace &getDistributorBucketSpace(); BucketDatabase& getBucketDatabase(); const BucketDatabase& getBucketDatabase() const; + DistributorBucketSpaceRepo &getBucketSpaceRepo(); + const DistributorBucketSpaceRepo &getBucketSpaceRepo() const; const lib::Distribution& getDistribution() const; // "End to end" distribution change trigger, which will invoke the bucket diff --git a/storage/src/tests/distributor/operationtargetresolvertest.cpp b/storage/src/tests/distributor/operationtargetresolvertest.cpp index 83e004f59fe..1fea6e47656 100644 --- a/storage/src/tests/distributor/operationtargetresolvertest.cpp +++ b/storage/src/tests/distributor/operationtargetresolvertest.cpp @@ -3,16 +3,22 @@ #include <vespa/config/helper/configgetter.h> #include <vespa/document/config/config-documenttypes.h> #include <vespa/document/repo/documenttyperepo.h> +#include <vespa/document/test/make_bucket_space.h> +#include <vespa/document/test/make_document_bucket.h> #include <vespa/storageapi/message/bucket.h> #include <vespa/storageapi/message/persistence.h> #include <tests/distributor/distributortestutil.h> #include <vespa/vdslib/distribution/idealnodecalculatorimpl.h> #include <vespa/vespalib/testkit/testapp.h> +#include <vespa/storage/distributor/distributor_bucket_space_repo.h> +#include <vespa/storage/distributor/distributor_bucket_space.h> #include <vespa/storage/distributor/operationtargetresolverimpl.h> #include <vespa/storage/distributor/externaloperationhandler.h> #include <vespa/config/helper/configgetter.hpp> using document::BucketId; +using document::test::makeBucketSpace; +using document::test::makeDocumentBucket; namespace storage { namespace distributor { @@ -112,19 +118,19 @@ namespace { BucketInstanceList result(_test.getInstances(_id, true)); BucketInstanceList all(_test.getInstances(_id, false)); _asserter.assertEqualMsg( - all.toString(), _expected, result.createTargets()); + all.toString(), _expected, result.createTargets(makeBucketSpace())); delete _asserters.back(); _asserters.pop_back(); } TestTargets& sendsTo(const BucketId& id, uint16_t node) { _expected.push_back(OperationTarget( - id, lib::Node(lib::NodeType::STORAGE, node), false)); + makeDocumentBucket(id), lib::Node(lib::NodeType::STORAGE, node), false)); return *this; } TestTargets& createsAt(const BucketId& id, uint16_t node) { _expected.push_back(OperationTarget( - id, lib::Node(lib::NodeType::STORAGE, node), true)); + makeDocumentBucket(id), lib::Node(lib::NodeType::STORAGE, node), true)); return *this; } @@ -144,11 +150,14 @@ OperationTargetResolverTest::getInstances(const BucketId& id, bool stripToRedundancy) { lib::IdealNodeCalculatorImpl idealNodeCalc; - idealNodeCalc.setDistribution(getExternalOperationHandler().getDistribution()); + auto &bucketSpaceRepo(getExternalOperationHandler().getBucketSpaceRepo()); + auto &distributorBucketSpace(bucketSpaceRepo.get(makeBucketSpace())); + idealNodeCalc.setDistribution(distributorBucketSpace.getDistribution()); idealNodeCalc.setClusterState(getExternalOperationHandler().getClusterState()); OperationTargetResolverImpl resolver( - getExternalOperationHandler().getBucketDatabase(), idealNodeCalc, 16, - getExternalOperationHandler().getDistribution().getRedundancy()); + distributorBucketSpace.getBucketDatabase(), idealNodeCalc, 16, + distributorBucketSpace.getDistribution().getRedundancy(), + makeBucketSpace()); if (stripToRedundancy) { return resolver.getInstances(OperationTargetResolver::PUT, id); } else { @@ -174,11 +183,13 @@ OperationTargetResolverTest::testMultipleNodes() { setupDistributor(1, 2, "storage:2 distributor:1"); + auto &bucketSpaceRepo(getExternalOperationHandler().getBucketSpaceRepo()); + auto &distributorBucketSpace(bucketSpaceRepo.get(makeBucketSpace())); for (int i = 0; i < 100; ++i) { addNodesToBucketDB(BucketId(16, i), "0=0,1=0"); lib::IdealNodeCalculatorImpl idealNodeCalc; - idealNodeCalc.setDistribution(getExternalOperationHandler().getDistribution()); + idealNodeCalc.setDistribution(distributorBucketSpace.getDistribution()); idealNodeCalc.setClusterState(getExternalOperationHandler().getClusterState()); lib::IdealNodeList idealNodes( idealNodeCalc.getIdealStorageNodes(BucketId(16, i))); diff --git a/storage/src/tests/distributor/simplemaintenancescannertest.cpp b/storage/src/tests/distributor/simplemaintenancescannertest.cpp index a46419b71a4..66a2d3efa6c 100644 --- a/storage/src/tests/distributor/simplemaintenancescannertest.cpp +++ b/storage/src/tests/distributor/simplemaintenancescannertest.cpp @@ -1,6 +1,9 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/document/test/make_bucket_space.h> #include <vespa/vdstestlib/cppunit/macros.h> +#include <vespa/storage/distributor/distributor_bucket_space_repo.h> +#include <vespa/storage/distributor/distributor_bucket_space.h> #include <vespa/storage/distributor/maintenance/simplemaintenancescanner.h> #include <vespa/storage/distributor/maintenance/simplebucketprioritydatabase.h> #include <vespa/storage/bucketdb/mapbucketdatabase.h> @@ -11,11 +14,13 @@ namespace storage::distributor { using document::BucketId; +using document::test::makeBucketSpace; typedef MaintenancePriority Priority; class SimpleMaintenanceScannerTest : public CppUnit::TestFixture { CPPUNIT_TEST_SUITE(SimpleMaintenanceScannerTest); CPPUNIT_TEST(testPrioritizeSingleBucket); + CPPUNIT_TEST(testPrioritizeSingleBucketAltBucketSpace); CPPUNIT_TEST(testPrioritizeMultipleBuckets); CPPUNIT_TEST(testPendingMaintenanceOperationStatistics); CPPUNIT_TEST(perNodeMaintenanceStatsAreTracked); @@ -27,10 +32,11 @@ class SimpleMaintenanceScannerTest : public CppUnit::TestFixture { std::string dumpPriorityDbToString(const BucketPriorityDatabase&) const; std::unique_ptr<MockMaintenancePriorityGenerator> _priorityGenerator; - std::unique_ptr<MapBucketDatabase> _bucketDb; + std::unique_ptr<DistributorBucketSpaceRepo> _bucketSpaceRepo; std::unique_ptr<SimpleBucketPriorityDatabase> _priorityDb; std::unique_ptr<SimpleMaintenanceScanner> _scanner; + void addBucketToDb(document::BucketSpace bucketSpace, int bucketNum); void addBucketToDb(int bucketNum); bool scanEntireDatabase(int expected); @@ -39,6 +45,7 @@ class SimpleMaintenanceScannerTest : public CppUnit::TestFixture { public: void testPrioritizeSingleBucket(); + void testPrioritizeSingleBucketAltBucketSpace(); void testPrioritizeMultipleBuckets(); void testPendingMaintenanceOperationStatistics(); void perNodeMaintenanceStatsAreTracked(); @@ -53,16 +60,23 @@ void SimpleMaintenanceScannerTest::setUp() { _priorityGenerator.reset(new MockMaintenancePriorityGenerator()); - _bucketDb.reset(new MapBucketDatabase()); + _bucketSpaceRepo = std::make_unique<DistributorBucketSpaceRepo>(); _priorityDb.reset(new SimpleBucketPriorityDatabase()); - _scanner.reset(new SimpleMaintenanceScanner(*_priorityDb, *_priorityGenerator, *_bucketDb)); + _scanner.reset(new SimpleMaintenanceScanner(*_priorityDb, *_priorityGenerator, *_bucketSpaceRepo)); } void -SimpleMaintenanceScannerTest::addBucketToDb(int bucketNum) +SimpleMaintenanceScannerTest::addBucketToDb(document::BucketSpace bucketSpace, int bucketNum) { BucketDatabase::Entry entry(BucketId(16, bucketNum), BucketInfo()); - _bucketDb->update(entry); + auto &bucketDb(_bucketSpaceRepo->get(bucketSpace).getBucketDatabase()); + bucketDb.update(entry); +} + +void +SimpleMaintenanceScannerTest::addBucketToDb(int bucketNum) +{ + addBucketToDb(makeBucketSpace(), bucketNum); } std::string @@ -80,7 +94,27 @@ SimpleMaintenanceScannerTest::testPrioritizeSingleBucket() addBucketToDb(1); std::string expected("PrioritizedBucket(Bucket(BucketSpace(0x0000000000000000), BucketId(0x4000000000000001)), pri VERY_HIGH)\n"); - CPPUNIT_ASSERT(!_scanner->scanNext().isDone()); + auto scanResult = _scanner->scanNext(); + CPPUNIT_ASSERT(!scanResult.isDone()); + CPPUNIT_ASSERT_EQUAL(makeBucketSpace().getId(), scanResult.getBucketSpace().getId()); + CPPUNIT_ASSERT_EQUAL(expected, _priorityDb->toString()); + + CPPUNIT_ASSERT(_scanner->scanNext().isDone()); + CPPUNIT_ASSERT_EQUAL(expected, _priorityDb->toString()); +} + +void +SimpleMaintenanceScannerTest::testPrioritizeSingleBucketAltBucketSpace() +{ + document::BucketSpace bucketSpace(4); + _bucketSpaceRepo->add(bucketSpace, std::make_unique<DistributorBucketSpace>()); + _scanner->reset(); + addBucketToDb(bucketSpace, 1); + std::string expected("PrioritizedBucket(Bucket(BucketSpace(0x0000000000000004), BucketId(0x4000000000000001)), pri VERY_HIGH)\n"); + + auto scanResult = _scanner->scanNext(); + CPPUNIT_ASSERT(!scanResult.isDone()); + CPPUNIT_ASSERT_EQUAL(bucketSpace.getId(), scanResult.getBucketSpace().getId()); CPPUNIT_ASSERT_EQUAL(expected, _priorityDb->toString()); CPPUNIT_ASSERT(_scanner->scanNext().isDone()); diff --git a/storage/src/tests/distributor/statecheckerstest.cpp b/storage/src/tests/distributor/statecheckerstest.cpp index 29c922248e7..306f92cdd6a 100644 --- a/storage/src/tests/distributor/statecheckerstest.cpp +++ b/storage/src/tests/distributor/statecheckerstest.cpp @@ -13,6 +13,8 @@ #include <vespa/storage/distributor/operations/idealstate/setbucketstateoperation.h> #include <vespa/storage/distributor/operations/idealstate/splitoperation.h> #include <vespa/storage/distributor/maintenance/node_maintenance_stats_tracker.h> +#include <vespa/storage/distributor/distributor_bucket_space_repo.h> +#include <vespa/storage/distributor/distributor_bucket_space.h> #include <vespa/storageapi/message/stat.h> #include <vespa/storage/storageutil/utils.h> #include <tests/distributor/distributortestutil.h> @@ -20,8 +22,12 @@ #include <vespa/storageapi/message/state.h> #include <vespa/config-stor-distribution.h> #include <vespa/storage/distributor/distributor.h> +#include <vespa/document/test/make_bucket_space.h> +#include <vespa/document/test/make_document_bucket.h> using namespace std::literals::string_literals; +using document::test::makeBucketSpace; +using document::test::makeDocumentBucket; namespace storage { namespace distributor { @@ -105,8 +111,9 @@ struct StateCheckersTest : public CppUnit::TestFixture, void assertCurrentIdealState(const document::BucketId& bucket, const std::vector<uint16_t> expected) { + auto &distributorBucketSpace(getIdealStateManager().getBucketSpaceRepo().get(makeBucketSpace())); std::vector<uint16_t> idealNodes( - getIdealStateManager().getDistributorComponent() + distributorBucketSpace .getDistribution().getIdealStorageNodes( getIdealStateManager().getDistributorComponent() .getClusterState(), @@ -128,17 +135,17 @@ struct StateCheckersTest : public CppUnit::TestFixture, std::ostringstream ost; c.siblingBucket = getIdealStateManager().getDistributorComponent() - .getSibling(c.bucketId); + .getSibling(c.getBucketId()); std::vector<BucketDatabase::Entry> entries; - getBucketDatabase().getAll(c.bucketId, entries); + getBucketDatabase().getAll(c.getBucketId(), entries); c.siblingEntry = getBucketDatabase().get(c.siblingBucket); c.entries = entries; for (uint32_t j = 0; j < entries.size(); ++j) { // Run checking only on this bucketid, but include all buckets // owned by it or owners of it, so we can detect inconsistent split. - if (entries[j].getBucketId() == c.bucketId) { + if (entries[j].getBucketId() == c.getBucketId()) { c.entry = entries[j]; StateChecker::Result result(checker.check(c)); @@ -263,7 +270,7 @@ struct StateCheckersTest : public CppUnit::TestFixture, lib::ClusterState(params._clusterState)); NodeMaintenanceStatsTracker statsTracker; StateChecker::Context c( - getExternalOperationHandler(), statsTracker, bid); + getExternalOperationHandler(), getDistributorBucketSpace(), statsTracker, makeDocumentBucket(bid)); std::string result = testStateChecker( checker, c, false, *params._blockerMessage, params._includeMessagePriority, @@ -361,7 +368,7 @@ std::string StateCheckersTest::testSplit(uint32_t splitCount, SplitBucketStateChecker checker; NodeMaintenanceStatsTracker statsTracker; - StateChecker::Context c(getExternalOperationHandler(), statsTracker, bid); + StateChecker::Context c(getExternalOperationHandler(), getDistributorBucketSpace(), statsTracker, makeDocumentBucket(bid)); getConfig().setSplitSize(splitSize); getConfig().setSplitCount(splitCount); getConfig().setMinimalBucketSplit(minSplitBits); @@ -465,7 +472,7 @@ StateCheckersTest::testInconsistentSplit(const document::BucketId& bid, { SplitInconsistentStateChecker checker; NodeMaintenanceStatsTracker statsTracker; - StateChecker::Context c(getExternalOperationHandler(), statsTracker, bid); + StateChecker::Context c(getExternalOperationHandler(), getDistributorBucketSpace(), statsTracker, makeDocumentBucket(bid)); return testStateChecker(checker, c, true, PendingMessage(), includePriority); } @@ -533,7 +540,7 @@ StateCheckersTest::testJoin(uint32_t joinCount, getConfig().setMinimalBucketSplit(minSplitBits); NodeMaintenanceStatsTracker statsTracker; - StateChecker::Context c(getExternalOperationHandler(), statsTracker, bid); + StateChecker::Context c(getExternalOperationHandler(), getDistributorBucketSpace(), statsTracker, makeDocumentBucket(bid)); return testStateChecker(checker, c, true, blocker, includePriority); } @@ -789,7 +796,7 @@ StateCheckersTest::testSynchronizeAndMove(const std::string& bucketInfo, _distributor->enableClusterState(lib::ClusterState(clusterState)); NodeMaintenanceStatsTracker statsTracker; - StateChecker::Context c(getExternalOperationHandler(), statsTracker, bid); + StateChecker::Context c(getExternalOperationHandler(), getDistributorBucketSpace(), statsTracker, makeDocumentBucket(bid)); return testStateChecker(checker, c, false, blocker, includePriority); } @@ -820,7 +827,7 @@ StateCheckersTest::testSynchronizeAndMove() runAndVerify<SynchronizeAndMoveStateChecker>( CheckerParams() .expect("[Moving bucket to ideal node 3] " - "(scheduling pri VERY_LOW)") + "(scheduling pri LOW)") .bucketInfo("0=1,1=1,2=1") .clusterState("distributor:1 storage:4") .includeSchedulingPriority(true)); @@ -837,7 +844,7 @@ StateCheckersTest::testSynchronizeAndMove() CheckerParams() .expect("[Moving bucket to ideal node 1]" "[Moving bucket to ideal node 3] (pri 165) " - "(scheduling pri VERY_LOW)") + "(scheduling pri LOW)") .clusterState("distributor:1 storage:5") .bucketInfo("0=1,4=1,5=1") .includeMessagePriority(true) @@ -984,7 +991,7 @@ StateCheckersTest::testDeleteExtraCopies( } DeleteExtraCopiesStateChecker checker; NodeMaintenanceStatsTracker statsTracker; - StateChecker::Context c(getExternalOperationHandler(), statsTracker, bid); + StateChecker::Context c(getExternalOperationHandler(), getDistributorBucketSpace(), statsTracker, makeDocumentBucket(bid)); return testStateChecker(checker, c, false, blocker, includePriority); } @@ -995,8 +1002,9 @@ StateCheckersTest::testDeleteExtraCopies() setupDistributor(2, 100, "distributor:1 storage:4"); { + auto &distributorBucketSpace(getIdealStateManager().getBucketSpaceRepo().get(makeBucketSpace())); std::vector<uint16_t> idealNodes( - getIdealStateManager().getDistributorComponent() + distributorBucketSpace .getDistribution().getIdealStorageNodes( getIdealStateManager().getDistributorComponent().getClusterState(), document::BucketId(17, 0), @@ -1133,7 +1141,7 @@ std::string StateCheckersTest::testBucketState( BucketStateStateChecker checker; NodeMaintenanceStatsTracker statsTracker; - StateChecker::Context c(getExternalOperationHandler(), statsTracker, bid); + StateChecker::Context c(getExternalOperationHandler(), getDistributorBucketSpace(), statsTracker, makeDocumentBucket(bid)); return testStateChecker(checker, c, false, PendingMessage(), includePriority); } @@ -1332,7 +1340,7 @@ std::string StateCheckersTest::testBucketStatePerGroup( BucketStateStateChecker checker; NodeMaintenanceStatsTracker statsTracker; - StateChecker::Context c(getExternalOperationHandler(), statsTracker, bid); + StateChecker::Context c(getExternalOperationHandler(), getDistributorBucketSpace(), statsTracker, makeDocumentBucket(bid)); return testStateChecker(checker, c, false, PendingMessage(), includePriority); } @@ -1474,8 +1482,8 @@ std::string StateCheckersTest::testGarbageCollection( getConfig().setGarbageCollection("music", checkInterval); getConfig().setLastGarbageCollectionChangeTime(lastChangeTime); NodeMaintenanceStatsTracker statsTracker; - StateChecker::Context c(getExternalOperationHandler(), statsTracker, - e.getBucketId()); + StateChecker::Context c(getExternalOperationHandler(), getDistributorBucketSpace(), statsTracker, + makeDocumentBucket(e.getBucketId())); getClock().setAbsoluteTimeInSeconds(nowTimestamp); return testStateChecker(checker, c, false, PendingMessage(), includePriority, includeSchedulingPri); @@ -1533,7 +1541,7 @@ StateCheckersTest::testGarbageCollection() void StateCheckersTest::gc_ops_are_prioritized_with_low_priority_category() { CPPUNIT_ASSERT_EQUAL( std::string("[Needs garbage collection: Last check at 3, current time 4000, " - "configured interval 300] (scheduling pri LOW)"), + "configured interval 300] (scheduling pri VERY_LOW)"), testGarbageCollection(3, 4000, 300, 1, false, true)); } @@ -1561,8 +1569,8 @@ StateCheckersTest::gcInhibitedWhenIdealNodeInMaintenance() getConfig().setGarbageCollection("music", 3600); getConfig().setLastGarbageCollectionChangeTime(0); NodeMaintenanceStatsTracker statsTracker; - StateChecker::Context c(getExternalOperationHandler(), statsTracker, - bucket); + StateChecker::Context c(getExternalOperationHandler(), getDistributorBucketSpace(), statsTracker, + makeDocumentBucket(bucket)); getClock().setAbsoluteTimeInSeconds(4000); // Would normally (in a non-maintenance case) trigger GC due to having // overshot the GC check cycle. @@ -1727,7 +1735,7 @@ StateCheckersTest::contextPopulatesIdealStateContainers() setupDistributor(2, 100, "distributor:1 storage:4"); NodeMaintenanceStatsTracker statsTracker; - StateChecker::Context c(getExternalOperationHandler(), statsTracker, {17, 0}); + StateChecker::Context c(getExternalOperationHandler(), getDistributorBucketSpace(), statsTracker, makeDocumentBucket({17, 0})); CPPUNIT_ASSERT_EQUAL((std::vector<uint16_t>{1, 3}), c.idealState); CPPUNIT_ASSERT_EQUAL(size_t(2), c.unorderedIdealState.size()); @@ -1772,7 +1780,7 @@ public: // NOTE: resets the bucket database! void runFor(const document::BucketId& bid) { Checker checker; - StateChecker::Context c(_fixture.getExternalOperationHandler(), _statsTracker, bid); + StateChecker::Context c(_fixture.getExternalOperationHandler(), _fixture.getDistributorBucketSpace(), _statsTracker, makeDocumentBucket(bid)); _result = _fixture.testStateChecker( checker, c, false, StateCheckersTest::PendingMessage(), false); } diff --git a/storage/src/tests/distributor/statoperationtest.cpp b/storage/src/tests/distributor/statoperationtest.cpp index 9ae8fc2fa4a..b010c5f6b79 100644 --- a/storage/src/tests/distributor/statoperationtest.cpp +++ b/storage/src/tests/distributor/statoperationtest.cpp @@ -8,6 +8,7 @@ #include <vespa/storage/distributor/operations/external/statbucketoperation.h> #include <vespa/storage/distributor/operations/external/statbucketlistoperation.h> #include <vespa/storage/distributor/distributor.h> +#include <vespa/storage/distributor/distributor_bucket_space.h> using document::test::makeDocumentBucket; diff --git a/storage/src/tests/storageserver/documentapiconvertertest.cpp b/storage/src/tests/storageserver/documentapiconvertertest.cpp index 1744bfb6a79..386be60d88c 100644 --- a/storage/src/tests/storageserver/documentapiconvertertest.cpp +++ b/storage/src/tests/storageserver/documentapiconvertertest.cpp @@ -1,20 +1,27 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/document/base/testdocrepo.h> #include <cppunit/extensions/HelperMacros.h> +#include <vespa/config/subscription/configuri.h> +#include <vespa/document/base/testdocrepo.h> +#include <vespa/document/bucket/bucketidfactory.h> +#include <vespa/document/datatype/documenttype.h> +#include <vespa/document/select/parser.h> +#include <vespa/document/test/make_document_bucket.h> +#include <vespa/documentapi/documentapi.h> +#include <vespa/messagebus/emptyreply.h> +#include <vespa/storage/common/bucket_resolver.h> #include <vespa/storage/storageserver/documentapiconverter.h> #include <vespa/storageapi/message/batch.h> #include <vespa/storageapi/message/datagram.h> #include <vespa/storageapi/message/multioperation.h> #include <vespa/storageapi/message/persistence.h> -#include <vespa/documentapi/documentapi.h> -#include <vespa/messagebus/emptyreply.h> -#include <vespa/document/datatype/documenttype.h> -#include <vespa/document/bucket/bucketidfactory.h> -#include <vespa/config/subscription/configuri.h> +#include <vespa/storageapi/message/removelocation.h> +#include <vespa/storageapi/message/stat.h> #include <vespa/vespalib/testkit/test_kit.h> -#include <vespa/document/test/make_document_bucket.h> +using document::Bucket; +using document::BucketId; +using document::BucketSpace; using document::DataType; using document::DocIdString; using document::Document; @@ -25,25 +32,81 @@ using document::test::makeDocumentBucket; namespace storage { +const DocumentId defaultDocId("id:test:text/html::0"); +const BucketSpace defaultBucketSpace(5); +const vespalib::string defaultSpaceName("myspace"); +const Bucket defaultBucket(defaultBucketSpace, BucketId(0)); + +struct MockBucketResolver : public BucketResolver { + virtual Bucket bucketFromId(const DocumentId &documentId) const override { + if (documentId.getDocType() == "text/html") { + return defaultBucket; + } + return Bucket(BucketSpace(0), BucketId(0)); + } + virtual BucketSpace bucketSpaceFromName(const vespalib::string &bucketSpace) const override { + if (bucketSpace == defaultSpaceName) { + return defaultBucketSpace; + } + return BucketSpace(0); + } + virtual vespalib::string nameFromBucketSpace(const document::BucketSpace &bucketSpace) const override { + if (bucketSpace == defaultBucketSpace) { + return defaultSpaceName; + } + return ""; + } +}; + struct DocumentApiConverterTest : public CppUnit::TestFixture { + MockBucketResolver _bucketResolver; std::unique_ptr<DocumentApiConverter> _converter; const DocumentTypeRepo::SP _repo; const DataType& _html_type; DocumentApiConverterTest() - : _repo(new DocumentTypeRepo(readDocumenttypesConfig( + : _bucketResolver(), + _repo(std::make_shared<DocumentTypeRepo>(readDocumenttypesConfig( TEST_PATH("config-doctypes.cfg")))), _html_type(*_repo->getDocumentType("text/html")) { } void setUp() override { - _converter.reset(new DocumentApiConverter("raw:")); + _converter.reset(new DocumentApiConverter("raw:", _bucketResolver)); }; + template <typename DerivedT, typename BaseT> + std::unique_ptr<DerivedT> dynamic_unique_ptr_cast(std::unique_ptr<BaseT> base) { + auto derived = dynamic_cast<DerivedT*>(base.get()); + CPPUNIT_ASSERT(derived); + base.release(); + return std::unique_ptr<DerivedT>(derived); + } + + template <typename T> + std::unique_ptr<T> toStorageAPI(documentapi::DocumentMessage &msg) { + auto result = _converter->toStorageAPI(msg, _repo); + return dynamic_unique_ptr_cast<T>(std::move(result)); + } + + template <typename T> + std::unique_ptr<T> toStorageAPI(mbus::Reply &fromReply, + api::StorageCommand &fromCommand) { + auto result = _converter->toStorageAPI(static_cast<documentapi::DocumentReply&>(fromReply), fromCommand); + return dynamic_unique_ptr_cast<T>(std::move(result)); + } + + template <typename T> + std::unique_ptr<T> toDocumentAPI(api::StorageCommand &cmd) { + auto result = _converter->toDocumentAPI(cmd, _repo); + return dynamic_unique_ptr_cast<T>(std::move(result)); + } + void testPut(); void testForwardedPut(); + void testUpdate(); void testRemove(); void testGet(); void testCreateVisitor(); @@ -54,10 +117,14 @@ struct DocumentApiConverterTest : public CppUnit::TestFixture void testVisitorInfo(); void testMultiOperation(); void testBatchDocumentUpdate(); + void testStatBucket(); + void testGetBucketList(); + void testRemoveLocation(); CPPUNIT_TEST_SUITE(DocumentApiConverterTest); CPPUNIT_TEST(testPut); CPPUNIT_TEST(testForwardedPut); + CPPUNIT_TEST(testUpdate); CPPUNIT_TEST(testRemove); CPPUNIT_TEST(testGet); CPPUNIT_TEST(testCreateVisitor); @@ -68,6 +135,9 @@ struct DocumentApiConverterTest : public CppUnit::TestFixture CPPUNIT_TEST(testVisitorInfo); CPPUNIT_TEST(testMultiOperation); CPPUNIT_TEST(testBatchDocumentUpdate); + CPPUNIT_TEST(testStatBucket); + CPPUNIT_TEST(testGetBucketList); + CPPUNIT_TEST(testRemoveLocation); CPPUNIT_TEST_SUITE_END(); }; @@ -75,128 +145,126 @@ CPPUNIT_TEST_SUITE_REGISTRATION(DocumentApiConverterTest); void DocumentApiConverterTest::testPut() { - Document::SP doc(new Document(_html_type, DocumentId(DocIdString("test", "test")))); + auto doc = std::make_shared<Document>(_html_type, defaultDocId); documentapi::PutDocumentMessage putmsg(doc); putmsg.setTimestamp(1234); - std::unique_ptr<storage::api::StorageCommand> cmd = _converter->toStorageAPI(putmsg, _repo); - api::PutCommand* pc = dynamic_cast<api::PutCommand*>(cmd.get()); - - CPPUNIT_ASSERT(pc); - CPPUNIT_ASSERT(pc->getDocument().get() == doc.get()); + auto cmd = toStorageAPI<api::PutCommand>(putmsg); + CPPUNIT_ASSERT_EQUAL(defaultBucket, cmd->getBucket()); + CPPUNIT_ASSERT(cmd->getDocument().get() == doc.get()); std::unique_ptr<mbus::Reply> reply = putmsg.createReply(); CPPUNIT_ASSERT(reply.get()); - std::unique_ptr<storage::api::StorageReply> rep = _converter->toStorageAPI( - static_cast<documentapi::DocumentReply&>(*reply), *cmd); - api::PutReply* pr = dynamic_cast<api::PutReply*>(rep.get()); - CPPUNIT_ASSERT(pr); - - std::unique_ptr<mbus::Message> mbusmsg = _converter->toDocumentAPI(*pc, _repo); + toStorageAPI<api::PutReply>(*reply, *cmd); - documentapi::PutDocumentMessage* mbusput = dynamic_cast<documentapi::PutDocumentMessage*>(mbusmsg.get()); - CPPUNIT_ASSERT(mbusput); - CPPUNIT_ASSERT(mbusput->getDocumentSP().get() == doc.get()); - CPPUNIT_ASSERT(mbusput->getTimestamp() == 1234); -}; + auto mbusPut = toDocumentAPI<documentapi::PutDocumentMessage>(*cmd); + CPPUNIT_ASSERT(mbusPut->getDocumentSP().get() == doc.get()); + CPPUNIT_ASSERT(mbusPut->getTimestamp() == 1234); +} void DocumentApiConverterTest::testForwardedPut() { - Document::SP doc(new Document(_html_type, DocumentId(DocIdString("test", "test")))); + auto doc = std::make_shared<Document>(_html_type, DocumentId(DocIdString("test", "test"))); documentapi::PutDocumentMessage* putmsg = new documentapi::PutDocumentMessage(doc); std::unique_ptr<mbus::Reply> reply(((documentapi::DocumentMessage*)putmsg)->createReply()); reply->setMessage(std::unique_ptr<mbus::Message>(putmsg)); - std::unique_ptr<storage::api::StorageCommand> cmd = _converter->toStorageAPI(*putmsg, _repo); - ((storage::api::PutCommand*)cmd.get())->setTimestamp(1234); + auto cmd = toStorageAPI<api::PutCommand>(*putmsg); + cmd->setTimestamp(1234); - std::unique_ptr<storage::api::StorageReply> rep = cmd->makeReply(); - api::PutReply* pr = dynamic_cast<api::PutReply*>(rep.get()); - CPPUNIT_ASSERT(pr); + auto rep = dynamic_unique_ptr_cast<api::PutReply>(cmd->makeReply()); + _converter->transferReplyState(*rep, *reply); +} - _converter->transferReplyState(*pr, *reply); +void DocumentApiConverterTest::testUpdate() +{ + auto update = std::make_shared<document::DocumentUpdate>(_html_type, defaultDocId); + documentapi::UpdateDocumentMessage updateMsg(update); + updateMsg.setOldTimestamp(1234); + updateMsg.setNewTimestamp(5678); + + auto updateCmd = toStorageAPI<api::UpdateCommand>(updateMsg); + CPPUNIT_ASSERT_EQUAL(defaultBucket, updateCmd->getBucket()); + CPPUNIT_ASSERT_EQUAL(update.get(), updateCmd->getUpdate().get()); + CPPUNIT_ASSERT_EQUAL(api::Timestamp(1234), updateCmd->getOldTimestamp()); + CPPUNIT_ASSERT_EQUAL(api::Timestamp(5678), updateCmd->getTimestamp()); + + auto mbusReply = updateMsg.createReply(); + CPPUNIT_ASSERT(mbusReply.get()); + toStorageAPI<api::UpdateReply>(*mbusReply, *updateCmd); + + auto mbusUpdate = toDocumentAPI<documentapi::UpdateDocumentMessage>(*updateCmd); + CPPUNIT_ASSERT((&mbusUpdate->getDocumentUpdate()) == update.get()); + CPPUNIT_ASSERT_EQUAL(api::Timestamp(1234), mbusUpdate->getOldTimestamp()); + CPPUNIT_ASSERT_EQUAL(api::Timestamp(5678), mbusUpdate->getNewTimestamp()); } void DocumentApiConverterTest::testRemove() { - documentapi::RemoveDocumentMessage removemsg(document::DocumentId(document::DocIdString("test", "test"))); - std::unique_ptr<storage::api::StorageCommand> cmd = _converter->toStorageAPI(removemsg, _repo); - - api::RemoveCommand* rc = dynamic_cast<api::RemoveCommand*>(cmd.get()); - - CPPUNIT_ASSERT(rc); - CPPUNIT_ASSERT_EQUAL(document::DocumentId(document::DocIdString("test", "test")), rc->getDocumentId()); + documentapi::RemoveDocumentMessage removemsg(defaultDocId); + auto cmd = toStorageAPI<api::RemoveCommand>(removemsg); + CPPUNIT_ASSERT_EQUAL(defaultBucket, cmd->getBucket()); + CPPUNIT_ASSERT_EQUAL(defaultDocId, cmd->getDocumentId()); std::unique_ptr<mbus::Reply> reply = removemsg.createReply(); CPPUNIT_ASSERT(reply.get()); - std::unique_ptr<storage::api::StorageReply> rep = _converter->toStorageAPI( - static_cast<documentapi::DocumentReply&>(*reply), *cmd); - api::RemoveReply* pr = dynamic_cast<api::RemoveReply*>(rep.get()); - CPPUNIT_ASSERT(pr); - - std::unique_ptr<mbus::Message> mbusmsg = _converter->toDocumentAPI(*rc, _repo); + toStorageAPI<api::RemoveReply>(*reply, *cmd); - documentapi::RemoveDocumentMessage* mbusremove = dynamic_cast<documentapi::RemoveDocumentMessage*>(mbusmsg.get()); - CPPUNIT_ASSERT(mbusremove); - CPPUNIT_ASSERT_EQUAL(document::DocumentId(document::DocIdString("test", "test")), mbusremove->getDocumentId()); -}; + auto mbusRemove = toDocumentAPI<documentapi::RemoveDocumentMessage>(*cmd); + CPPUNIT_ASSERT_EQUAL(defaultDocId, mbusRemove->getDocumentId()); +} void DocumentApiConverterTest::testGet() { - documentapi::GetDocumentMessage getmsg( - document::DocumentId(document::DocIdString("test", "test")), "foo bar"); - - std::unique_ptr<storage::api::StorageCommand> cmd = _converter->toStorageAPI(getmsg, _repo); + documentapi::GetDocumentMessage getmsg(defaultDocId, "foo bar"); - api::GetCommand* rc = dynamic_cast<api::GetCommand*>(cmd.get()); - - CPPUNIT_ASSERT(rc); - CPPUNIT_ASSERT_EQUAL(document::DocumentId(document::DocIdString("test", "test")), rc->getDocumentId()); - CPPUNIT_ASSERT_EQUAL(vespalib::string("foo bar"), rc->getFieldSet()); -}; + auto cmd = toStorageAPI<api::GetCommand>(getmsg); + CPPUNIT_ASSERT_EQUAL(defaultBucket, cmd->getBucket()); + CPPUNIT_ASSERT_EQUAL(defaultDocId, cmd->getDocumentId()); + CPPUNIT_ASSERT_EQUAL(vespalib::string("foo bar"), cmd->getFieldSet()); +} void DocumentApiConverterTest::testCreateVisitor() { documentapi::CreateVisitorMessage cv("mylib", "myinstance", "control-dest", "data-dest"); - + cv.setBucketSpace(defaultSpaceName); cv.setTimeRemaining(123456); - std::unique_ptr<storage::api::StorageCommand> cmd = _converter->toStorageAPI(cv, _repo); - api::CreateVisitorCommand* pc = dynamic_cast<api::CreateVisitorCommand*>(cmd.get()); - - CPPUNIT_ASSERT(pc); - CPPUNIT_ASSERT_EQUAL(vespalib::string("mylib"), pc->getLibraryName()); - CPPUNIT_ASSERT_EQUAL(vespalib::string("myinstance"), pc->getInstanceId()); - CPPUNIT_ASSERT_EQUAL(vespalib::string("control-dest"), pc->getControlDestination()); - CPPUNIT_ASSERT_EQUAL(vespalib::string("data-dest"), pc->getDataDestination()); - CPPUNIT_ASSERT_EQUAL(123456u, pc->getTimeout()); + + auto cmd = toStorageAPI<api::CreateVisitorCommand>(cv); + CPPUNIT_ASSERT_EQUAL(defaultBucketSpace, cmd->getBucket().getBucketSpace()); + CPPUNIT_ASSERT_EQUAL(vespalib::string("mylib"), cmd->getLibraryName()); + CPPUNIT_ASSERT_EQUAL(vespalib::string("myinstance"), cmd->getInstanceId()); + CPPUNIT_ASSERT_EQUAL(vespalib::string("control-dest"), cmd->getControlDestination()); + CPPUNIT_ASSERT_EQUAL(vespalib::string("data-dest"), cmd->getDataDestination()); + CPPUNIT_ASSERT_EQUAL(123456u, cmd->getTimeout()); + + auto msg = toDocumentAPI<documentapi::CreateVisitorMessage>(*cmd); + CPPUNIT_ASSERT_EQUAL(defaultSpaceName, msg->getBucketSpace()); } void DocumentApiConverterTest::testCreateVisitorHighTimeout() { documentapi::CreateVisitorMessage cv("mylib", "myinstance", "control-dest", "data-dest"); cv.setTimeRemaining((uint64_t)std::numeric_limits<uint32_t>::max() + 1); // Will be INT_MAX - std::unique_ptr<storage::api::StorageCommand> cmd = _converter->toStorageAPI(cv, _repo); - api::CreateVisitorCommand* pc = dynamic_cast<api::CreateVisitorCommand*>(cmd.get()); - - CPPUNIT_ASSERT(pc); - CPPUNIT_ASSERT_EQUAL(vespalib::string("mylib"), pc->getLibraryName()); - CPPUNIT_ASSERT_EQUAL(vespalib::string("myinstance"), pc->getInstanceId()); - CPPUNIT_ASSERT_EQUAL(vespalib::string("control-dest"), pc->getControlDestination()); - CPPUNIT_ASSERT_EQUAL(vespalib::string("data-dest"), pc->getDataDestination()); - CPPUNIT_ASSERT_EQUAL((uint32_t) std::numeric_limits<int32_t>::max(), pc->getTimeout()); + + auto cmd = toStorageAPI<api::CreateVisitorCommand>(cv); + CPPUNIT_ASSERT_EQUAL(vespalib::string("mylib"), cmd->getLibraryName()); + CPPUNIT_ASSERT_EQUAL(vespalib::string("myinstance"), cmd->getInstanceId()); + CPPUNIT_ASSERT_EQUAL(vespalib::string("control-dest"), cmd->getControlDestination()); + CPPUNIT_ASSERT_EQUAL(vespalib::string("data-dest"), cmd->getDataDestination()); + CPPUNIT_ASSERT_EQUAL((uint32_t) std::numeric_limits<int32_t>::max(), cmd->getTimeout()); } void DocumentApiConverterTest::testCreateVisitorReplyNotReady() { documentapi::CreateVisitorMessage cv("mylib", "myinstance", "control-dest", "data-dest"); - std::unique_ptr<storage::api::StorageCommand> cmd = _converter->toStorageAPI(cv, _repo); - CPPUNIT_ASSERT(cmd.get()); - api::CreateVisitorCommand& cvc = dynamic_cast<api::CreateVisitorCommand&>(*cmd); - api::CreateVisitorReply cvr(cvc); + + auto cmd = toStorageAPI<api::CreateVisitorCommand>(cv); + api::CreateVisitorReply cvr(*cmd); cvr.setResult(api::ReturnCode(api::ReturnCode::NOT_READY, "not ready")); std::unique_ptr<documentapi::CreateVisitorReply> reply( @@ -207,14 +275,12 @@ void DocumentApiConverterTest::testCreateVisitorReplyNotReady() CPPUNIT_ASSERT_EQUAL(document::BucketId(std::numeric_limits<int>::max()), reply->getLastBucket()); } - void DocumentApiConverterTest::testCreateVisitorReplyLastBucket() { documentapi::CreateVisitorMessage cv("mylib", "myinstance", "control-dest", "data-dest"); - std::unique_ptr<storage::api::StorageCommand> cmd = _converter->toStorageAPI(cv, _repo); - CPPUNIT_ASSERT(cmd.get()); - api::CreateVisitorCommand& cvc = dynamic_cast<api::CreateVisitorCommand&>(*cmd); - api::CreateVisitorReply cvr(cvc); + + auto cmd = toStorageAPI<api::CreateVisitorCommand>(cv); + api::CreateVisitorReply cvr(*cmd); cvr.setLastBucket(document::BucketId(123)); std::unique_ptr<documentapi::CreateVisitorReply> reply( dynamic_cast<documentapi::CreateVisitorReply*>(cv.createReply().release())); @@ -224,17 +290,12 @@ void DocumentApiConverterTest::testCreateVisitorReplyLastBucket() CPPUNIT_ASSERT_EQUAL(document::BucketId(123), reply->getLastBucket()); } - void DocumentApiConverterTest::testDestroyVisitor() { documentapi::DestroyVisitorMessage cv("myinstance"); - std::unique_ptr<storage::api::StorageCommand> cmd = _converter->toStorageAPI(cv, _repo); - - api::DestroyVisitorCommand* pc = dynamic_cast<api::DestroyVisitorCommand*>(cmd.get()); - - CPPUNIT_ASSERT(pc); - CPPUNIT_ASSERT_EQUAL(vespalib::string("myinstance"), pc->getInstanceId()); + auto cmd = toStorageAPI<api::DestroyVisitorCommand>(cv); + CPPUNIT_ASSERT_EQUAL(vespalib::string("myinstance"), cmd->getInstanceId()); } void @@ -248,10 +309,7 @@ DocumentApiConverterTest::testVisitorInfo() vicmd.setBucketsCompleted(bucketsCompleted); - std::unique_ptr<mbus::Message> mbusmsg = _converter->toDocumentAPI(vicmd, _repo); - - documentapi::VisitorInfoMessage* mbusvi = dynamic_cast<documentapi::VisitorInfoMessage*>(mbusmsg.get()); - CPPUNIT_ASSERT(mbusvi); + auto mbusvi = toDocumentAPI<documentapi::VisitorInfoMessage>(vicmd); CPPUNIT_ASSERT_EQUAL(document::BucketId(16, 1), mbusvi->getFinishedBuckets()[0]); CPPUNIT_ASSERT_EQUAL(document::BucketId(16, 2), mbusvi->getFinishedBuckets()[1]); CPPUNIT_ASSERT_EQUAL(document::BucketId(16, 4), mbusvi->getFinishedBuckets()[2]); @@ -259,17 +317,13 @@ DocumentApiConverterTest::testVisitorInfo() std::unique_ptr<mbus::Reply> reply = mbusvi->createReply(); CPPUNIT_ASSERT(reply.get()); - std::unique_ptr<storage::api::StorageReply> rep = _converter->toStorageAPI( - static_cast<documentapi::DocumentReply&>(*reply), vicmd); - api::VisitorInfoReply* pr = dynamic_cast<api::VisitorInfoReply*>(rep.get()); - CPPUNIT_ASSERT(pr); + toStorageAPI<api::VisitorInfoReply>(*reply, vicmd); } void DocumentApiConverterTest::testMultiOperation() { - //create a document - Document::SP doc(new Document(_html_type, DocumentId(DocIdString("test", "test")))); + auto doc = std::make_shared<Document>(_html_type, DocumentId(DocIdString("test", "test"))); document::BucketIdFactory fac; document::BucketId bucketId = fac.getBucketId(doc->getId()); @@ -284,10 +338,7 @@ DocumentApiConverterTest::testMultiOperation() CPPUNIT_ASSERT(momsg.getBuffer().size() > 0); // Convert it to Storage API - std::unique_ptr<api::StorageCommand> stcmd = _converter->toStorageAPI(momsg, _repo); - - api::MultiOperationCommand* mocmd = dynamic_cast<api::MultiOperationCommand*>(stcmd.get()); - CPPUNIT_ASSERT(mocmd); + auto mocmd = toStorageAPI<api::MultiOperationCommand>(momsg); CPPUNIT_ASSERT(mocmd->getBuffer().size() > 0); // Get operations from Storage API message and check document @@ -296,7 +347,7 @@ DocumentApiConverterTest::testMultiOperation() CPPUNIT_ASSERT_EQUAL(*doc, *dynamic_cast<document::Document*>(list.begin()->getDocument().get())); // Create Storage API Reply - std::unique_ptr<api::MultiOperationReply> moreply = std::unique_ptr<api::MultiOperationReply>(new api::MultiOperationReply(*mocmd)); + auto moreply = std::make_unique<api::MultiOperationReply>(*mocmd); CPPUNIT_ASSERT(moreply.get()); // convert storage api reply to mbus reply..... @@ -308,9 +359,7 @@ DocumentApiConverterTest::testMultiOperation() mocmd.getOperations().addPut(*doc, 100); // Convert it to documentapi - std::unique_ptr<mbus::Message> mbmsg = _converter->toDocumentAPI(mocmd, _repo); - documentapi::MultiOperationMessage* momsg = dynamic_cast<documentapi::MultiOperationMessage*>(mbmsg.get()); - CPPUNIT_ASSERT(momsg); + auto momsg = toDocumentAPI<documentapi::MultiOperationMessage>(mocmd); // Get operations from Document API msg and check document const vdslib::DocumentList& list = momsg->getOperations(); @@ -322,11 +371,7 @@ DocumentApiConverterTest::testMultiOperation() CPPUNIT_ASSERT(moreply.get()); //Convert DocumentAPI reply to storageapi reply - std::unique_ptr<api::StorageReply> streply = - _converter->toStorageAPI(static_cast<documentapi::DocumentReply&>(*moreply), mocmd); - api::MultiOperationReply* mostreply = dynamic_cast<api::MultiOperationReply*>(streply.get()); - CPPUNIT_ASSERT(mostreply); - + toStorageAPI<api::MultiOperationReply>(*moreply, mocmd); } } @@ -337,19 +382,19 @@ DocumentApiConverterTest::testBatchDocumentUpdate() { document::DocumentId docId(document::UserDocIdString("userdoc:test:1234:test1")); - document::DocumentUpdate::SP update(new document::DocumentUpdate(_html_type, docId)); + auto update = std::make_shared<document::DocumentUpdate>(_html_type, docId); updates.push_back(update); } { document::DocumentId docId(document::UserDocIdString("userdoc:test:1234:test2")); - document::DocumentUpdate::SP update(new document::DocumentUpdate(_html_type, docId)); + auto update = std::make_shared<document::DocumentUpdate>(_html_type, docId); updates.push_back(update); } { document::DocumentId docId(document::UserDocIdString("userdoc:test:1234:test3")); - document::DocumentUpdate::SP update(new document::DocumentUpdate(_html_type, docId)); + auto update = std::make_shared<document::DocumentUpdate>(_html_type, docId); updates.push_back(update); } @@ -358,9 +403,7 @@ DocumentApiConverterTest::testBatchDocumentUpdate() msg->addUpdate(updates[i]); } - std::unique_ptr<storage::api::StorageCommand> cmd = _converter->toStorageAPI(*msg, _repo); - api::BatchDocumentUpdateCommand* batchCmd = dynamic_cast<api::BatchDocumentUpdateCommand*>(cmd.get()); - CPPUNIT_ASSERT(batchCmd); + auto batchCmd = toStorageAPI<api::BatchDocumentUpdateCommand>(*msg); CPPUNIT_ASSERT_EQUAL(updates.size(), batchCmd->getUpdates().size()); for (std::size_t i = 0; i < updates.size(); ++i) { CPPUNIT_ASSERT_EQUAL(*updates[i], *batchCmd->getUpdates()[i]); @@ -384,4 +427,40 @@ DocumentApiConverterTest::testBatchDocumentUpdate() CPPUNIT_ASSERT(mbusBatchReply->getDocumentsNotFound()[2] == true); } +void +DocumentApiConverterTest::testStatBucket() +{ + documentapi::StatBucketMessage msg(BucketId(123), ""); + msg.setBucketSpace(defaultSpaceName); + + auto cmd = toStorageAPI<api::StatBucketCommand>(msg); + CPPUNIT_ASSERT_EQUAL(Bucket(defaultBucketSpace, BucketId(123)), cmd->getBucket()); + + auto mbusMsg = toDocumentAPI<documentapi::StatBucketMessage>(*cmd); + CPPUNIT_ASSERT_EQUAL(BucketId(123), mbusMsg->getBucketId()); + CPPUNIT_ASSERT_EQUAL(defaultSpaceName, mbusMsg->getBucketSpace()); +} + +void +DocumentApiConverterTest::testGetBucketList() +{ + documentapi::GetBucketListMessage msg(BucketId(123)); + msg.setBucketSpace(defaultSpaceName); + + auto cmd = toStorageAPI<api::GetBucketListCommand>(msg); + CPPUNIT_ASSERT_EQUAL(Bucket(defaultBucketSpace, BucketId(123)), cmd->getBucket()); +} + +void +DocumentApiConverterTest::testRemoveLocation() +{ + document::BucketIdFactory factory; + document::select::Parser parser(*_repo, factory); + documentapi::RemoveLocationMessage msg(factory, parser, "id.group == \"mygroup\""); + msg.setBucketSpace(defaultSpaceName); + + auto cmd = toStorageAPI<api::RemoveLocationCommand>(msg); + CPPUNIT_ASSERT_EQUAL(defaultBucket, cmd->getBucket()); +} + } diff --git a/storage/src/tests/storageserver/mergethrottlertest.cpp b/storage/src/tests/storageserver/mergethrottlertest.cpp index 7d04714e9a8..3d469fc4252 100644 --- a/storage/src/tests/storageserver/mergethrottlertest.cpp +++ b/storage/src/tests/storageserver/mergethrottlertest.cpp @@ -313,7 +313,7 @@ MergeThrottlerTest::testChain() _servers[i]->setClusterState(lib::ClusterState("distributor:100 storage:100 version:123")); } - BucketId bid(14, 0x1337); + Bucket bucket(makeDocumentBucket(BucketId(14, 0x1337))); // Use different node permutations to ensure it works no matter which node is // set as the executor. More specifically, _all_ permutations. @@ -321,15 +321,11 @@ MergeThrottlerTest::testChain() uint16_t lastNodeIdx = _storageNodeCount - 1; uint16_t executorNode = indices[0]; - //std::cout << "\n----\n"; std::vector<MergeBucketCommand::Node> nodes; for (int i = 0; i < _storageNodeCount; ++i) { nodes.push_back(MergeBucketCommand::Node(indices[i], (i + executorNode) % 2 == 0)); - //std::cout << indices[i] << " "; } - //std::cout << "\n"; - std::shared_ptr<MergeBucketCommand> cmd( - new MergeBucketCommand(makeDocumentBucket(bid), nodes, UINT_MAX, 123)); + auto cmd = std::make_shared<MergeBucketCommand>(bucket, nodes, UINT_MAX, 123); cmd->setPriority(7); cmd->setTimeout(54321); StorageMessageAddress address("storage", lib::NodeType::STORAGE, 0); @@ -351,8 +347,6 @@ MergeThrottlerTest::testChain() _topLinks[i]->sendDown(fwd); _topLinks[i]->waitForMessage(MessageType::MERGEBUCKET, _messageWaitTime); - //std::cout << "fwd " << i << " -> " << i+1 << "\n"; - // Forwarded merge should not be sent down. Should not be necessary // to lock throttler here, since it should be sleeping like a champion CPPUNIT_ASSERT_EQUAL(std::size_t(0), _bottomLinks[i]->getNumCommands()); @@ -363,7 +357,6 @@ MergeThrottlerTest::testChain() CPPUNIT_ASSERT_EQUAL(uint16_t(i + 1), fwd->getAddress()->getIndex()); CPPUNIT_ASSERT_EQUAL(distributorIndex, dynamic_cast<const StorageCommand&>(*fwd).getSourceIndex()); { - //uint16_t chain[] = { 0 }; std::vector<uint16_t> chain; for (int j = 0; j <= i; ++j) { chain.push_back(j); @@ -416,10 +409,10 @@ MergeThrottlerTest::testChain() // The MergeBucketCommand that is kept in the executor node should // be the one from the node it initially got it from, NOT the one // from the last node, since the chain has looped - CPPUNIT_ASSERT(_throttlers[executorNode]->getActiveMerges().find(bid) + CPPUNIT_ASSERT(_throttlers[executorNode]->getActiveMerges().find(bucket) != _throttlers[executorNode]->getActiveMerges().end()); CPPUNIT_ASSERT_EQUAL(static_cast<StorageMessage*>(fwdToExec.get()), - _throttlers[executorNode]->getActiveMerges().find(bid)->second.getMergeCmd().get()); + _throttlers[executorNode]->getActiveMerges().find(bucket)->second.getMergeCmd().get()); } // Send reply up from persistence layer to simulate a completed @@ -440,7 +433,7 @@ MergeThrottlerTest::testChain() // Merge should not be removed yet from executor, since it's pending an unwind CPPUNIT_ASSERT_EQUAL(std::size_t(1), _throttlers[executorNode]->getActiveMerges().size()); CPPUNIT_ASSERT_EQUAL(static_cast<StorageMessage*>(fwdToExec.get()), - _throttlers[executorNode]->getActiveMerges().find(bid)->second.getMergeCmd().get()); + _throttlers[executorNode]->getActiveMerges().find(bucket)->second.getMergeCmd().get()); } // MergeBucketReply waiting to be sent back to node 2. NOTE: we don't have any // transport context stuff set up here to perform the reply mapping, so we @@ -452,8 +445,6 @@ MergeThrottlerTest::testChain() // eg: 0 -> 2 -> 1 -> 0. Or: 2 -> 1 -> 0 if no cycle for (int i = (executorNode != lastNodeIdx ? _storageNodeCount - 1 : _storageNodeCount - 2); i >= 0; --i) { - //std::cout << "unwind " << i << "\n"; - _topLinks[i]->sendDown(unwind); _topLinks[i]->waitForMessage(MessageType::MERGEBUCKET_REPLY, _messageWaitTime); @@ -469,7 +460,7 @@ MergeThrottlerTest::testChain() CPPUNIT_ASSERT_EQUAL(ReturnCode::OK, mbr.getResult().getResult()); CPPUNIT_ASSERT_EQUAL(vespalib::string("Great success! :D-|-<"), mbr.getResult().getMessage()); - CPPUNIT_ASSERT_EQUAL(bid, mbr.getBucketId()); + CPPUNIT_ASSERT_EQUAL(bucket, mbr.getBucket()); } while (std::next_permutation(indices, indices + _storageNodeCount)); diff --git a/storage/src/vespa/storage/bucketdb/bucketmanager.cpp b/storage/src/vespa/storage/bucketdb/bucketmanager.cpp index 5e7cf4af046..00fa5c95c9b 100644 --- a/storage/src/vespa/storage/bucketdb/bucketmanager.cpp +++ b/storage/src/vespa/storage/bucketdb/bucketmanager.cpp @@ -104,14 +104,14 @@ namespace { DistributorStateCache _state; std::unordered_map<uint16_t, ResultArray>& _result; const document::BucketIdFactory& _factory; - std::shared_ptr<lib::Distribution> _storageDistribution; + std::shared_ptr<const lib::Distribution> _storageDistribution; public: DistributorInfoGatherer( const lib::ClusterState& systemState, std::unordered_map<uint16_t, ResultArray>& result, const document::BucketIdFactory& factory, - std::shared_ptr<lib::Distribution> distribution) + std::shared_ptr<const lib::Distribution> distribution) : _state(*distribution, systemState), _result(result), _factory(factory), @@ -513,7 +513,7 @@ BucketManager::processRequestBucketInfoCommands(document::BucketSpace bucketSpac typedef std::shared_ptr<api::RequestBucketInfoCommand> RBISP; std::map<uint16_t, RBISP> requests; - lib::Distribution::SP distribution(_component.getDistribution()); + auto distribution(_component.getBucketSpaceRepo().get(bucketSpace).getDistribution()); lib::ClusterState::CSP clusterState( _component.getStateUpdater().getSystemState()); assert(clusterState.get()); diff --git a/storage/src/vespa/storage/bucketdb/lockablemap.h b/storage/src/vespa/storage/bucketdb/lockablemap.h index 03d94b27f0b..a4382ceb683 100644 --- a/storage/src/vespa/storage/bucketdb/lockablemap.h +++ b/storage/src/vespa/storage/bucketdb/lockablemap.h @@ -16,10 +16,12 @@ #include <map> #include <vespa/vespalib/util/printable.h> -#include <vespa/vespalib/util/sync.h> #include <vespa/vespalib/stllike/hash_map.h> #include <vespa/vespalib/stllike/hash_set.h> #include <vespa/document/bucket/bucketid.h> +#include <mutex> +#include <condition_variable> +#include <cassert> namespace storage { @@ -238,7 +240,8 @@ private: }; Map _map; - vespalib::Monitor _lock; + mutable std::mutex _lock; + std::condition_variable _cond; LockIdSet _lockedKeys; LockWaiters _lockWaiters; @@ -247,9 +250,9 @@ private: const char* clientId, bool haslock, bool& preExisted); void unlock(const key_type& key); bool findNextKey(key_type& key, mapped_type& val, const char* clientId, - vespalib::MonitorGuard& guard); + std::unique_lock<std::mutex> &guard); bool handleDecision(key_type& key, mapped_type& val, Decision decision); - void ackquireKey(const LockId & lid, vespalib::MonitorGuard & guard); + void acquireKey(const LockId & lid, std::unique_lock<std::mutex> &guard); /** * Process up to `chunkSize` bucket database entries from--and possibly @@ -304,7 +307,7 @@ private: void addAndLockResults(const std::vector<BucketId::Type> keys, const char* clientId, std::map<BucketId, WrappedEntry>& results, - vespalib::MonitorGuard& guard); + std::unique_lock<std::mutex> &guard); }; } // storage diff --git a/storage/src/vespa/storage/bucketdb/lockablemap.hpp b/storage/src/vespa/storage/bucketdb/lockablemap.hpp index f5d692139be..f370a792145 100644 --- a/storage/src/vespa/storage/bucketdb/lockablemap.hpp +++ b/storage/src/vespa/storage/bucketdb/lockablemap.hpp @@ -69,6 +69,7 @@ template<typename Map> LockableMap<Map>::LockableMap() : _map(), _lock(), + _cond(), _lockedKeys(), _lockWaiters() {} @@ -80,8 +81,8 @@ template<typename Map> bool LockableMap<Map>::operator==(const LockableMap<Map>& other) const { - vespalib::LockGuard guard(_lock); - vespalib::LockGuard guard2(other._lock); + std::lock_guard<std::mutex> guard(_lock); + std::lock_guard<std::mutex> guard2(other._lock); return (_map == other._map); } @@ -89,8 +90,8 @@ template<typename Map> bool LockableMap<Map>::operator<(const LockableMap<Map>& other) const { - vespalib::LockGuard guard(_lock); - vespalib::LockGuard guard2(other._lock); + std::lock_guard<std::mutex> guard(_lock); + std::lock_guard<std::mutex> guard2(other._lock); return (_map < other._map); } @@ -98,7 +99,7 @@ template<typename Map> typename Map::size_type LockableMap<Map>::size() const { - vespalib::LockGuard guard(_lock); + std::lock_guard<std::mutex> guard(_lock); return _map.size(); } @@ -106,17 +107,16 @@ template<typename Map> typename Map::size_type LockableMap<Map>::getMemoryUsage() const { - vespalib::MonitorGuard guard(_lock); - return _map.getMemoryUsage() - + _lockedKeys.getMemoryUsage() - + sizeof(vespalib::Monitor); + std::lock_guard<std::mutex> guard(_lock); + return _map.getMemoryUsage() + _lockedKeys.getMemoryUsage() + + sizeof(std::mutex) + sizeof(std::condition_variable); } template<typename Map> bool LockableMap<Map>::empty() const { - vespalib::LockGuard guard(_lock); + std::lock_guard<std::mutex> guard(_lock); return _map.empty(); } @@ -124,18 +124,18 @@ template<typename Map> void LockableMap<Map>::swap(LockableMap<Map>& other) { - vespalib::LockGuard guard(_lock); - vespalib::LockGuard guard2(other._lock); + std::lock_guard<std::mutex> guard(_lock); + std::lock_guard<std::mutex> guard2(other._lock); return _map.swap(other._map); } template<typename Map> -void LockableMap<Map>::ackquireKey(const LockId & lid, vespalib::MonitorGuard & guard) +void LockableMap<Map>::acquireKey(const LockId & lid, std::unique_lock<std::mutex> &guard) { if (_lockedKeys.exist(lid)) { typename LockWaiters::Key waitId(_lockWaiters.insert(lid)); while (_lockedKeys.exist(lid)) { - guard.wait(); + _cond.wait(guard); } _lockWaiters.erase(waitId); } @@ -148,8 +148,8 @@ LockableMap<Map>::get(const key_type& key, const char* clientId, bool lockIfNonExistingAndNotCreating) { LockId lid(key, clientId); - vespalib::MonitorGuard guard(_lock); - ackquireKey(lid, guard); + std::unique_lock<std::mutex> guard(_lock); + acquireKey(lid, guard); bool preExisted = false; typename Map::iterator it = _map.find(key, createIfNonExisting, preExisted); @@ -197,9 +197,9 @@ bool LockableMap<Map>::erase(const key_type& key, const char* clientId, bool haslock) { LockId lid(key, clientId); - vespalib::MonitorGuard guard(_lock); + std::unique_lock<std::mutex> guard(_lock); if (!haslock) { - ackquireKey(lid, guard); + acquireKey(lid, guard); } #ifdef ENABLE_BUCKET_OPERATION_LOGGING debug::logBucketDbErase(key, debug::TypeTag<mapped_type>()); @@ -213,9 +213,9 @@ LockableMap<Map>::insert(const key_type& key, const mapped_type& value, const char* clientId, bool haslock, bool& preExisted) { LockId lid(key, clientId); - vespalib::MonitorGuard guard(_lock); + std::unique_lock<std::mutex> guard(_lock); if (!haslock) { - ackquireKey(lid, guard); + acquireKey(lid, guard); } #ifdef ENABLE_BUCKET_OPERATION_LOGGING debug::logBucketDbInsert(key, value); @@ -227,7 +227,7 @@ template<typename Map> void LockableMap<Map>::clear() { - vespalib::LockGuard guard(_lock); + std::lock_guard<std::mutex> guard(_lock); _map.clear(); } @@ -235,13 +235,13 @@ template<typename Map> bool LockableMap<Map>::findNextKey(key_type& key, mapped_type& val, const char* clientId, - vespalib::MonitorGuard& guard) + std::unique_lock<std::mutex> &guard) { // Wait for next value to unlock. typename Map::iterator it(_map.lower_bound(key)); while (it != _map.end() && _lockedKeys.exist(LockId(it->first, ""))) { typename LockWaiters::Key waitId(_lockWaiters.insert(LockId(it->first, clientId))); - guard.wait(); + _cond.wait(guard); _lockWaiters.erase(waitId); it = _map.lower_bound(key); } @@ -279,16 +279,16 @@ LockableMap<Map>::each(Functor& functor, const char* clientId, mapped_type val; Decision decision; { - vespalib::MonitorGuard guard(_lock); + std::unique_lock<std::mutex> guard(_lock); if (findNextKey(key, val, clientId, guard) || key > last) return; _lockedKeys.insert(LockId(key, clientId)); } try{ while (true) { decision = functor(const_cast<const key_type&>(key), val); - vespalib::MonitorGuard guard(_lock); + std::unique_lock<std::mutex> guard(_lock); _lockedKeys.erase(LockId(key, clientId)); - guard.broadcast(); + _cond.notify_all(); if (handleDecision(key, val, decision)) return; ++key; if (findNextKey(key, val, clientId, guard) || key > last) return; @@ -297,9 +297,9 @@ LockableMap<Map>::each(Functor& functor, const char* clientId, } catch (...) { // Assuming only the functor call can throw exceptions, we need // to unlock the current key before exiting - vespalib::MonitorGuard guard(_lock); + std::lock_guard<std::mutex> guard(_lock); _lockedKeys.erase(LockId(key, clientId)); - guard.broadcast(); + _cond.notify_all(); throw; } } @@ -314,16 +314,16 @@ LockableMap<Map>::each(const Functor& functor, const char* clientId, mapped_type val; Decision decision; { - vespalib::MonitorGuard guard(_lock); + std::unique_lock<std::mutex> guard(_lock); if (findNextKey(key, val, clientId, guard) || key > last) return; _lockedKeys.insert(LockId(key, clientId)); } try{ while (true) { decision = functor(const_cast<const key_type&>(key), val); - vespalib::MonitorGuard guard(_lock); + std::unique_lock<std::mutex> guard(_lock); _lockedKeys.erase(LockId(key, clientId)); - guard.broadcast(); + _cond.notify_all(); if (handleDecision(key, val, decision)) return; ++key; if (findNextKey(key, val, clientId, guard) || key > last) return; @@ -332,9 +332,9 @@ LockableMap<Map>::each(const Functor& functor, const char* clientId, } catch (...) { // Assuming only the functor call can throw exceptions, we need // to unlock the current key before exiting - vespalib::MonitorGuard guard(_lock); + std::lock_guard<std::mutex> guard(_lock); _lockedKeys.erase(LockId(key, clientId)); - guard.broadcast(); + _cond.notify_all(); throw; } } @@ -347,7 +347,7 @@ LockableMap<Map>::all(Functor& functor, const char* clientId, { key_type key = first; mapped_type val; - vespalib::MonitorGuard guard(_lock); + std::unique_lock<std::mutex> guard(_lock); while (true) { if (findNextKey(key, val, clientId, guard) || key > last) return; Decision d(functor(const_cast<const key_type&>(key), val)); @@ -364,7 +364,7 @@ LockableMap<Map>::all(const Functor& functor, const char* clientId, { key_type key = first; mapped_type val; - vespalib::MonitorGuard guard(_lock); + std::unique_lock<std::mutex> guard(_lock); while (true) { if (findNextKey(key, val, clientId, guard) || key > last) return; Decision d(functor(const_cast<const key_type&>(key), val)); @@ -383,7 +383,7 @@ LockableMap<Map>::processNextChunk(Functor& functor, const uint32_t chunkSize) { mapped_type val; - vespalib::MonitorGuard guard(_lock); + std::unique_lock<std::mutex> guard(_lock); for (uint32_t processed = 0; processed < chunkSize; ++processed) { if (findNextKey(key, val, clientId, guard)) { return false; @@ -422,7 +422,7 @@ void LockableMap<Map>::print(std::ostream& out, bool verbose, const std::string& indent) const { - vespalib::LockGuard guard(_lock); + std::lock_guard<std::mutex> guard(_lock); out << "LockableMap {\n" << indent << " "; if (verbose) { @@ -462,9 +462,9 @@ template<typename Map> void LockableMap<Map>::unlock(const key_type& key) { - vespalib::MonitorGuard guard(_lock); + std::lock_guard<std::mutex> guard(_lock); _lockedKeys.erase(LockId(key, "")); - guard.broadcast(); + _cond.notify_all(); } /** @@ -550,7 +550,7 @@ LockableMap<Map>::addAndLockResults( const std::vector<BucketId::Type> keys, const char* clientId, std::map<BucketId, WrappedEntry>& results, - vespalib::MonitorGuard& guard) + std::unique_lock<std::mutex> &guard) { // Wait until all buckets are free to be added, then add them all. while (true) { @@ -567,7 +567,7 @@ LockableMap<Map>::addAndLockResults( if (!allOk) { typename LockWaiters::Key waitId(_lockWaiters.insert(LockId(waitingFor, clientId))); - guard.wait(); + _cond.wait(guard); _lockWaiters.erase(waitId); } else { for (uint32_t i=0; i<keys.size(); i++) { @@ -593,7 +593,7 @@ LockableMap<Map>::createAppropriateBucket( const char* clientId, const BucketId& bucket) { - vespalib::MonitorGuard guard(_lock); + std::unique_lock<std::mutex> guard(_lock); typename Map::const_iterator iter = _map.lower_bound(bucket.toKey()); // Find the two buckets around the possible new bucket. The new @@ -613,7 +613,7 @@ LockableMap<Map>::createAppropriateBucket( BucketId::Type key = newBucket.stripUnused().toKey(); LockId lid(key, clientId); - ackquireKey(lid, guard); + acquireKey(lid, guard); bool preExisted; typename Map::iterator it = _map.find(key, true, preExisted); _lockedKeys.insert(LockId(key, clientId)); @@ -625,7 +625,7 @@ std::map<document::BucketId, typename LockableMap<Map>::WrappedEntry> LockableMap<Map>::getContained(const BucketId& bucket, const char* clientId) { - vespalib::MonitorGuard guard(_lock); + std::unique_lock<std::mutex> guard(_lock); std::map<BucketId, WrappedEntry> results; BucketId result; @@ -718,7 +718,7 @@ std::map<document::BucketId, typename LockableMap<Map>::WrappedEntry> LockableMap<Map>::getAll(const BucketId& bucket, const char* clientId, const BucketId& sibling) { - vespalib::MonitorGuard guard(_lock); + std::unique_lock<std::mutex> guard(_lock); std::map<BucketId, WrappedEntry> results; std::vector<BucketId::Type> keys; @@ -734,7 +734,7 @@ template<typename Map> bool LockableMap<Map>::isConsistent(const typename LockableMap<Map>::WrappedEntry& entry) { - vespalib::MonitorGuard guard(_lock); + std::lock_guard<std::mutex> guard(_lock); BucketId sibling(0); std::vector<BucketId::Type> keys; @@ -750,7 +750,7 @@ template<typename Map> void LockableMap<Map>::showLockClients(vespalib::asciistream & out) const { - vespalib::MonitorGuard guard(_lock); + std::lock_guard<std::mutex> guard(_lock); out << "Currently grabbed locks:"; for (typename LockIdSet::const_iterator it = _lockedKeys.begin(); it != _lockedKeys.end(); ++it) diff --git a/storage/src/vespa/storage/bucketdb/storagebucketdbinitializer.cpp b/storage/src/vespa/storage/bucketdb/storagebucketdbinitializer.cpp index 3a832f0fe3b..fc2c2066b6f 100644 --- a/storage/src/vespa/storage/bucketdb/storagebucketdbinitializer.cpp +++ b/storage/src/vespa/storage/bucketdb/storagebucketdbinitializer.cpp @@ -5,6 +5,7 @@ #include "config-stor-bucket-init.h" #include "storbucketdb.h" #include <vespa/storage/common/nodestateupdater.h> +#include <vespa/storage/common/content_bucket_space_repo.h> #include <vespa/storage/storageserver/storagemetricsset.h> #include <vespa/vdslib/distribution/distribution.h> #include <vespa/vespalib/io/fileutil.h> @@ -65,9 +66,8 @@ StorageBucketDBInitializer::System::System( : _doneInitializeHandler(doneInitializeHandler), _component(compReg, "storagebucketdbinitializer"), _partitions(partitions), - _bucketDatabase(_component.getBucketDatabase(BucketSpace::placeHolder())), + _bucketSpaceRepo(_component.getBucketSpaceRepo()), _nodeIndex(_component.getIndex()), - _distribution(*_component.getDistribution()), _nodeState() { // Is this correct? We should get the node state from the node state updater @@ -82,6 +82,12 @@ StorageBucketDBInitializer::System::System( } } +StorBucketDatabase & +StorageBucketDBInitializer::System::getBucketDatabase(document::BucketSpace bucketSpace) const +{ + return _component.getBucketDatabase(bucketSpace); +} + StorageBucketDBInitializer::Metrics::Metrics(framework::Component& component) : metrics::MetricSet("dbinit", "", "Metrics for the storage bucket database initializer"), @@ -134,7 +140,10 @@ StorageBucketDBInitializer::StorageBucketDBInitializer( // Initialize read state for disks being available for (uint32_t i=0; i<_system._partitions.size(); ++i) { if (!_system._partitions[i].isUp()) continue; - _readState[i] = BucketReadState::UP(new BucketReadState); + _readState[i] = std::make_unique<BucketSpaceReadState>(); + for (const auto &elem : _system._bucketSpaceRepo) { + _readState[i]->emplace(elem.first, std::make_unique<BucketReadState>()); + } _state._dirsToList += 1; } _system._component.registerStatusPage(*this); @@ -155,9 +164,14 @@ StorageBucketDBInitializer::onOpen() // Trigger bucket database initialization for (uint32_t i=0; i<_system._partitions.size(); ++i) { if (!_system._partitions[i].isUp()) continue; - ReadBucketList::SP msg(new ReadBucketList(BucketSpace::placeHolder(), spi::PartitionId(i))); - _state._lists[msg->getMsgId()] = msg; - sendDown(msg); + assert(_readState[i]); + const BucketSpaceReadState &spaceState = *_readState[i]; + for (const auto &stateElem : spaceState) { + document::BucketSpace bucketSpace = stateElem.first; + auto msg = std::make_shared<ReadBucketList>(bucketSpace, spi::PartitionId(i)); + _state._lists[msg->getMsgId()] = msg; + sendDown(msg); + } } framework::MilliSecTime maxProcessingTime(10); framework::MilliSecTime sleepTime(1000); @@ -220,6 +234,26 @@ StorageBucketDBInitializer::print( out << "StorageBucketDBInitializer()"; } +namespace { + +size_t +notDoneCount(const StorageBucketDBInitializer::ReadState &readState) +{ + size_t result = 0; + for (const auto &elem : readState) { + if (elem) { + for (const auto &stateElem : *elem) { + if (!stateElem.second->_done) { + ++result; + } + } + } + } + return result; +} + +} + void StorageBucketDBInitializer::reportHtmlStatus( std::ostream& out, const framework::HttpUrlPath&) const @@ -261,10 +295,7 @@ StorageBucketDBInitializer::reportHtmlStatus( out << " " << _state._infoRequests.size() << " info requests pending.<br/>\n"; } - uint32_t incompleteScan = 0; - for (uint32_t i=0; i<_readState.size(); ++i) { - if (_readState[i].get() != 0 && !_readState[i]->_done) ++incompleteScan; - } + uint32_t incompleteScan = notDoneCount(_readState); if (incompleteScan == 0) { out << " Done iterating bucket database to generate info " << "requests.<br/>\n"; @@ -304,29 +335,31 @@ StorageBucketDBInitializer::reportHtmlStatus( out << " <h3>Disk " << i << " is down</h3>\n"; continue; } - BucketReadState& state(*_readState[i]); - out << " <h3>Disk " << i << "</h3>\n"; - out << " Pending info requests: " << pendingCounts[i] << " ("; - if (state._pending.empty()) { - out << "none"; - } else { - bool first = true; - for (BucketSet::const_iterator it = state._pending.begin(); - it != state._pending.end(); ++it) - { - if (!first) { - out << ", "; - } else { - first = false; + const BucketSpaceReadState& spaceState(*_readState[i]); + for (const auto &stateElem : spaceState) { + const BucketReadState &state = *stateElem.second; + out << " <h3>Disk " << i << ", bucket space " << stateElem.first.getId() << "</h3>\n"; + out << " Pending info requests: " << pendingCounts[i] << " ("; + if (state._pending.empty()) { + out << "none"; + } else { + bool first = true; + for (BucketSet::const_iterator it = state._pending.begin(); + it != state._pending.end(); ++it) { + if (!first) { + out << ", "; + } else { + first = false; + } + out << *it; } - out << *it; } + out << ")<br/>\n"; + out << " Bucket database iterator: " << state._databaseIterator + << "<br/>\n"; + out << " Done iterating bucket database. " + << (state._done ? "true" : "false") << "<br/>\n"; } - out << ")<br/>\n"; - out << " Bucked database iterator: " << state._databaseIterator - << "<br/>\n"; - out << " Done iterating bucket database. " - << (state._done ? "true" : "false") << "<br/>\n"; } for (std::map<Disk, uint32_t>::iterator it = pendingCounts.begin(); it != pendingCounts.end(); ++it) @@ -338,11 +371,12 @@ StorageBucketDBInitializer::reportHtmlStatus( // Always called from worker thread. Worker monitor already grabbed void StorageBucketDBInitializer::registerBucket(const document::Bucket &bucket, + const lib::Distribution &distribution, spi::PartitionId partition, api::BucketInfo bucketInfo) { document::BucketId bucketId(bucket.getBucketId()); - StorBucketDatabase::WrappedEntry entry(_system._bucketDatabase.get( + StorBucketDatabase::WrappedEntry entry(_system.getBucketDatabase(bucket.getBucketSpace()).get( bucketId, "StorageBucketDBInitializer::registerBucket", StorBucketDatabase::CREATE_IF_NONEXISTING)); if (bucketInfo.valid()) { @@ -369,7 +403,7 @@ StorageBucketDBInitializer::registerBucket(const document::Bucket &bucket, return; } uint32_t keepOnDisk, joinFromDisk; - if (_system._distribution.getPreferredAvailableDisk( + if (distribution.getPreferredAvailableDisk( _system._nodeState, _system._nodeIndex, bucketId.stripUnused()) == partition) { @@ -384,8 +418,7 @@ StorageBucketDBInitializer::registerBucket(const document::Bucket &bucket, bucketId.toString().c_str(), entry->disk, int(partition), keepOnDisk); entry.unlock(); // Must not have bucket db lock while sending down - InternalBucketJoinCommand::SP cmd(new InternalBucketJoinCommand( - bucket, keepOnDisk, joinFromDisk)); + auto cmd = std::make_shared<InternalBucketJoinCommand>(bucket, keepOnDisk, joinFromDisk); { _state._joins[cmd->getMsgId()] = cmd; } @@ -396,7 +429,7 @@ StorageBucketDBInitializer::registerBucket(const document::Bucket &bucket, bucketId.toString().c_str(), int(partition)); entry->disk = partition; entry.write(); - uint16_t disk(_system._distribution.getIdealDisk( + uint16_t disk(distribution.getIdealDisk( _system._nodeState, _system._nodeIndex, bucketId.stripUnused(), lib::Distribution::IDEAL_DISK_EVEN_IF_DOWN)); if (disk != partition) { @@ -459,9 +492,11 @@ namespace { // Always called from worker thread. It holds worker monitor. void -StorageBucketDBInitializer::sendReadBucketInfo(spi::PartitionId disk) +StorageBucketDBInitializer::sendReadBucketInfo(spi::PartitionId disk, document::BucketSpace bucketSpace) { - BucketReadState& state(*_readState[disk]); + auto itr = _readState[disk]->find(bucketSpace); + assert(itr != _readState[disk]->end()); + BucketReadState& state = *itr->second; if (state._done || state._pending.size() >= _config._maxPendingInfoReadsPerDisk) { @@ -473,7 +508,7 @@ StorageBucketDBInitializer::sendReadBucketInfo(spi::PartitionId disk) NextBucketOnDiskFinder finder(disk, state._databaseIterator, count); LOG(spam, "Iterating bucket db further. Starting at iterator %s", state._databaseIterator.toString().c_str()); - _system._bucketDatabase.all(finder, + _system.getBucketDatabase(bucketSpace).all(finder, "StorageBucketDBInitializer::readBucketInfo", state._databaseIterator.stripUnused().toKey()); if (finder._alreadySet > 0) { @@ -481,8 +516,8 @@ StorageBucketDBInitializer::sendReadBucketInfo(spi::PartitionId disk) _state._infoSetByLoad += finder._alreadySet; } for (uint32_t i=0; i<finder._next.size(); ++i) { - document::Bucket bucket(BucketSpace::placeHolder(), finder._next[i]); - ReadBucketInfo::SP cmd(new ReadBucketInfo(bucket)); + document::Bucket bucket(bucketSpace, finder._next[i]); + auto cmd = std::make_shared<ReadBucketInfo>(bucket); cmd->setPriority(_config._infoReadPriority); state._pending.insert(finder._next[i]); _state._infoRequests[cmd->getMsgId()] = disk; @@ -586,14 +621,16 @@ StorageBucketDBInitializer::handleReadBucketListReply( const spi::BucketIdListResult::List& list(reply.getBuckets()); api::BucketInfo info; assert(!info.valid()); + const auto &contentBucketSpace(_system._bucketSpaceRepo.get(reply.getBucketSpace())); + auto distribution(contentBucketSpace.getDistribution()); for (uint32_t i=0, n=list.size(); i<n; ++i) { - registerBucket(document::Bucket(reply.getBucketSpace(), list[i]), reply.getPartition(), info); + registerBucket(document::Bucket(reply.getBucketSpace(), list[i]), *distribution, reply.getPartition(), info); } if (++_state._dirsListed == _state._dirsToList) { handleListingCompleted(); } checkIfDone(); - sendReadBucketInfo(reply.getPartition()); + sendReadBucketInfo(reply.getPartition(), reply.getBucketSpace()); } // Always called from worker thread. It holds worker monitor. @@ -601,12 +638,13 @@ void StorageBucketDBInitializer::handleReadBucketInfoReply( ReadBucketInfoReply& reply) { + document::BucketSpace bucketSpace = reply.getBucket().getBucketSpace(); if (reply.getResult().failed()) { LOGBP(warning, "Deleting %s from bucket database. Cannot use it as we " "failed to read bucket info for it: %s", reply.getBucketId().toString().c_str(), reply.getResult().toString().c_str()); - _system._bucketDatabase.erase(reply.getBucketId(), + _system.getBucketDatabase(bucketSpace).erase(reply.getBucketId(), "dbinit.failedreply"); } _metrics._infoReadCount.inc(); @@ -622,7 +660,9 @@ StorageBucketDBInitializer::handleReadBucketInfoReply( } else { uint32_t disk(it->second); _state._infoRequests.erase(it->first); - BucketReadState& state(*_readState[disk]); + auto itr = _readState[disk]->find(bucketSpace); + assert(itr != _readState[disk]->end()); + BucketReadState& state = *itr->second; BucketSet::iterator it2(state._pending.find(reply.getBucketId())); if (it2 == state._pending.end()) { LOGBP(warning, "Got bucket info reply for %s that was registered " @@ -632,12 +672,12 @@ StorageBucketDBInitializer::handleReadBucketInfoReply( state._pending.erase(reply.getBucketId()); LOG(spam, "Got info reply for %s: %s", reply.getBucketId().toString().c_str(), - _system._bucketDatabase.get( + _system.getBucketDatabase(reply.getBucket().getBucketSpace()).get( reply.getBucketId(), "dbinit.inforeply") ->getBucketInfo().toString().c_str()); } checkIfDone(); - sendReadBucketInfo(spi::PartitionId(disk)); + sendReadBucketInfo(spi::PartitionId(disk), bucketSpace); } } @@ -661,7 +701,7 @@ StorageBucketDBInitializer::handleInternalBucketJoinReply( LOG(debug, "Completed internal bucket join for %s. Got bucket info %s", reply.getBucketId().toString().c_str(), reply.getBucketInfo().toString().c_str()); - StorBucketDatabase::WrappedEntry entry(_system._bucketDatabase.get( + StorBucketDatabase::WrappedEntry entry(_system.getBucketDatabase(reply.getBucket().getBucketSpace()).get( reply.getBucketId(), "StorageBucketDBInitializer::onInternalBucketJoinReply")); entry->setBucketInfo(reply.getBucketInfo()); @@ -674,6 +714,16 @@ StorageBucketDBInitializer::handleInternalBucketJoinReply( checkIfDone(); } +namespace { + +bool +isDone(const StorageBucketDBInitializer::ReadState &readState) +{ + return notDoneCount(readState) == 0; +} + +} + // Always called from worker thread. It holds worker monitor. void StorageBucketDBInitializer::checkIfDone() @@ -681,8 +731,8 @@ StorageBucketDBInitializer::checkIfDone() if (_state._dirsListed < _state._dirsToList) return; if (!_state._infoRequests.empty()) return; if (!_state._joins.empty()) return; - for (uint32_t i=0; i<_readState.size(); ++i) { - if (_readState[i].get() != 0 && !_readState[i]->_done) return; + if (!isDone(_readState)) { + return; } _state._doneInitializing = true; _system._doneInitializeHandler.notifyDoneInitializing(); @@ -698,17 +748,19 @@ StorageBucketDBInitializer::calculateMinProgressFromDiskIterators() const if (_readState[disk].get() == 0) { continue; } - const BucketReadState& state(*_readState[disk]); - document::BucketId bid(state._databaseIterator); + for (const auto &stateElem : *_readState[disk]) { + const BucketReadState &state = *stateElem.second; + document::BucketId bid(state._databaseIterator); - double progress; - if (!state._done) { - progress = BucketProgressCalculator::calculateProgress(bid); - } else { - progress = 1.0; - } + double progress; + if (!state._done) { + progress = BucketProgressCalculator::calculateProgress(bid); + } else { + progress = 1.0; + } - minProgress = std::min(minProgress, progress); + minProgress = std::min(minProgress, progress); + } } //std::cerr << "minProgress: " << minProgress << "\n"; return minProgress; diff --git a/storage/src/vespa/storage/bucketdb/storagebucketdbinitializer.h b/storage/src/vespa/storage/bucketdb/storagebucketdbinitializer.h index 99f273a384a..57b95e14f48 100644 --- a/storage/src/vespa/storage/bucketdb/storagebucketdbinitializer.h +++ b/storage/src/vespa/storage/bucketdb/storagebucketdbinitializer.h @@ -51,6 +51,7 @@ #include <vespa/vdslib/state/nodestate.h> #include <vespa/config/subscription/configuri.h> #include <list> +#include <unordered_map> namespace storage { @@ -77,9 +78,8 @@ class StorageBucketDBInitializer : public StorageLink, DoneInitializeHandler& _doneInitializeHandler; ServiceLayerComponent _component; const spi::PartitionStateList& _partitions; - StorBucketDatabase& _bucketDatabase; + const ContentBucketSpaceRepo& _bucketSpaceRepo; uint32_t _nodeIndex; - lib::Distribution& _distribution; lib::NodeState _nodeState; // Disk info for ideal state calculations framework::Thread::UP _thread; @@ -87,6 +87,8 @@ class StorageBucketDBInitializer : public StorageLink, DoneInitializeHandler& doneInitializeHandler, ServiceLayerComponentRegister&, const Config&); + + StorBucketDatabase &getBucketDatabase(document::BucketSpace bucketSpace) const; }; struct Metrics : public metrics::MetricSet { metrics::LongCountMetric _wrongDisk; @@ -126,12 +128,17 @@ class StorageBucketDBInitializer : public StorageLink, ~GlobalState(); }; +public: + using BucketSpaceReadState = std::unordered_map<document::BucketSpace, + std::unique_ptr<BucketReadState>, document::BucketSpace::hash>; + using ReadState = std::vector<std::unique_ptr<BucketSpaceReadState>>; +private: Config _config; System _system; Metrics _metrics; GlobalState _state; - std::vector<std::unique_ptr<BucketReadState>> _readState; + ReadState _readState; public: StorageBucketDBInitializer(const config::ConfigUri&, @@ -180,13 +187,14 @@ public: std::vector<uint32_t>& path); /** Register a bucket in the bucket database. */ void registerBucket(const document::Bucket &bucket, + const lib::Distribution &distribution, spi::PartitionId, api::BucketInfo bucketInfo); /** * Sends more read bucket info to a given disk. Lock must already be taken. * Will be released by function prior to sending messages down. */ - void sendReadBucketInfo(spi::PartitionId); + void sendReadBucketInfo(spi::PartitionId, document::BucketSpace bucketSpace); /** Check whether initialization is complete. Should hold lock to call it.*/ void checkIfDone(); diff --git a/storage/src/vespa/storage/bucketmover/bucketmover.cpp b/storage/src/vespa/storage/bucketmover/bucketmover.cpp index b9073ca2cdc..bc9a8b0c428 100644 --- a/storage/src/vespa/storage/bucketmover/bucketmover.cpp +++ b/storage/src/vespa/storage/bucketmover/bucketmover.cpp @@ -4,6 +4,7 @@ #include "htmltable.h" #include <vespa/storage/config/config-stor-server.h> #include <vespa/storage/common/bucketmessages.h> +#include <vespa/storage/common/content_bucket_space_repo.h> #include <vespa/storage/common/nodestateupdater.h> #include <vespa/storage/storageutil/log.h> #include <vespa/config/common/exceptions.h> @@ -27,7 +28,7 @@ BucketMover::BucketMover(const config::ConfigUri & configUri, _cycleCount(0), _nextRun(0), _configFetcher(configUri.getContext()), - _diskDistribution(_component.getDistribution()->getDiskDistribution()), + _diskDistribution(currentDiskDistribution()), _maxSleepTime(60 * 60) { if (!configUri.empty()) { @@ -40,7 +41,7 @@ BucketMover::BucketMover(const config::ConfigUri & configUri, BucketMover::~BucketMover() { - if (_thread.get() != 0) { + if (_thread) { LOG(error, "BucketMover deleted without calling close() first"); onClose(); } @@ -61,10 +62,10 @@ BucketMover::onClose() // Avoid getting config during shutdown _configFetcher.close(); // Close thread to ensure we don't send anything more down after - if (_thread.get()) { + if (_thread) { _thread->interruptAndJoin(&_wait); LOG(debug, "Bucket mover worker thread closed."); - _thread.reset(0); + _thread.reset(); } } @@ -111,12 +112,14 @@ BucketMover::startNewRun() // If not in a run but time to start another one, do so LOG(debug, "Starting new move cycle at time %s.", _component.getClock().getTimeInSeconds().toString().c_str()); - _currentRun.reset(new bucketmover::Run( - _component.getBucketDatabase(BucketSpace::placeHolder()), - _component.getDistribution(), + // TODO consider if we should invoke bucket moving across all bucket spaces. Not likely to ever be needed. + // If so, we have to spawn off an individual Run per space, as it encompasses + // both a (disk) distribution and a bucket database. + _currentRun = std::make_unique<bucketmover::Run>( + _component.getBucketSpaceRepo().get(document::BucketSpace::placeHolder()), *_component.getStateUpdater().getReportedNodeState(), _component.getIndex(), - _component.getClock())); + _component.getClock()); } void @@ -124,8 +127,7 @@ BucketMover::queueNewMoves() { // If we have too few pending, send some new moves, if there are more // moves to perform. - while (_pendingMoves.size() < uint32_t(_config->maxPending)) - { + while (_pendingMoves.size() < uint32_t(_config->maxPending)) { Move nextMove = _currentRun->getNextMove(); // If no more moves to do, stop attempting to send more. @@ -133,11 +135,8 @@ BucketMover::queueNewMoves() break; } _pendingMoves.push_back(nextMove); - document::Bucket bucket(BucketSpace::placeHolder(), nextMove.getBucketId()); - std::shared_ptr<BucketDiskMoveCommand> cmd( - new BucketDiskMoveCommand(bucket, - nextMove.getSourceDisk(), - nextMove.getTargetDisk())); + auto cmd = std::make_shared<BucketDiskMoveCommand>( + nextMove.getBucket(), nextMove.getSourceDisk(), nextMove.getTargetDisk()); cmd->setPriority(nextMove.getPriority()); _newMoves.push_back(cmd); } @@ -171,11 +170,9 @@ BucketMover::finishCurrentRun() void BucketMover::sendNewMoves() { - for (std::list<BucketDiskMoveCommand::SP>::iterator it - = _newMoves.begin(); it != _newMoves.end(); ++it) - { - LOG(debug, "Moving bucket: %s", (**it).toString().c_str()); - sendDown(*it); + for (auto& move : _newMoves) { + LOG(debug, "Moving bucket: %s", move->toString().c_str()); + sendDown(move); // Be able to sleep a bit between moves for debugging to see // what is happening. (Cannot use wait() here as reply of @@ -196,7 +193,7 @@ BucketMover::tick() framework::SecondTime currentTime(_component.getClock().getTimeInSeconds()); - if (_currentRun.get() == 0) { + if (!_currentRun) { if (currentTime >= _nextRun) { startNewRun(); } else { @@ -287,25 +284,24 @@ bool BucketMover::onInternalReply( const std::shared_ptr<api::InternalReply>& internalReply) { - // We only care about move disk bucket replies - std::shared_ptr<BucketDiskMoveReply> reply( - std::dynamic_pointer_cast<BucketDiskMoveReply>(internalReply)); - if (!reply.get()) return false; + // We only care about move disk bucket replies + auto reply = std::dynamic_pointer_cast<BucketDiskMoveReply>(internalReply); + if (!reply) { + return false; + } - // Warn if we see move replies outside of a run. Should not be possible. + // Warn if we see move replies outside of a run. Should not be possible. vespalib::MonitorGuard monitor(_wait); - if (_currentRun.get() == 0) { + if (!_currentRun) { LOG(warning, "Got a bucket disk move reply while no run is active. " "This should not happen, as runs should stay active until " "all requests are answered."); return true; } - // Match move against pending ones + // Match move against pending ones Move move; - for (std::list<Move>::iterator it = _pendingMoves.begin(); - it != _pendingMoves.end(); ++it) - { - if (it->getBucketId() == reply->getBucketId() + for (auto it = _pendingMoves.begin(); it != _pendingMoves.end(); ++it) { + if (it->getBucket() == reply->getBucket() && it->getSourceDisk() == reply->getSrcDisk() && it->getTargetDisk() == reply->getDstDisk()) { @@ -338,18 +334,20 @@ BucketMover::onInternalReply( return true; } +// TODO if we start supporting disk moves for other spaces than the default space +// we also have to check all disk distributions here. void BucketMover::storageDistributionChanged() { - lib::Distribution::SP distribution = _component.getDistribution(); - - // Verify that the actual disk distribution changed, if not ignore - lib::Distribution::DiskDistribution newDistr(distribution->getDiskDistribution()); + // Verify that the actual disk distribution changed, if not ignore + lib::Distribution::DiskDistribution newDistr(currentDiskDistribution()); - if (_diskDistribution == newDistr) return; + if (_diskDistribution == newDistr) { + return; + } vespalib::MonitorGuard monitor(_wait); - if (_currentRun.get() != 0) { + if (_currentRun) { LOG(info, "Aborting bucket mover run as disk distribution changed " "from %s to %s.", lib::Distribution::getDiskDistributionName(_diskDistribution).c_str(), @@ -365,9 +363,14 @@ BucketMover::storageDistributionChanged() _nextRun = framework::SecondTime(0); } +lib::Distribution::DiskDistribution BucketMover::currentDiskDistribution() const { + auto distribution = _component.getBucketSpaceRepo().get(document::BucketSpace::placeHolder()).getDistribution(); + return distribution->getDiskDistribution(); +} + bool BucketMover::isWorkingOnCycle() const { vespalib::MonitorGuard monitor(_wait); - return (_currentRun.get() != 0); + return (_currentRun.get() != nullptr); } uint32_t BucketMover::getCycleCount() const { @@ -382,7 +385,7 @@ BucketMover::print(std::ostream& out, bool verbose, (void) verbose; (void) indent; vespalib::MonitorGuard monitor(_wait); out << "BucketMover() {"; - if (_currentRun.get() != 0) { + if (_currentRun) { out << "\n" << indent << " "; _currentRun->print(out, verbose, indent + " "); } else { @@ -390,11 +393,9 @@ BucketMover::print(std::ostream& out, bool verbose, } if (verbose && !_history.empty()) { out << "\n" << indent << " History:"; - for (std::list<RunStatistics>::const_iterator it = _history.begin(); - it != _history.end(); ++it) - { + for (auto& entry : _history) { out << "\n" << indent << " "; - it->print(out, true, indent + " "); + entry.print(out, true, indent + " "); } } out << "\n" << indent << "}"; @@ -412,17 +413,14 @@ BucketMover::reportHtmlStatus(std::ostream& out, printCurrentStatus(out, *_history.begin()); } out << "<h2>Current move cycle</h2>\n"; - if (_currentRun.get() != 0) { + if (_currentRun) { printRunHtml(out, *_currentRun); if (_currentRun->getPendingMoves().empty()) { out << "<blockquote>No pending moves.</blockquote>\n"; } else { out << "<blockquote>Pending bucket moves:<ul>\n"; - for (std::list<Move>::const_iterator it - = _currentRun->getPendingMoves().begin(); - it != _currentRun->getPendingMoves().end(); ++it) - { - out << "<li>" << *it << "</li>\n"; + for (auto& entry : _currentRun->getPendingMoves()) { + out << "<li>" << entry << "</li>\n"; } out << "</ul></blockquote>\n"; } @@ -432,7 +430,7 @@ BucketMover::reportHtmlStatus(std::ostream& out, framework::SecondTime currentTime( _component.getClock().getTimeInSeconds()); if (_nextRun <= currentTime) { - if (_thread.get() != 0) { + if (_thread) { out << "Next run to start immediately."; // Wake up thread, so user sees it starts immediately :) monitor.signal(); @@ -454,10 +452,8 @@ BucketMover::reportHtmlStatus(std::ostream& out, } if (!_history.empty()) { out << "<h2>Statistics from previous bucket mover cycles</h2>\n"; - for (std::list<RunStatistics>::const_iterator it = _history.begin(); - it != _history.end(); ++it) - { - printRunStatisticsHtml(out, *it); + for (auto& entry : _history) { + printRunStatisticsHtml(out, entry); } } } diff --git a/storage/src/vespa/storage/bucketmover/bucketmover.h b/storage/src/vespa/storage/bucketmover/bucketmover.h index 0b9cfc04455..42cbf693fcb 100644 --- a/storage/src/vespa/storage/bucketmover/bucketmover.h +++ b/storage/src/vespa/storage/bucketmover/bucketmover.h @@ -83,6 +83,7 @@ private: void run(framework::ThreadHandle&) override; bool onInternalReply(const std::shared_ptr<api::InternalReply>&) override; void storageDistributionChanged() override; + lib::Distribution::DiskDistribution currentDiskDistribution() const; framework::SecondTime calculateWaitTimeOfNextRun() const; diff --git a/storage/src/vespa/storage/bucketmover/move.cpp b/storage/src/vespa/storage/bucketmover/move.cpp index 7d6f96c46ba..b91329fad36 100644 --- a/storage/src/vespa/storage/bucketmover/move.cpp +++ b/storage/src/vespa/storage/bucketmover/move.cpp @@ -9,13 +9,13 @@ namespace bucketmover { Move::Move() : _sourceDisk(0), _targetDisk(0), - _bucket(0), + _bucket(document::BucketSpace::placeHolder(), document::BucketId(0)), _totalDocSize(0), _priority(255) { } -Move::Move(uint16_t source, uint16_t target, const document::BucketId& bucket, +Move::Move(uint16_t source, uint16_t target, const document::Bucket& bucket, uint32_t totalDocSize) : _sourceDisk(source), _targetDisk(target), diff --git a/storage/src/vespa/storage/bucketmover/move.h b/storage/src/vespa/storage/bucketmover/move.h index e2401e5abed..92d05e4e0ae 100644 --- a/storage/src/vespa/storage/bucketmover/move.h +++ b/storage/src/vespa/storage/bucketmover/move.h @@ -8,7 +8,7 @@ #pragma once -#include <vespa/document/bucket/bucketid.h> +#include <vespa/document/bucket/bucket.h> #include <vespa/vespalib/util/printable.h> namespace storage { @@ -17,22 +17,22 @@ namespace bucketmover { class Move : public vespalib::Printable { uint16_t _sourceDisk; uint16_t _targetDisk; - document::BucketId _bucket; + document::Bucket _bucket; uint32_t _totalDocSize; uint8_t _priority; public: Move(); - Move(uint16_t source, uint16_t target, const document::BucketId& bucket, + Move(uint16_t source, uint16_t target, const document::Bucket& bucket, uint32_t totalDocSize); /** False if invalid move. (Empty constructor) Indicates end of run. */ - bool isDefined() const { return (_bucket.getRawId() != 0); } + bool isDefined() const { return (_bucket.getBucketId().getRawId() != 0); } // Only valid to call if move is defined uint16_t getSourceDisk() const { return _sourceDisk; } uint16_t getTargetDisk() const { return _targetDisk; } - const document::BucketId& getBucketId() const { return _bucket; } + const document::Bucket& getBucket() const { return _bucket; } uint8_t getPriority() const { return _priority; } uint32_t getTotalDocSize() const { return _totalDocSize; } diff --git a/storage/src/vespa/storage/bucketmover/run.cpp b/storage/src/vespa/storage/bucketmover/run.cpp index 6a0ef2079ce..22bcfa55f15 100644 --- a/storage/src/vespa/storage/bucketmover/run.cpp +++ b/storage/src/vespa/storage/bucketmover/run.cpp @@ -12,24 +12,24 @@ LOG_SETUP(".bucketmover.run"); namespace storage { namespace bucketmover { -Run::Run(StorBucketDatabase& db, - lib::Distribution::SP distribution, +Run::Run(ContentBucketSpace& bucketSpace, const lib::NodeState& nodeState, uint16_t nodeIndex, framework::Clock& clock) - : _bucketDatabase(db), - _distribution(distribution), + : _bucketSpace(bucketSpace), + _distribution(bucketSpace.getDistribution()), _nodeState(nodeState), _nodeIndex(nodeIndex), _entries(), _iterationDone(false), - _statistics(distribution->getDiskDistribution(), clock, nodeState), + _statistics(_distribution->getDiskDistribution(), clock, nodeState), _aborted(false) { } namespace { struct BucketIterator { + document::BucketSpace _iteratedBucketSpace; const lib::Distribution& _distribution; const lib::NodeState& _nodeState; RunStatistics& _statistics; @@ -39,10 +39,12 @@ namespace { uint32_t _bucketsVisited; document::BucketId _firstBucket; - BucketIterator(const lib::Distribution& d, const lib::NodeState& ns, + BucketIterator(document::BucketSpace iteratedBucketSpace, + const lib::Distribution& d, const lib::NodeState& ns, uint16_t nodeIndex, RunStatistics& stats, std::list<Move>& entries) - : _distribution(d), + : _iteratedBucketSpace(iteratedBucketSpace), + _distribution(d), _nodeState(ns), _statistics(stats), _entries(entries), @@ -57,12 +59,12 @@ namespace { operator()(document::BucketId::Type revId, StorBucketDatabase::Entry& entry) { - document::BucketId bucket(document::BucketId::keyToBucketId(revId)); - if (bucket == _firstBucket) { + document::BucketId bucketId(document::BucketId::keyToBucketId(revId)); + if (bucketId == _firstBucket) { return StorBucketDatabase::CONTINUE; } uint16_t idealDisk = _distribution.getIdealDisk( - _nodeState, _nodeIndex, bucket, + _nodeState, _nodeIndex, bucketId, lib::Distribution::IDEAL_DISK_EVEN_IF_DOWN); RunStatistics::DiskData& diskData( _statistics._diskData[entry.disk]); @@ -72,10 +74,11 @@ namespace { diskData._bucketSize += entry.getBucketInfo().getTotalDocumentSize(); ++diskData._bucketsFoundOnCorrectDisk; } else { + document::Bucket bucket(_iteratedBucketSpace, bucketId); _entries.push_back(Move( entry.disk, idealDisk, bucket, entry.getBucketInfo().getTotalDocumentSize())); } - _statistics._lastBucketVisited = bucket; + _statistics._lastBucketVisited = bucketId; if (++_bucketsVisited >= _maxBucketsToIterateAtOnce) { return StorBucketDatabase::ABORT; } @@ -104,18 +107,16 @@ Run::getNextMove() if (!_statistics._diskData[e.getTargetDisk()]._diskDisabled) { _pending.push_back(e); - _statistics._lastBucketProcessed = e.getBucketId(); - _statistics._lastBucketProcessedTime - = _statistics._clock->getTimeInSeconds(); + _statistics._lastBucketProcessed = e.getBucket(); // Only used for printing + _statistics._lastBucketProcessedTime = _statistics._clock->getTimeInSeconds(); return e; } } // Cache more entries - BucketIterator it(*_distribution, _nodeState, _nodeIndex, _statistics, - _entries); - _bucketDatabase.all(it, "bucketmover::Run", - _statistics._lastBucketVisited.toKey()); + BucketIterator it(_bucketSpace.bucketSpace(), *_distribution, + _nodeState, _nodeIndex, _statistics, _entries); + _bucketSpace.bucketDatabase().all(it, "bucketmover::Run", _statistics._lastBucketVisited.toKey()); if (it._bucketsVisited == 0) { _iterationDone = true; if (_pending.empty()) { @@ -128,31 +129,6 @@ Run::getNextMove() } void -Run::depleteMoves() -{ - while (true) { - // Cache more entries - BucketIterator bi(*_distribution, _nodeState, _nodeIndex, _statistics, - _entries); - _bucketDatabase.all(bi, "bucketmover::depleteMoves", - _statistics._lastBucketVisited.toKey()); - if (bi._bucketsVisited == 0) { - break; - } - for (std::list<Move>::const_iterator it = _entries.begin(); - it != _entries.end(); ++it) - { - ++_statistics._diskData[it->getSourceDisk()][it->getTargetDisk()] - ._bucketsLeftOnWrongDisk; - uint32_t size = it->getTotalDocSize(); - _statistics._diskData[it->getSourceDisk()]._bucketSize += size; - } - _entries.clear(); - } - finalize(); -} - -void Run::finalize() { _statistics._endTime = _statistics._clock->getTimeInSeconds(); @@ -162,10 +138,8 @@ void Run::removePending(Move& move) { bool foundPending = false; - for (std::list<Move>::iterator it = _pending.begin(); it != _pending.end(); - ++it) - { - if (it->getBucketId() == move.getBucketId()) { + for (auto it = _pending.begin(); it != _pending.end(); ++it) { + if (it->getBucket() == move.getBucket()) { _pending.erase(it); foundPending = true; break; @@ -173,7 +147,7 @@ Run::removePending(Move& move) } if (!foundPending) { LOG(warning, "Got answer for %s that was not in the pending list.", - move.getBucketId().toString().c_str()); + move.getBucket().toString().c_str()); return; } if (_iterationDone && _pending.empty()) { diff --git a/storage/src/vespa/storage/bucketmover/run.h b/storage/src/vespa/storage/bucketmover/run.h index 11f2cf0763c..eb7a6df2d17 100644 --- a/storage/src/vespa/storage/bucketmover/run.h +++ b/storage/src/vespa/storage/bucketmover/run.h @@ -18,6 +18,7 @@ #include "move.h" #include "runstatistics.h" +#include <vespa/storage/common/content_bucket_space.h> #include <vespa/vdslib/distribution/distribution.h> #include <vespa/vdslib/state/nodestate.h> #include <list> @@ -33,8 +34,8 @@ class Clock; namespace bucketmover { class Run : public document::Printable { - StorBucketDatabase& _bucketDatabase; - lib::Distribution::SP _distribution; + ContentBucketSpace& _bucketSpace; + std::shared_ptr<const lib::Distribution> _distribution; lib::NodeState _nodeState; uint16_t _nodeIndex; uint32_t _maxEntriesToKeep; @@ -48,8 +49,7 @@ class Run : public document::Printable { public: Run(const Run &) = delete; Run & operator = (const Run &) = delete; - Run(StorBucketDatabase&, - lib::Distribution::SP, + Run(ContentBucketSpace& bucketSpace, const lib::NodeState&, uint16_t nodeIndex, framework::Clock&); @@ -78,12 +78,6 @@ public: */ Move getNextMove(); - /** - * Run through the database not doing any moves. Useful to do a run only - * to gather statistics of current state. - */ - void depleteMoves(); - void moveOk(Move& move); void moveFailedBucketNotFound(Move& move); void moveFailed(Move& move); diff --git a/storage/src/vespa/storage/bucketmover/runstatistics.cpp b/storage/src/vespa/storage/bucketmover/runstatistics.cpp index 8f7fe67fcf3..314f04a0d66 100644 --- a/storage/src/vespa/storage/bucketmover/runstatistics.cpp +++ b/storage/src/vespa/storage/bucketmover/runstatistics.cpp @@ -39,7 +39,7 @@ RunStatistics::RunStatistics(DiskDistribution d, framework::Clock& clock, const lib::NodeState& ns) : _clock(&clock), _distribution(d), - _lastBucketProcessed(0), + _lastBucketProcessed(), _lastBucketVisited(0), _diskData(ns.getDiskCount(), DiskData(ns.getDiskCount())), _startTime(_clock->getTimeInSeconds()), @@ -149,13 +149,14 @@ RunStatistics::getWronglyPlacedRatio() const return static_cast<double>(wrong) / total; } +// FIXME does not cover multiple spaces (but only used for printing) double RunStatistics::getProgress() const { if (_endTime.isSet()) return 1.0; double result = 0; double weight = 0.5; - uint64_t key = _lastBucketProcessed.toKey(); + uint64_t key = _lastBucketProcessed.getBucketId().toKey(); for (uint16_t i=0; i<64; ++i) { uint64_t flag = uint64_t(1) << (63 - i); if ((key & flag) == flag) { diff --git a/storage/src/vespa/storage/bucketmover/runstatistics.h b/storage/src/vespa/storage/bucketmover/runstatistics.h index da51be9ef7d..908f345b307 100644 --- a/storage/src/vespa/storage/bucketmover/runstatistics.h +++ b/storage/src/vespa/storage/bucketmover/runstatistics.h @@ -38,7 +38,7 @@ #include <vespa/vdslib/state/nodestate.h> #include <vespa/vdslib/distribution/distribution.h> -#include <vespa/document/bucket/bucketid.h> +#include <vespa/document/bucket/bucket.h> #include <vespa/vespalib/util/printable.h> #include <vespa/storageframework/generic/clock/time.h> @@ -76,7 +76,7 @@ struct RunStatistics : public document::Printable { framework::Clock* _clock; DiskDistribution _distribution; - document::BucketId _lastBucketProcessed; + document::Bucket _lastBucketProcessed; document::BucketId _lastBucketVisited; // Invalid bucket for starting point std::vector<DiskData> _diskData; framework::SecondTime _startTime; diff --git a/storage/src/vespa/storage/common/bucket_resolver.h b/storage/src/vespa/storage/common/bucket_resolver.h new file mode 100644 index 00000000000..f1e334807bf --- /dev/null +++ b/storage/src/vespa/storage/common/bucket_resolver.h @@ -0,0 +1,21 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <vespa/document/bucket/bucket.h> +#include <vespa/vespalib/stllike/string.h> + +namespace document { class DocumentId; } + +namespace storage { + +/** + * Interface for resolving which bucket a given a document id belongs to. + */ +struct BucketResolver { + virtual ~BucketResolver() {} + virtual document::Bucket bucketFromId(const document::DocumentId &documentId) const = 0; + virtual document::BucketSpace bucketSpaceFromName(const vespalib::string &bucketSpace) const = 0; + virtual vespalib::string nameFromBucketSpace(const document::BucketSpace &bucketSpace) const = 0; +}; + +} diff --git a/storage/src/vespa/storage/common/bucketmessages.cpp b/storage/src/vespa/storage/common/bucketmessages.cpp index 1d9d64ad24f..3157bad49e5 100644 --- a/storage/src/vespa/storage/common/bucketmessages.cpp +++ b/storage/src/vespa/storage/common/bucketmessages.cpp @@ -39,6 +39,12 @@ ReadBucketListReply::ReadBucketListReply(const ReadBucketList& cmd) ReadBucketListReply::~ReadBucketListReply() { } +document::Bucket +ReadBucketListReply::getBucket() const +{ + return document::Bucket(_bucketSpace, document::BucketId()); +} + void ReadBucketListReply::print(std::ostream& out, bool verbose, const std::string& indent) const { diff --git a/storage/src/vespa/storage/common/bucketmessages.h b/storage/src/vespa/storage/common/bucketmessages.h index 0ff7a22aa4d..941928b1064 100644 --- a/storage/src/vespa/storage/common/bucketmessages.h +++ b/storage/src/vespa/storage/common/bucketmessages.h @@ -55,6 +55,7 @@ public: document::BucketSpace getBucketSpace() const { return _bucketSpace; } spi::PartitionId getPartition() const { return _partition; } + document::Bucket getBucket() const override; spi::BucketIdListResult::List& getBuckets() { return _buckets; } const spi::BucketIdListResult::List& getBuckets() const { diff --git a/storage/src/vespa/storage/common/content_bucket_space.cpp b/storage/src/vespa/storage/common/content_bucket_space.cpp index b78be81c9de..4344bccc785 100644 --- a/storage/src/vespa/storage/common/content_bucket_space.cpp +++ b/storage/src/vespa/storage/common/content_bucket_space.cpp @@ -4,9 +4,26 @@ namespace storage { -ContentBucketSpace::ContentBucketSpace() - : _bucketDatabase() +ContentBucketSpace::ContentBucketSpace(document::BucketSpace bucketSpace) + : _bucketSpace(bucketSpace), + _bucketDatabase(), + _lock(), + _distribution() { } +void +ContentBucketSpace::setDistribution(std::shared_ptr<const lib::Distribution> distribution) +{ + std::lock_guard<std::mutex> guard(_lock); + _distribution = std::move(distribution); +} + +std::shared_ptr<const lib::Distribution> +ContentBucketSpace::getDistribution() const +{ + std::lock_guard<std::mutex> guard(_lock); + return _distribution; +} + } diff --git a/storage/src/vespa/storage/common/content_bucket_space.h b/storage/src/vespa/storage/common/content_bucket_space.h index 2efb2eca06d..3b3dddade4f 100644 --- a/storage/src/vespa/storage/common/content_bucket_space.h +++ b/storage/src/vespa/storage/common/content_bucket_space.h @@ -1,21 +1,32 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once +#include <vespa/document/bucket/bucketspace.h> #include <vespa/storage/bucketdb/storbucketdb.h> +#include <mutex> namespace storage { +namespace lib { class Distribution; } + /** * Class representing a bucket space (with associated bucket database) on a content node. */ class ContentBucketSpace { private: + document::BucketSpace _bucketSpace; StorBucketDatabase _bucketDatabase; + mutable std::mutex _lock; + std::shared_ptr<const lib::Distribution> _distribution; public: using UP = std::unique_ptr<ContentBucketSpace>; - ContentBucketSpace(); + ContentBucketSpace(document::BucketSpace bucketSpace); + + document::BucketSpace bucketSpace() const noexcept { return _bucketSpace; } StorBucketDatabase &bucketDatabase() { return _bucketDatabase; } + void setDistribution(std::shared_ptr<const lib::Distribution> distribution); + std::shared_ptr<const lib::Distribution> getDistribution() const; }; } diff --git a/storage/src/vespa/storage/common/content_bucket_space_repo.cpp b/storage/src/vespa/storage/common/content_bucket_space_repo.cpp index 04e2c4c27d3..1846c132c0a 100644 --- a/storage/src/vespa/storage/common/content_bucket_space_repo.cpp +++ b/storage/src/vespa/storage/common/content_bucket_space_repo.cpp @@ -9,7 +9,7 @@ namespace storage { ContentBucketSpaceRepo::ContentBucketSpaceRepo() : _map() { - _map.emplace(BucketSpace::placeHolder(), std::make_unique<ContentBucketSpace>()); + _map.emplace(BucketSpace::placeHolder(), std::make_unique<ContentBucketSpace>(BucketSpace::placeHolder())); } ContentBucketSpace & @@ -21,6 +21,16 @@ ContentBucketSpaceRepo::get(BucketSpace bucketSpace) const return *itr->second; } +ContentBucketSpaceRepo::BucketSpaces +ContentBucketSpaceRepo::getBucketSpaces() const +{ + BucketSpaces result; + for (const auto &elem : _map) { + result.push_back(elem.first); + } + return result; +} + size_t ContentBucketSpaceRepo::getBucketMemoryUsage() const { diff --git a/storage/src/vespa/storage/common/content_bucket_space_repo.h b/storage/src/vespa/storage/common/content_bucket_space_repo.h index 390cfc15f5d..0d4ddb86bcf 100644 --- a/storage/src/vespa/storage/common/content_bucket_space_repo.h +++ b/storage/src/vespa/storage/common/content_bucket_space_repo.h @@ -13,6 +13,7 @@ namespace storage { class ContentBucketSpaceRepo { public: using BucketSpaceMap = std::unordered_map<document::BucketSpace, ContentBucketSpace::UP, document::BucketSpace::hash>; + using BucketSpaces = std::vector<document::BucketSpace>; private: BucketSpaceMap _map; @@ -23,6 +24,7 @@ public: BucketSpaceMap::const_iterator begin() const { return _map.begin(); } BucketSpaceMap::const_iterator end() const { return _map.end(); } + BucketSpaces getBucketSpaces() const; size_t getBucketMemoryUsage() const; template <typename Functor> diff --git a/storage/src/vespa/storage/common/servicelayercomponent.cpp b/storage/src/vespa/storage/common/servicelayercomponent.cpp index 68c41536f97..11311a4d189 100644 --- a/storage/src/vespa/storage/common/servicelayercomponent.cpp +++ b/storage/src/vespa/storage/common/servicelayercomponent.cpp @@ -28,7 +28,7 @@ ServiceLayerComponent::getBucketDatabase(BucketSpace bucketSpace) const uint16_t ServiceLayerComponent::getIdealPartition(const document::Bucket& bucket) const { - return getDistribution()->getIdealDisk( + return _bucketSpaceRepo->get(bucket.getBucketSpace()).getDistribution()->getIdealDisk( *getStateUpdater().getReportedNodeState(), getIndex(), bucket.getBucketId(), lib::Distribution::IDEAL_DISK_EVEN_IF_DOWN); } @@ -37,7 +37,7 @@ uint16_t ServiceLayerComponent::getPreferredAvailablePartition( const document::Bucket& bucket) const { - return getDistribution()->getPreferredAvailableDisk( + return _bucketSpaceRepo->get(bucket.getBucketSpace()).getDistribution()->getPreferredAvailableDisk( *getStateUpdater().getReportedNodeState(), getIndex(), bucket.getBucketId()); } diff --git a/storage/src/vespa/storage/config/CMakeLists.txt b/storage/src/vespa/storage/config/CMakeLists.txt index 4a20d510043..65eeeaf3221 100644 --- a/storage/src/vespa/storage/config/CMakeLists.txt +++ b/storage/src/vespa/storage/config/CMakeLists.txt @@ -28,3 +28,5 @@ vespa_generate_config(storage_storageconfig stor-prioritymapping.def) install_config_definition(stor-prioritymapping.def vespa.config.content.core.stor-prioritymapping.def) vespa_generate_config(storage_storageconfig rpc-provider.def) install_config_definition(rpc-provider.def vespa.config.content.core.rpc-provider.def) +vespa_generate_config(storage_storageconfig bucketspaces.def) +install_config_definition(bucketspaces.def vespa.config.content.core.bucketspaces.def) diff --git a/storage/src/vespa/storage/config/bucketspaces.def b/storage/src/vespa/storage/config/bucketspaces.def new file mode 100644 index 00000000000..3ed1abba0b4 --- /dev/null +++ b/storage/src/vespa/storage/config/bucketspaces.def @@ -0,0 +1,11 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +namespace=vespa.config.content.core + +## This config contains the document types handled by a given content cluster +## and the bucket space they belong to. + +## The name of a document type. +documenttype[].name string + +## The bucket space this document type belongs to. +documenttype[].bucketspace string diff --git a/storage/src/vespa/storage/distributor/CMakeLists.txt b/storage/src/vespa/storage/distributor/CMakeLists.txt index 3dc08d858ad..8adbfcaf9da 100644 --- a/storage/src/vespa/storage/distributor/CMakeLists.txt +++ b/storage/src/vespa/storage/distributor/CMakeLists.txt @@ -8,7 +8,6 @@ vespa_add_library(storage_distributor bucketlistmerger.cpp clusterinformation.cpp distributor_bucket_space.cpp - distributor_bucket_space_component.cpp distributor_bucket_space_repo.cpp distributor.cpp distributor_host_info_reporter.cpp @@ -27,6 +26,7 @@ vespa_add_library(storage_distributor operationtargetresolverimpl.cpp ownership_transfer_safe_time_point_calculator.cpp pendingclusterstate.cpp + pending_bucket_space_db_transition.cpp pendingmessagetracker.cpp persistence_operation_metric_set.cpp persistencemessagetracker.cpp diff --git a/storage/src/vespa/storage/distributor/bucketdbupdater.cpp b/storage/src/vespa/storage/distributor/bucketdbupdater.cpp index 569136b8b10..46fa0f72d76 100644 --- a/storage/src/vespa/storage/distributor/bucketdbupdater.cpp +++ b/storage/src/vespa/storage/distributor/bucketdbupdater.cpp @@ -2,6 +2,8 @@ #include "bucketdbupdater.h" #include "distributor.h" +#include "distributor_bucket_space_repo.h" +#include "distributor_bucket_space.h" #include "simpleclusterinformation.h" #include <vespa/storage/common/bucketoperationlogger.h> #include <vespa/storageapi/message/persistence.h> @@ -20,13 +22,12 @@ namespace storage::distributor { BucketDBUpdater::BucketDBUpdater(Distributor& owner, DistributorBucketSpaceRepo &bucketSpaceRepo, - DistributorBucketSpace& bucketSpace, DistributorMessageSender& sender, DistributorComponentRegister& compReg) : framework::StatusReporter("bucketdb", "Bucket DB Updater"), - _bucketSpaceComponent(owner, bucketSpaceRepo, bucketSpace, compReg, "Bucket DB Updater"), + _distributorComponent(owner, bucketSpaceRepo, compReg, "Bucket DB Updater"), _sender(sender), - _transitionTimer(_bucketSpaceComponent.getClock()) + _transitionTimer(_distributorComponent.getClock()) { } @@ -59,13 +60,11 @@ BucketDBUpdater::hasPendingClusterState() const } BucketOwnership -BucketDBUpdater::checkOwnershipInPendingState(const document::BucketId& b) const +BucketDBUpdater::checkOwnershipInPendingState(const document::Bucket& b) const { if (hasPendingClusterState()) { const lib::ClusterState& state(_pendingClusterState->getNewClusterState()); - const lib::Distribution& distribution(_pendingClusterState->getDistribution()); - document::Bucket bucket(BucketSpace::placeHolder(), b); - if (!_bucketSpaceComponent.ownsBucketInState(distribution, state, bucket)) { + if (!_distributorComponent.ownsBucketInState(state, b)) { return BucketOwnership::createNotOwnedInState(state); } } @@ -75,18 +74,18 @@ BucketDBUpdater::checkOwnershipInPendingState(const document::BucketId& b) const void BucketDBUpdater::sendRequestBucketInfo( uint16_t node, - const document::BucketId& bucket, + const document::Bucket& bucket, const std::shared_ptr<MergeReplyGuard>& mergeReplyGuard) { - if (!_bucketSpaceComponent.storageNodeIsUp(node)) { + if (!_distributorComponent.storageNodeIsUp(node)) { return; } std::vector<document::BucketId> buckets; - buckets.push_back(bucket); + buckets.push_back(bucket.getBucketId()); std::shared_ptr<api::RequestBucketInfoCommand> msg( - new api::RequestBucketInfoCommand(BucketSpace::placeHolder(), buckets)); + new api::RequestBucketInfoCommand(bucket.getBucketSpace(), buckets)); LOG(debug, "Sending request bucket info command %lu for " @@ -96,40 +95,43 @@ BucketDBUpdater::sendRequestBucketInfo( node); msg->setPriority(50); - msg->setAddress(_bucketSpaceComponent.nodeAddress(node)); + msg->setAddress(_distributorComponent.nodeAddress(node)); _sentMessages[msg->getMsgId()] = - BucketRequest(node, _bucketSpaceComponent.getUniqueTimestamp(), + BucketRequest(node, _distributorComponent.getUniqueTimestamp(), bucket, mergeReplyGuard); _sender.sendCommand(msg); } void BucketDBUpdater::recheckBucketInfo(uint32_t nodeIdx, - const document::BucketId& bid) + const document::Bucket& bucket) { - sendRequestBucketInfo(nodeIdx, bid, std::shared_ptr<MergeReplyGuard>()); + sendRequestBucketInfo(nodeIdx, bucket, std::shared_ptr<MergeReplyGuard>()); } void BucketDBUpdater::removeSuperfluousBuckets( - const lib::Distribution& newDistribution, const lib::ClusterState& newState) { - // Remove all buckets not belonging to this distributor, or - // being on storage nodes that are no longer up. - NodeRemover proc( - _bucketSpaceComponent.getClusterState(), - newState, - _bucketSpaceComponent.getBucketIdFactory(), - _bucketSpaceComponent.getIndex(), - newDistribution, - _bucketSpaceComponent.getDistributor().getStorageNodeUpStates()); - - _bucketSpaceComponent.getBucketDatabase().forEach(proc); - - for (const auto & entry :proc.getBucketsToRemove()) { - _bucketSpaceComponent.getBucketDatabase().remove(entry); + for (auto &elem : _distributorComponent.getBucketSpaceRepo()) { + const auto &newDistribution(elem.second->getDistribution()); + auto &bucketDb(elem.second->getBucketDatabase()); + + // Remove all buckets not belonging to this distributor, or + // being on storage nodes that are no longer up. + NodeRemover proc( + _distributorComponent.getClusterState(), + newState, + _distributorComponent.getBucketIdFactory(), + _distributorComponent.getIndex(), + newDistribution, + _distributorComponent.getDistributor().getStorageNodeUpStates()); + bucketDb.forEach(proc); + + for (const auto & entry :proc.getBucketsToRemove()) { + bucketDb.remove(entry); + } } } @@ -140,37 +142,35 @@ BucketDBUpdater::ensureTransitionTimerStarted() // that will make transition times appear artificially low. if (!hasPendingClusterState()) { _transitionTimer = framework::MilliSecTimer( - _bucketSpaceComponent.getClock()); + _distributorComponent.getClock()); } } void BucketDBUpdater::completeTransitionTimer() { - _bucketSpaceComponent.getDistributor().getMetrics() + _distributorComponent.getDistributor().getMetrics() .stateTransitionTime.addValue(_transitionTimer.getElapsedTimeAsDouble()); } void -BucketDBUpdater::storageDistributionChanged( - const lib::Distribution& distribution) +BucketDBUpdater::storageDistributionChanged() { ensureTransitionTimerStarted(); - removeSuperfluousBuckets(distribution, - _bucketSpaceComponent.getClusterState()); + removeSuperfluousBuckets(_distributorComponent.getClusterState()); ClusterInformation::CSP clusterInfo(new SimpleClusterInformation( - _bucketSpaceComponent.getIndex(), - distribution, - _bucketSpaceComponent.getClusterState(), - _bucketSpaceComponent.getDistributor().getStorageNodeUpStates())); + _distributorComponent.getIndex(), + _distributorComponent.getClusterState(), + _distributorComponent.getDistributor().getStorageNodeUpStates())); _pendingClusterState = PendingClusterState::createForDistributionChange( - _bucketSpaceComponent.getClock(), + _distributorComponent.getClock(), std::move(clusterInfo), _sender, - _bucketSpaceComponent.getUniqueTimestamp()); - _outdatedNodes = _pendingClusterState->getOutdatedNodeSet(); + _distributorComponent.getBucketSpaceRepo(), + _distributorComponent.getUniqueTimestamp()); + _outdatedNodesMap = _pendingClusterState->getOutdatedNodesMap(); } void @@ -179,7 +179,7 @@ BucketDBUpdater::replyToPreviousPendingClusterStateIfAny() if (_pendingClusterState.get() && _pendingClusterState->getCommand().get()) { - _bucketSpaceComponent.sendUp( + _distributorComponent.sendUp( std::make_shared<api::SetSystemStateReply>(*_pendingClusterState->getCommand())); } } @@ -192,7 +192,7 @@ BucketDBUpdater::onSetSystemState( "Received new cluster state %s", cmd->getSystemState().toString().c_str()); - lib::ClusterState oldState = _bucketSpaceComponent.getClusterState(); + lib::ClusterState oldState = _distributorComponent.getClusterState(); const lib::ClusterState& state = cmd->getSystemState(); if (state == oldState) { @@ -200,26 +200,24 @@ BucketDBUpdater::onSetSystemState( } ensureTransitionTimerStarted(); - removeSuperfluousBuckets( - _bucketSpaceComponent.getDistribution(), - cmd->getSystemState()); + removeSuperfluousBuckets(cmd->getSystemState()); replyToPreviousPendingClusterStateIfAny(); ClusterInformation::CSP clusterInfo( new SimpleClusterInformation( - _bucketSpaceComponent.getIndex(), - _bucketSpaceComponent.getDistribution(), - _bucketSpaceComponent.getClusterState(), - _bucketSpaceComponent.getDistributor() + _distributorComponent.getIndex(), + _distributorComponent.getClusterState(), + _distributorComponent.getDistributor() .getStorageNodeUpStates())); _pendingClusterState = PendingClusterState::createForClusterStateChange( - _bucketSpaceComponent.getClock(), + _distributorComponent.getClock(), std::move(clusterInfo), _sender, + _distributorComponent.getBucketSpaceRepo(), cmd, - _outdatedNodes, - _bucketSpaceComponent.getUniqueTimestamp()); - _outdatedNodes = _pendingClusterState->getOutdatedNodeSet(); + _outdatedNodesMap, + _distributorComponent.getUniqueTimestamp()); + _outdatedNodesMap = _pendingClusterState->getOutdatedNodesMap(); if (isPendingClusterStateCompleted()) { processCompletedPendingClusterState(); @@ -246,7 +244,7 @@ BucketDBUpdater::onMergeBucketReply( // bucket again to make sure it's ok. for (uint32_t i = 0; i < reply->getNodes().size(); i++) { sendRequestBucketInfo(reply->getNodes()[i].index, - reply->getBucketId(), + reply->getBucket(), replyGuard); } @@ -256,7 +254,7 @@ BucketDBUpdater::onMergeBucketReply( void BucketDBUpdater::enqueueRecheckUntilPendingStateEnabled( uint16_t node, - const document::BucketId& bucket) + const document::Bucket& bucket) { LOG(spam, "DB updater has a pending cluster state, enqueuing recheck " @@ -303,10 +301,10 @@ BucketDBUpdater::onNotifyBucketChange( if (hasPendingClusterState()) { enqueueRecheckUntilPendingStateEnabled(cmd->getSourceIndex(), - cmd->getBucketId()); + cmd->getBucket()); } else { sendRequestBucketInfo(cmd->getSourceIndex(), - cmd->getBucketId(), + cmd->getBucket(), std::shared_ptr<MergeReplyGuard>()); } @@ -355,8 +353,8 @@ BucketDBUpdater::handleSingleBucketInfoFailure( LOG(debug, "Request bucket info failed towards node %d: error was %s", req.targetNode, repl->getResult().toString().c_str()); - if (req.bucket != document::BucketId(0)) { - framework::MilliSecTime sendTime(_bucketSpaceComponent.getClock()); + if (req.bucket.getBucketId() != document::BucketId(0)) { + framework::MilliSecTime sendTime(_distributorComponent.getClock()); sendTime += framework::MilliSecTime(100); _delayedRequests.emplace_back(sendTime, req); } @@ -369,7 +367,7 @@ BucketDBUpdater::resendDelayedMessages() _pendingClusterState->resendDelayedMessages(); } if (_delayedRequests.empty()) return; // Don't fetch time if not needed - framework::MilliSecTime currentTime(_bucketSpaceComponent.getClock()); + framework::MilliSecTime currentTime(_distributorComponent.getClock()); while (!_delayedRequests.empty() && currentTime >= _delayedRequests.front().first) { @@ -407,7 +405,7 @@ BucketDBUpdater::mergeBucketInfoWithDatabase( std::sort(newList.begin(), newList.end(), sort_pred); BucketListMerger merger(newList, existing, req.timestamp); - updateDatabase(req.targetNode, merger); + updateDatabase(req.bucket.getBucketSpace(), req.targetNode, merger); } bool @@ -424,7 +422,7 @@ BucketDBUpdater::processSingleBucketInfoReply( BucketRequest req = iter->second; _sentMessages.erase(iter); - if (!_bucketSpaceComponent.storageNodeIsUp(req.targetNode)) { + if (!_distributorComponent.storageNodeIsUp(req.targetNode)) { // Ignore replies from nodes that are down. return true; } @@ -449,11 +447,12 @@ BucketDBUpdater::addBucketInfoForNode( } void -BucketDBUpdater::findRelatedBucketsInDatabase(uint16_t node, const document::BucketId& bucketId, +BucketDBUpdater::findRelatedBucketsInDatabase(uint16_t node, const document::Bucket& bucket, BucketListMerger::BucketList& existing) { + auto &distributorBucketSpace(_distributorComponent.getBucketSpaceRepo().get(bucket.getBucketSpace())); std::vector<BucketDatabase::Entry> entries; - _bucketSpaceComponent.getBucketDatabase().getAll(bucketId, entries); + distributorBucketSpace.getBucketDatabase().getAll(bucket.getBucketId(), entries); for (const BucketDatabase::Entry & entry : entries) { addBucketInfoForNode(entry, node, existing); @@ -461,16 +460,16 @@ BucketDBUpdater::findRelatedBucketsInDatabase(uint16_t node, const document::Buc } void -BucketDBUpdater::updateDatabase(uint16_t node, BucketListMerger& merger) +BucketDBUpdater::updateDatabase(document::BucketSpace bucketSpace, uint16_t node, BucketListMerger& merger) { for (const document::BucketId & bucketId : merger.getRemovedEntries()) { - document::Bucket bucket(BucketSpace::placeHolder(), bucketId); - _bucketSpaceComponent.removeNodeFromDB(bucket, node); + document::Bucket bucket(bucketSpace, bucketId); + _distributorComponent.removeNodeFromDB(bucket, node); } for (const BucketListMerger::BucketEntry& entry : merger.getAddedEntries()) { - document::Bucket bucket(BucketSpace::placeHolder(), entry.first); - _bucketSpaceComponent.updateBucketDatabase( + document::Bucket bucket(bucketSpace, entry.first); + _distributorComponent.updateBucketDatabase( bucket, BucketCopy(merger.getTimestamp(), node, entry.second), DatabaseUpdate::CREATE_IF_NONEXISTING); @@ -486,19 +485,19 @@ BucketDBUpdater::isPendingClusterStateCompleted() const void BucketDBUpdater::processCompletedPendingClusterState() { - _pendingClusterState->mergeInto(_bucketSpaceComponent.getBucketDatabase()); + _pendingClusterState->mergeIntoBucketDatabases(); if (_pendingClusterState->getCommand().get()) { enableCurrentClusterStateInDistributor(); - _bucketSpaceComponent.getDistributor().getMessageSender().sendDown( + _distributorComponent.getDistributor().getMessageSender().sendDown( _pendingClusterState->getCommand()); addCurrentStateToClusterStateHistory(); } else { - _bucketSpaceComponent.getDistributor().notifyDistributionChangeEnabled(); + _distributorComponent.getDistributor().notifyDistributionChangeEnabled(); } _pendingClusterState.reset(); - _outdatedNodes.clear(); + _outdatedNodesMap.clear(); sendAllQueuedBucketRechecks(); completeTransitionTimer(); } @@ -513,7 +512,7 @@ BucketDBUpdater::enableCurrentClusterStateInDistributor() "BucketDBUpdater finished processing state %s", state.toString().c_str()); - _bucketSpaceComponent.getDistributor().enableClusterState(state); + _distributorComponent.getDistributor().enableClusterState(state); } void @@ -564,7 +563,7 @@ BucketDBUpdater::reportXmlStatus(vespalib::xml::XmlOutputStream& xos, using namespace vespalib::xml; xos << XmlTag("bucketdb") << XmlTag("systemstate_active") - << XmlContent(_bucketSpaceComponent.getClusterState().toString()) + << XmlContent(_distributorComponent.getClusterState().toString()) << XmlEndTag(); if (_pendingClusterState) { xos << *_pendingClusterState; @@ -583,10 +582,10 @@ BucketDBUpdater::reportXmlStatus(vespalib::xml::XmlOutputStream& xos, { xos << XmlTag("storagenode") << XmlAttribute("index", entry.second.targetNode); - if (entry.second.bucket.getRawId() == 0) { + if (entry.second.bucket.getBucketId().getRawId() == 0) { xos << XmlAttribute("bucket", ALL); } else { - xos << XmlAttribute("bucket", entry.second.bucket.getId(), XmlAttribute::HEX); + xos << XmlAttribute("bucket", entry.second.bucket.getBucketId().getId(), XmlAttribute::HEX); } xos << XmlAttribute("sendtimestamp", entry.second.timestamp) << XmlEndTag(); diff --git a/storage/src/vespa/storage/distributor/bucketdbupdater.h b/storage/src/vespa/storage/distributor/bucketdbupdater.h index 994e207f200..a3c9804c2b4 100644 --- a/storage/src/vespa/storage/distributor/bucketdbupdater.h +++ b/storage/src/vespa/storage/distributor/bucketdbupdater.h @@ -6,8 +6,8 @@ #include "distributorcomponent.h" #include "distributormessagesender.h" #include "pendingclusterstate.h" -#include "distributor_bucket_space_component.h" -#include <vespa/document/bucket/bucketid.h> +#include "outdated_nodes_map.h" +#include <vespa/document/bucket/bucket.h> #include <vespa/storageapi/messageapi/returncode.h> #include <vespa/storageapi/message/bucket.h> #include <vespa/vdslib/state/clusterstate.h> @@ -27,29 +27,30 @@ class BucketDBUpdater : public framework::StatusReporter, public api::MessageHandler { public: + using OutdatedNodes = dbtransition::OutdatedNodes; + using OutdatedNodesMap = dbtransition::OutdatedNodesMap; BucketDBUpdater(Distributor& owner, DistributorBucketSpaceRepo &bucketSpaceRepo, - DistributorBucketSpace& bucketSpace, DistributorMessageSender& sender, DistributorComponentRegister& compReg); ~BucketDBUpdater(); void flush(); - BucketOwnership checkOwnershipInPendingState(const document::BucketId&) const; - void recheckBucketInfo(uint32_t nodeIdx, const document::BucketId& bid); + BucketOwnership checkOwnershipInPendingState(const document::Bucket&) const; + void recheckBucketInfo(uint32_t nodeIdx, const document::Bucket& bucket); bool onSetSystemState(const std::shared_ptr<api::SetSystemStateCommand>& cmd) override; bool onRequestBucketInfoReply(const std::shared_ptr<api::RequestBucketInfoReply> & repl) override; bool onMergeBucketReply(const std::shared_ptr<api::MergeBucketReply>& reply) override; bool onNotifyBucketChange(const std::shared_ptr<api::NotifyBucketChangeCommand>&) override; void resendDelayedMessages(); - void storageDistributionChanged(const lib::Distribution&); + void storageDistributionChanged(); vespalib::string reportXmlStatus(vespalib::xml::XmlOutputStream&, const framework::HttpUrlPath&) const; vespalib::string getReportContentType(const framework::HttpUrlPath&) const override; bool reportStatus(std::ostream&, const framework::HttpUrlPath&) const override; void print(std::ostream& out, bool verbose, const std::string& indent) const; - DistributorComponent& getDistributorComponent() { return _bucketSpaceComponent; } + DistributorComponent& getDistributorComponent() { return _distributorComponent; } /** * Returns whether the current PendingClusterState indicates that there has @@ -63,7 +64,7 @@ public: } private: - DistributorBucketSpaceComponent _bucketSpaceComponent; + DistributorComponent _distributorComponent; class MergeReplyGuard { public: MergeReplyGuard(BucketDBUpdater& updater, const std::shared_ptr<api::MergeBucketReply>& reply) @@ -81,9 +82,9 @@ private: struct BucketRequest { BucketRequest() - : targetNode(0), bucket(0), timestamp(0) {}; + : targetNode(0), bucket(), timestamp(0) {}; - BucketRequest(uint16_t t, uint64_t currentTime, const document::BucketId& b, + BucketRequest(uint16_t t, uint64_t currentTime, const document::Bucket& b, const std::shared_ptr<MergeReplyGuard>& guard) : targetNode(t), bucket(b), @@ -91,7 +92,7 @@ private: _mergeReplyGuard(guard) {}; uint16_t targetNode; - document::BucketId bucket; + document::Bucket bucket; uint64_t timestamp; std::shared_ptr<MergeReplyGuard> _mergeReplyGuard; @@ -99,11 +100,11 @@ private: struct EnqueuedBucketRecheck { uint16_t node; - document::BucketId bucket; + document::Bucket bucket; EnqueuedBucketRecheck() : node(0), bucket() {} - EnqueuedBucketRecheck(uint16_t _node, const document::BucketId& _bucket) + EnqueuedBucketRecheck(uint16_t _node, const document::Bucket& _bucket) : node(_node), bucket(_bucket) {} @@ -121,7 +122,6 @@ private: bool hasPendingClusterState() const; bool pendingClusterStateAccepted(const std::shared_ptr<api::RequestBucketInfoReply>& repl); - bool bucketOwnedAccordingToPendingState(const document::BucketId& bucketId) const; bool processSingleBucketInfoReply(const std::shared_ptr<api::RequestBucketInfoReply>& repl); void handleSingleBucketInfoFailure(const std::shared_ptr<api::RequestBucketInfoReply>& repl, const BucketRequest& req); @@ -131,7 +131,7 @@ private: const BucketRequest& req); void convertBucketInfoToBucketList(const std::shared_ptr<api::RequestBucketInfoReply>& repl, uint16_t targetNode, BucketListMerger::BucketList& newList); - void sendRequestBucketInfo(uint16_t node, const document::BucketId& bucket, + void sendRequestBucketInfo(uint16_t node, const document::Bucket& bucket, const std::shared_ptr<MergeReplyGuard>& mergeReply); void addBucketInfoForNode(const BucketDatabase::Entry& e, uint16_t node, BucketListMerger::BucketList& existing) const; @@ -143,24 +143,24 @@ private: * in bucketId, or that bucketId is contained in, that have copies * on the given node. */ - void findRelatedBucketsInDatabase(uint16_t node, const document::BucketId& bucketId, + void findRelatedBucketsInDatabase(uint16_t node, const document::Bucket& bucket, BucketListMerger::BucketList& existing); /** Updates the bucket database from the information generated by the given bucket list merger. */ - void updateDatabase(uint16_t node, BucketListMerger& merger); + void updateDatabase(document::BucketSpace bucketSpace, uint16_t node, BucketListMerger& merger); void updateState(const lib::ClusterState& oldState, const lib::ClusterState& newState); - void removeSuperfluousBuckets(const lib::Distribution& newDistribution, const lib::ClusterState& newState); + void removeSuperfluousBuckets(const lib::ClusterState& newState); void replyToPreviousPendingClusterStateIfAny(); void enableCurrentClusterStateInDistributor(); void addCurrentStateToClusterStateHistory(); - void enqueueRecheckUntilPendingStateEnabled(uint16_t node, const document::BucketId&); + void enqueueRecheckUntilPendingStateEnabled(uint16_t node, const document::Bucket&); void sendAllQueuedBucketRechecks(); friend class BucketDBUpdater_Test; @@ -226,7 +226,7 @@ private: std::list<PendingClusterState::Summary> _history; DistributorMessageSender& _sender; std::set<EnqueuedBucketRecheck> _enqueuedRechecks; - std::unordered_set<uint16_t> _outdatedNodes; + OutdatedNodesMap _outdatedNodesMap; framework::MilliSecTimer _transitionTimer; }; diff --git a/storage/src/vespa/storage/distributor/clusterinformation.cpp b/storage/src/vespa/storage/distributor/clusterinformation.cpp index 8e956a1cf61..cd09e4f46d4 100644 --- a/storage/src/vespa/storage/distributor/clusterinformation.cpp +++ b/storage/src/vespa/storage/distributor/clusterinformation.cpp @@ -6,45 +6,6 @@ namespace storage::distributor { -bool -ClusterInformation::ownsBucket(const document::BucketId& bucketId) const -{ - try { - uint16_t distributor(getDistribution().getIdealDistributorNode( - getClusterState(), bucketId)); - - return (getDistributorIndex() == distributor); - } catch (lib::TooFewBucketBitsInUseException& e) { - return false; - } catch (lib::NoDistributorsAvailableException& e) { - return false; - } -} - -bool -ClusterInformation::nodeInSameGroupAsSelf(uint16_t otherNode) const -{ - return (getDistribution().getNodeGraph().getGroupForNode(otherNode) - == getDistribution().getNodeGraph().getGroupForNode(getDistributorIndex())); -} - -vespalib::string -ClusterInformation::getDistributionHash() const -{ - return getDistribution().getNodeGraph().getDistributionConfigHash(); -} - -std::vector<uint16_t> -ClusterInformation::getIdealStorageNodesForState( - const lib::ClusterState& clusterState, - const document::BucketId& bucketId) const -{ - return getDistribution().getIdealStorageNodes( - clusterState, - bucketId, - getStorageUpStates()); -} - uint16_t ClusterInformation::getStorageNodeCount() const { diff --git a/storage/src/vespa/storage/distributor/clusterinformation.h b/storage/src/vespa/storage/distributor/clusterinformation.h index 4494b137f89..25f303d0f52 100644 --- a/storage/src/vespa/storage/distributor/clusterinformation.h +++ b/storage/src/vespa/storage/distributor/clusterinformation.h @@ -26,22 +26,10 @@ public: virtual uint16_t getDistributorIndex() const = 0; - virtual const lib::Distribution& getDistribution() const = 0; - virtual const lib::ClusterState& getClusterState() const = 0; virtual const char* getStorageUpStates() const = 0; - bool ownsBucket(const document::BucketId& bucketId) const; - - bool nodeInSameGroupAsSelf(uint16_t otherNode) const; - - vespalib::string getDistributionHash() const; - - std::vector<uint16_t> getIdealStorageNodesForState( - const lib::ClusterState& clusterState, - const document::BucketId& bucketId) const; - uint16_t getStorageNodeCount() const; }; diff --git a/storage/src/vespa/storage/distributor/distributor.cpp b/storage/src/vespa/storage/distributor/distributor.cpp index 2de29697733..1edcbe75dd6 100644 --- a/storage/src/vespa/storage/distributor/distributor.cpp +++ b/storage/src/vespa/storage/distributor/distributor.cpp @@ -6,6 +6,7 @@ #include "idealstatemetricsset.h" #include "ownership_transfer_safe_time_point_calculator.h" #include "distributor_bucket_space_repo.h" +#include "distributor_bucket_space.h" #include <vespa/storage/bucketdb/mapbucketdatabase.h> #include <vespa/storage/distributor/maintenance/simplemaintenancescanner.h> #include <vespa/storage/distributor/maintenance/simplebucketprioritydatabase.h> @@ -72,12 +73,12 @@ Distributor::Distributor(DistributorComponentRegister& compReg, _operationOwner(*this, _component.getClock()), _maintenanceOperationOwner(*this, _component.getClock()), _pendingMessageTracker(compReg), - _bucketDBUpdater(*this, *_bucketSpaceRepo, getDefaultBucketSpace(), *this, compReg), + _bucketDBUpdater(*this, *_bucketSpaceRepo, *this, compReg), _distributorStatusDelegate(compReg, *this, *this), _bucketDBStatusDelegate(compReg, *this, _bucketDBUpdater), - _idealStateManager(*this, *_bucketSpaceRepo, getDefaultBucketSpace(), compReg, + _idealStateManager(*this, *_bucketSpaceRepo, compReg, manageActiveBucketCopies), - _externalOperationHandler(*this, *_bucketSpaceRepo, getDefaultBucketSpace(), + _externalOperationHandler(*this, *_bucketSpaceRepo, _idealStateManager, compReg), _threadPool(threadPool), _initializingIsUp(true), @@ -87,7 +88,7 @@ Distributor::Distributor(DistributorComponentRegister& compReg, _bucketPriorityDb(new SimpleBucketPriorityDatabase()), _scanner(new SimpleMaintenanceScanner( *_bucketPriorityDb, _idealStateManager, - getDefaultBucketSpace().getBucketDatabase())), + *_bucketSpaceRepo)), _throttlingStarter(new ThrottlingOperationStarter( _maintenanceOperationOwner)), _blockingStarter(new BlockingOperationStarter(_pendingMessageTracker, @@ -154,7 +155,7 @@ const DistributorBucketSpace& Distributor::getDefaultBucketSpace() const noexcep BucketOwnership Distributor::checkOwnershipInPendingState(const document::Bucket &b) const { - return _bucketDBUpdater.checkOwnershipInPendingState(b.getBucketId()); + return _bucketDBUpdater.checkOwnershipInPendingState(b); } void @@ -455,7 +456,7 @@ Distributor::storageDistributionChanged() void Distributor::recheckBucketInfo(uint16_t nodeIdx, const document::Bucket &bucket) { - _bucketDBUpdater.recheckBucketInfo(nodeIdx, bucket.getBucketId()); + _bucketDBUpdater.recheckBucketInfo(nodeIdx, bucket); } namespace { @@ -527,25 +528,13 @@ Distributor::checkBucketForSplit(document::BucketSpace bucketSpace, } Operation::SP operation = - _idealStateManager.generateInterceptingSplit(e, priority); + _idealStateManager.generateInterceptingSplit(bucketSpace, e, priority); if (operation.get()) { _maintenanceOperationOwner.start(operation, priority); } } -const lib::Distribution& -Distributor::getDistribution() const -{ - // FIXME having _distribution be mutable for this is smelly. Is this only - // in place for the sake of tests? - if (!_distribution.get()) { - _distribution = _component.getDistribution(); - } - - return *_distribution; -} - void Distributor::enableNextDistribution() { @@ -553,13 +542,13 @@ Distributor::enableNextDistribution() _distribution = _nextDistribution; propagateDefaultDistribution(_distribution); _nextDistribution = std::shared_ptr<lib::Distribution>(); - _bucketDBUpdater.storageDistributionChanged(getDistribution()); + _bucketDBUpdater.storageDistributionChanged(); } } void Distributor::propagateDefaultDistribution( - std::shared_ptr<lib::Distribution> distribution) + std::shared_ptr<const lib::Distribution> distribution) { _bucketSpaceRepo->setDefaultDistribution(std::move(distribution)); } @@ -684,9 +673,10 @@ Distributor::scanNextBucket() updateInternalMetricsForCompletedScan(); _scanner->reset(); } else { + const auto &distribution(_bucketSpaceRepo->get(scanResult.getBucketSpace()).getDistribution()); _bucketDBMetricUpdater.visit( scanResult.getEntry(), - _component.getDistribution()->getRedundancy()); + distribution.getRedundancy()); } return scanResult; } diff --git a/storage/src/vespa/storage/distributor/distributor.h b/storage/src/vespa/storage/distributor/distributor.h index f59b47574ba..9406dacf358 100644 --- a/storage/src/vespa/storage/distributor/distributor.h +++ b/storage/src/vespa/storage/distributor/distributor.h @@ -6,7 +6,6 @@ #include "bucketdbupdater.h" #include "pendingmessagetracker.h" #include "externaloperationhandler.h" -#include "maintenancebucket.h" #include "min_replica_provider.h" #include "distributorinterface.h" @@ -114,8 +113,6 @@ public: */ void checkBucketForSplit(document::BucketSpace bucketSpace, const BucketDatabase::Entry& e, uint8_t priority) override; - const lib::Distribution& getDistribution() const override; - const lib::ClusterState& getClusterState() const override { return _clusterState; } @@ -161,6 +158,8 @@ public: DistributorBucketSpace& getDefaultBucketSpace() noexcept; const DistributorBucketSpace& getDefaultBucketSpace() const noexcept; + DistributorBucketSpaceRepo &getBucketSpaceRepo() noexcept { return *_bucketSpaceRepo; } + const DistributorBucketSpaceRepo &getBucketSpaceRepo() const noexcept { return *_bucketSpaceRepo; } private: friend class Distributor_Test; @@ -232,7 +231,7 @@ private: Operation::SP& operation); void enableNextDistribution(); - void propagateDefaultDistribution(std::shared_ptr<lib::Distribution>); + void propagateDefaultDistribution(std::shared_ptr<const lib::Distribution>); lib::ClusterState _clusterState; @@ -251,7 +250,7 @@ private: IdealStateManager _idealStateManager; ExternalOperationHandler _externalOperationHandler; - mutable std::shared_ptr<lib::Distribution> _distribution; + std::shared_ptr<lib::Distribution> _distribution; std::shared_ptr<lib::Distribution> _nextDistribution; using MessageQueue = std::vector<std::shared_ptr<api::StorageMessage>>; diff --git a/storage/src/vespa/storage/distributor/distributor_bucket_space.cpp b/storage/src/vespa/storage/distributor/distributor_bucket_space.cpp index b33ff72a654..68fe9f441d7 100644 --- a/storage/src/vespa/storage/distributor/distributor_bucket_space.cpp +++ b/storage/src/vespa/storage/distributor/distributor_bucket_space.cpp @@ -5,7 +5,10 @@ namespace storage { namespace distributor { -DistributorBucketSpace::DistributorBucketSpace() { +DistributorBucketSpace::DistributorBucketSpace() + : _bucketDatabase(), + _distribution() +{ } DistributorBucketSpace::~DistributorBucketSpace() { diff --git a/storage/src/vespa/storage/distributor/distributor_bucket_space.h b/storage/src/vespa/storage/distributor/distributor_bucket_space.h index 17be92126cb..30893e8cfb1 100644 --- a/storage/src/vespa/storage/distributor/distributor_bucket_space.h +++ b/storage/src/vespa/storage/distributor/distributor_bucket_space.h @@ -26,7 +26,7 @@ namespace distributor { */ class DistributorBucketSpace { MapBucketDatabase _bucketDatabase; - std::shared_ptr<lib::Distribution> _distribution; + std::shared_ptr<const lib::Distribution> _distribution; public: DistributorBucketSpace(); ~DistributorBucketSpace(); @@ -43,7 +43,7 @@ public: return _bucketDatabase; } - void setDistribution(lib::Distribution::SP distribution) { + void setDistribution(std::shared_ptr<const lib::Distribution> distribution) { _distribution = std::move(distribution); } diff --git a/storage/src/vespa/storage/distributor/distributor_bucket_space_component.cpp b/storage/src/vespa/storage/distributor/distributor_bucket_space_component.cpp deleted file mode 100644 index 4616179ae82..00000000000 --- a/storage/src/vespa/storage/distributor/distributor_bucket_space_component.cpp +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "distributor_bucket_space_component.h" - -namespace storage::distributor { - -DistributorBucketSpaceComponent::DistributorBucketSpaceComponent( - DistributorInterface& distributor, - DistributorBucketSpaceRepo &bucketSpaceRepo, - DistributorBucketSpace& bucketSpace, - DistributorComponentRegister& compReg, - const std::string& name) - : DistributorComponent(distributor, bucketSpaceRepo, compReg, name), - _bucketSpace(bucketSpace) -{ -} - -} diff --git a/storage/src/vespa/storage/distributor/distributor_bucket_space_component.h b/storage/src/vespa/storage/distributor/distributor_bucket_space_component.h deleted file mode 100644 index 9c04cb6b67f..00000000000 --- a/storage/src/vespa/storage/distributor/distributor_bucket_space_component.h +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include "distributorcomponent.h" -#include "distributor_bucket_space.h" - -namespace storage { -namespace distributor { - -/** - * Component bound to a specific bucket space, with utility operations to - * operate on buckets in this space. - */ -class DistributorBucketSpaceComponent : public DistributorComponent { - DistributorBucketSpace& _bucketSpace; -public: - DistributorBucketSpaceComponent(DistributorInterface& distributor, - DistributorBucketSpaceRepo &bucketSpaceRepo, - DistributorBucketSpace& bucketSpace, - DistributorComponentRegister& compReg, - const std::string& name); - - BucketDatabase& getBucketDatabase() override { - return _bucketSpace.getBucketDatabase(); - } - - const BucketDatabase& getBucketDatabase() const override { - return _bucketSpace.getBucketDatabase(); - } - - const lib::Distribution& getDistribution() const override { - return _bucketSpace.getDistribution(); - } - -}; - -} -} diff --git a/storage/src/vespa/storage/distributor/distributor_bucket_space_repo.cpp b/storage/src/vespa/storage/distributor/distributor_bucket_space_repo.cpp index 67ca2397b11..d414f520bc2 100644 --- a/storage/src/vespa/storage/distributor/distributor_bucket_space_repo.cpp +++ b/storage/src/vespa/storage/distributor/distributor_bucket_space_repo.cpp @@ -1,43 +1,67 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "distributor_bucket_space_repo.h" +#include "distributor_bucket_space.h" #include <vespa/vdslib/distribution/distribution.h> #include <cassert> #include <vespa/log/log.h> -LOG_SETUP(".distributor.managed_bucket_space_repo"); +LOG_SETUP(".distributor.distributor_bucket_space_repo"); using document::BucketSpace; namespace storage { namespace distributor { -DistributorBucketSpaceRepo::DistributorBucketSpaceRepo() { +DistributorBucketSpaceRepo::DistributorBucketSpaceRepo() + : _map() +{ + add(BucketSpace::placeHolder(), std::make_unique<DistributorBucketSpace>()); } DistributorBucketSpaceRepo::~DistributorBucketSpaceRepo() { } +void +DistributorBucketSpaceRepo::add(document::BucketSpace bucketSpace, std::unique_ptr<DistributorBucketSpace> distributorBucketSpace) +{ + _map.emplace(bucketSpace, std::move(distributorBucketSpace)); +} + void DistributorBucketSpaceRepo::setDefaultDistribution( - std::shared_ptr<lib::Distribution> distr) + std::shared_ptr<const lib::Distribution> distr) { LOG(debug, "Got new default distribution '%s'", distr->toString().c_str()); // TODO all spaces, per-space config transforms - _defaultSpace.setDistribution(std::move(distr)); + getDefaultSpace().setDistribution(std::move(distr)); } DistributorBucketSpace & DistributorBucketSpaceRepo::get(BucketSpace bucketSpace) { - assert(bucketSpace == BucketSpace::placeHolder()); - return _defaultSpace; + auto itr = _map.find(bucketSpace); + assert(itr != _map.end()); + return *itr->second; } const DistributorBucketSpace & DistributorBucketSpaceRepo::get(BucketSpace bucketSpace) const { - assert(bucketSpace == BucketSpace::placeHolder()); - return _defaultSpace; + auto itr = _map.find(bucketSpace); + assert(itr != _map.end()); + return *itr->second; +} + +DistributorBucketSpace & +DistributorBucketSpaceRepo::getDefaultSpace() noexcept +{ + return get(BucketSpace::placeHolder()); +} + +const DistributorBucketSpace & +DistributorBucketSpaceRepo::getDefaultSpace() const noexcept +{ + return get(BucketSpace::placeHolder()); } } diff --git a/storage/src/vespa/storage/distributor/distributor_bucket_space_repo.h b/storage/src/vespa/storage/distributor/distributor_bucket_space_repo.h index 41eebf4bc4b..c3661b53e69 100644 --- a/storage/src/vespa/storage/distributor/distributor_bucket_space_repo.h +++ b/storage/src/vespa/storage/distributor/distributor_bucket_space_repo.h @@ -1,17 +1,25 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once -#include "distributor_bucket_space.h" #include <vespa/document/bucket/bucketspace.h> #include <memory> +#include <unordered_map> namespace storage { +namespace lib { class Distribution; } + namespace distributor { +class DistributorBucketSpace; + class DistributorBucketSpaceRepo { - // TODO: multiple spaces. This is just to start re-wiring things. - DistributorBucketSpace _defaultSpace; +public: + using BucketSpaceMap = std::unordered_map<document::BucketSpace, std::unique_ptr<DistributorBucketSpace>, document::BucketSpace::hash>; + +private: + BucketSpaceMap _map; + public: DistributorBucketSpaceRepo(); ~DistributorBucketSpaceRepo(); @@ -21,14 +29,15 @@ public: DistributorBucketSpaceRepo(DistributorBucketSpaceRepo&&) = delete; DistributorBucketSpaceRepo& operator=(DistributorBucketSpaceRepo&&) = delete; - DistributorBucketSpace& getDefaultSpace() noexcept { return _defaultSpace; } - const DistributorBucketSpace& getDefaultSpace() const noexcept { - return _defaultSpace; - } + DistributorBucketSpace& getDefaultSpace() noexcept; + const DistributorBucketSpace& getDefaultSpace() const noexcept; DistributorBucketSpace &get(document::BucketSpace bucketSpace); const DistributorBucketSpace &get(document::BucketSpace bucketSpace) const; - void setDefaultDistribution(std::shared_ptr<lib::Distribution> distr); + void setDefaultDistribution(std::shared_ptr<const lib::Distribution> distr); + BucketSpaceMap::const_iterator begin() const { return _map.begin(); } + BucketSpaceMap::const_iterator end() const { return _map.end(); } + void add(document::BucketSpace bucketSpace, std::unique_ptr<DistributorBucketSpace> distributorBucketSpace); }; } diff --git a/storage/src/vespa/storage/distributor/distributorcomponent.cpp b/storage/src/vespa/storage/distributor/distributorcomponent.cpp index f8a0a5504ec..f0643eec37e 100644 --- a/storage/src/vespa/storage/distributor/distributorcomponent.cpp +++ b/storage/src/vespa/storage/distributor/distributorcomponent.cpp @@ -3,7 +3,8 @@ #include <vespa/storage/common/bucketoperationlogger.h> #include <vespa/storageapi/messageapi/storagereply.h> #include <vespa/vdslib/distribution/distribution.h> -#include <vespa/storage/distributor/distributor_bucket_space_repo.h> +#include "distributor_bucket_space_repo.h" +#include "distributor_bucket_space.h" #include <vespa/log/log.h> LOG_SETUP(".distributorstoragelink"); @@ -137,11 +138,6 @@ DistributorComponent::nodeAddress(uint16_t nodeIndex) const nodeIndex); } -uint16_t -DistributorComponent::getRedundancy() const { - return getDistribution().getRedundancy(); -} - bool DistributorComponent::checkDistribution( api::StorageCommand &cmd, diff --git a/storage/src/vespa/storage/distributor/distributorcomponent.h b/storage/src/vespa/storage/distributor/distributorcomponent.h index 307ddc20299..33e86d423e7 100644 --- a/storage/src/vespa/storage/distributor/distributorcomponent.h +++ b/storage/src/vespa/storage/distributor/distributorcomponent.h @@ -89,11 +89,6 @@ public: bool storageNodeIsUp(uint32_t nodeIndex) const; /** - * Returns the current desired redundancy level. - */ - uint16_t getRedundancy() const; - - /** * Verifies that the given command has been received at the * correct distributor based on the current system state. */ @@ -157,12 +152,6 @@ public: return _distributor; } - virtual BucketDatabase& getBucketDatabase() = 0; - virtual const BucketDatabase& getBucketDatabase() const = 0; - // FIXME this hides the StorageComponent::getDistribution method, which - // even has a different signature altogether...! - virtual const lib::Distribution& getDistribution() const = 0; - DistributorBucketSpaceRepo &getBucketSpaceRepo() { return _bucketSpaceRepo; } const DistributorBucketSpaceRepo &getBucketSpaceRepo() const { return _bucketSpaceRepo; } diff --git a/storage/src/vespa/storage/distributor/distributorinterface.h b/storage/src/vespa/storage/distributor/distributorinterface.h index cd51387964a..bf27dc432b6 100644 --- a/storage/src/vespa/storage/distributor/distributorinterface.h +++ b/storage/src/vespa/storage/distributor/distributorinterface.h @@ -5,7 +5,6 @@ #include <vespa/storage/common/messagesender.h> #include <vespa/storage/distributor/pendingmessagetracker.h> #include <vespa/storageapi/message/state.h> -#include <vespa/storage/distributor/maintenancebucket.h> #include <vespa/storage/bucketdb/bucketdatabase.h> #include <vespa/storage/distributor/bucketgctimecalculator.h> #include <vespa/storage/distributor/distributormetricsset.h> @@ -21,7 +20,6 @@ class DistributorInterface : public DistributorMessageSender { public: virtual PendingMessageTracker& getPendingMessageTracker() = 0; - virtual const lib::Distribution& getDistribution() const = 0; virtual DistributorMetricSet& getMetrics() = 0; diff --git a/storage/src/vespa/storage/distributor/externaloperationhandler.cpp b/storage/src/vespa/storage/distributor/externaloperationhandler.cpp index 77a86a3756d..902726fff41 100644 --- a/storage/src/vespa/storage/distributor/externaloperationhandler.cpp +++ b/storage/src/vespa/storage/distributor/externaloperationhandler.cpp @@ -20,6 +20,7 @@ #include <vespa/storageapi/message/batch.h> #include <vespa/storageapi/message/stat.h> #include "distributor_bucket_space_repo.h" +#include "distributor_bucket_space.h" #include <vespa/log/log.h> LOG_SETUP(".distributor.manager"); @@ -29,10 +30,9 @@ namespace storage::distributor { ExternalOperationHandler::ExternalOperationHandler( Distributor& owner, DistributorBucketSpaceRepo& bucketSpaceRepo, - DistributorBucketSpace& bucketSpace, const MaintenanceOperationGenerator& gen, DistributorComponentRegister& compReg) - : DistributorBucketSpaceComponent(owner, bucketSpaceRepo, bucketSpace, compReg, "External operation handler"), + : DistributorComponent(owner, bucketSpaceRepo, compReg, "External operation handler"), _operationGenerator(gen), _rejectFeedBeforeTimeReached() // At epoch { } diff --git a/storage/src/vespa/storage/distributor/externaloperationhandler.h b/storage/src/vespa/storage/distributor/externaloperationhandler.h index 763796767cf..c405b63aa81 100644 --- a/storage/src/vespa/storage/distributor/externaloperationhandler.h +++ b/storage/src/vespa/storage/distributor/externaloperationhandler.h @@ -6,7 +6,6 @@ #include <vespa/document/bucket/bucketidfactory.h> #include <vespa/vdslib/state/clusterstate.h> #include <vespa/storage/distributor/distributorcomponent.h> -#include <vespa/storage/distributor/distributor_bucket_space_component.h> #include <vespa/storageapi/messageapi/messagehandler.h> #include <chrono> @@ -20,7 +19,7 @@ namespace distributor { class Distributor; class MaintenanceOperationGenerator; -class ExternalOperationHandler : public DistributorBucketSpaceComponent, +class ExternalOperationHandler : public DistributorComponent, public api::MessageHandler { public: @@ -39,7 +38,6 @@ public: ExternalOperationHandler(Distributor& owner, DistributorBucketSpaceRepo& bucketSpaceRepo, - DistributorBucketSpace& bucketSpace, const MaintenanceOperationGenerator&, DistributorComponentRegister& compReg); diff --git a/storage/src/vespa/storage/distributor/idealstatemanager.cpp b/storage/src/vespa/storage/distributor/idealstatemanager.cpp index 952f2f2f539..4ceeb387341 100644 --- a/storage/src/vespa/storage/distributor/idealstatemanager.cpp +++ b/storage/src/vespa/storage/distributor/idealstatemanager.cpp @@ -10,10 +10,13 @@ #include <vespa/storageapi/message/multioperation.h> #include <vespa/storage/common/bucketmessages.h> #include <vespa/vespalib/stllike/hash_map.hpp> +#include "distributor_bucket_space_repo.h" +#include "distributor_bucket_space.h" #include <vespa/log/log.h> LOG_SETUP(".distributor.operation.queue"); +using document::BucketSpace; using storage::lib::Node; using storage::lib::NodeType; @@ -23,12 +26,12 @@ namespace distributor { IdealStateManager::IdealStateManager( Distributor& owner, DistributorBucketSpaceRepo& bucketSpaceRepo, - DistributorBucketSpace& bucketSpace, DistributorComponentRegister& compReg, bool manageActiveBucketCopies) : HtmlStatusReporter("idealstateman", "Ideal state manager"), _metrics(new IdealStateMetricSet), - _distributorComponent(owner, bucketSpaceRepo, bucketSpace, compReg, "Ideal state manager") + _distributorComponent(owner, bucketSpaceRepo, compReg, "Ideal state manager"), + _bucketSpaceRepo(bucketSpaceRepo) { _distributorComponent.registerStatusPage(*this); _distributorComponent.registerMetric(*_metrics); @@ -74,17 +77,17 @@ IdealStateManager::iAmUp() const void IdealStateManager::fillParentAndChildBuckets(StateChecker::Context& c) const { - _distributorComponent.getBucketDatabase().getAll(c.bucketId, c.entries); + c.db.getAll(c.getBucketId(), c.entries); if (c.entries.empty()) { LOG(spam, "Did not find bucket %s in bucket database", - c.bucketId.toString().c_str()); + c.bucket.toString().c_str()); } } void IdealStateManager::fillSiblingBucket(StateChecker::Context& c) const { - c.siblingEntry = _distributorComponent.getBucketDatabase().get(c.siblingBucket); + c.siblingEntry = c.db.get(c.siblingBucket); } BucketDatabase::Entry* @@ -92,7 +95,7 @@ IdealStateManager::getEntryForPrimaryBucket(StateChecker::Context& c) const { for (uint32_t j = 0; j < c.entries.size(); ++j) { BucketDatabase::Entry& e = c.entries[j]; - if (e.getBucketId() == c.bucketId) { + if (e.getBucketId() == c.getBucketId()) { return &e; } } @@ -143,7 +146,8 @@ IdealStateManager::generateHighestPriority( const document::Bucket &bucket, NodeMaintenanceStatsTracker& statsTracker) const { - StateChecker::Context c(_distributorComponent, statsTracker, bucket.getBucketId()); + auto &distributorBucketSpace(_bucketSpaceRepo.get(bucket.getBucketSpace())); + StateChecker::Context c(_distributorComponent, distributorBucketSpace, statsTracker, bucket); fillParentAndChildBuckets(c); fillSiblingBucket(c); @@ -172,11 +176,14 @@ IdealStateManager::prioritize( } IdealStateOperation::SP -IdealStateManager::generateInterceptingSplit(const BucketDatabase::Entry& e, +IdealStateManager::generateInterceptingSplit(BucketSpace bucketSpace, + const BucketDatabase::Entry& e, api::StorageMessage::Priority pri) { NodeMaintenanceStatsTracker statsTracker; - StateChecker::Context c(_distributorComponent, statsTracker, e.getBucketId()); + document::Bucket bucket(bucketSpace, e.getBucketId()); + auto &distributorBucketSpace(_bucketSpaceRepo.get(bucket.getBucketSpace())); + StateChecker::Context c(_distributorComponent, distributorBucketSpace, statsTracker, bucket); if (e.valid()) { c.entry = e; @@ -210,7 +217,8 @@ std::vector<MaintenanceOperation::SP> IdealStateManager::generateAll(const document::Bucket &bucket, NodeMaintenanceStatsTracker& statsTracker) const { - StateChecker::Context c(_distributorComponent, statsTracker, bucket.getBucketId()); + auto &distributorBucketSpace(_bucketSpaceRepo.get(bucket.getBucketSpace())); + StateChecker::Context c(_distributorComponent, distributorBucketSpace, statsTracker, bucket); fillParentAndChildBuckets(c); fillSiblingBucket(c); BucketDatabase::Entry* e(getEntryForPrimaryBucket(c)); @@ -233,7 +241,7 @@ IdealStateManager::generateAll(const document::Bucket &bucket, void IdealStateManager::getBucketStatus( - document::BucketSpace bucketSpace, + BucketSpace bucketSpace, const BucketDatabase::Entry& entry, NodeMaintenanceStatsTracker& statsTracker, std::ostream& out) const @@ -265,8 +273,10 @@ IdealStateManager::getBucketStatus( void IdealStateManager::getBucketStatus(std::ostream& out) const { - StatusBucketVisitor proc(*this, document::BucketSpace::placeHolder(), out); - _distributorComponent.getBucketDatabase().forEach(proc); + BucketSpace bucketSpace(BucketSpace::placeHolder()); + StatusBucketVisitor proc(*this, bucketSpace, out); + auto &distributorBucketSpace(_bucketSpaceRepo.get(bucketSpace)); + distributorBucketSpace.getBucketDatabase().forEach(proc); } } // distributor diff --git a/storage/src/vespa/storage/distributor/idealstatemanager.h b/storage/src/vespa/storage/distributor/idealstatemanager.h index b2ca5f8cef6..b9607b35d28 100644 --- a/storage/src/vespa/storage/distributor/idealstatemanager.h +++ b/storage/src/vespa/storage/distributor/idealstatemanager.h @@ -4,7 +4,7 @@ #include <deque> #include <map> #include <set> -#include <vespa/storage/distributor/distributor_bucket_space_component.h> +#include <vespa/storage/distributor/distributorcomponent.h> #include <vespa/storage/distributor/statechecker.h> #include <vespa/storage/distributor/maintenance/maintenanceprioritygenerator.h> #include <vespa/storage/distributor/maintenance/maintenanceoperationgenerator.h> @@ -41,7 +41,6 @@ public: IdealStateManager(Distributor& owner, DistributorBucketSpaceRepo& bucketSpaceRepo, - DistributorBucketSpace& bucketSpace, DistributorComponentRegister& compReg, bool manageActiveBucketCopies); @@ -68,6 +67,7 @@ public: * with higher priority than the given one. */ IdealStateOperation::SP generateInterceptingSplit( + document::BucketSpace bucketSpace, const BucketDatabase::Entry& e, api::StorageMessage::Priority pri); @@ -85,6 +85,8 @@ public: return _distributorComponent; } StorageComponent::LoadTypeSetSP getLoadTypes() { return _distributorComponent.getLoadTypes(); } + DistributorBucketSpaceRepo &getBucketSpaceRepo() { return _bucketSpaceRepo; } + const DistributorBucketSpaceRepo &getBucketSpaceRepo() const { return _bucketSpaceRepo; } private: void fillParentAndChildBuckets(StateChecker::Context& c) const; @@ -111,7 +113,8 @@ private: std::vector<StateChecker::SP> _stateCheckers; SplitBucketStateChecker* _splitBucketStateChecker; - DistributorBucketSpaceComponent _distributorComponent; + DistributorComponent _distributorComponent; + DistributorBucketSpaceRepo &_bucketSpaceRepo; std::vector<IdealStateOperation::SP> generateOperationsForBucket( StateChecker::Context& c) const; @@ -140,7 +143,6 @@ private: const BucketDatabase::Entry& entry, NodeMaintenanceStatsTracker& statsTracker, std::ostream& out) const; - }; } // distributor diff --git a/storage/src/vespa/storage/distributor/maintenance/maintenancescanner.h b/storage/src/vespa/storage/distributor/maintenance/maintenancescanner.h index 783e8e1e5ba..c1d76b57c7c 100644 --- a/storage/src/vespa/storage/distributor/maintenance/maintenancescanner.h +++ b/storage/src/vespa/storage/distributor/maintenance/maintenancescanner.h @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once +#include <vespa/document/bucket/bucketspace.h> #include <vespa/storage/bucketdb/bucketdatabase.h> namespace storage { @@ -13,20 +14,22 @@ public: class ScanResult { bool _done; + document::BucketSpace _bucketSpace; BucketDatabase::Entry _entry; public: bool isDone() const { return _done; } + document::BucketSpace getBucketSpace() const { return _bucketSpace; } const BucketDatabase::Entry& getEntry() const { return _entry; } static ScanResult createDone() { return ScanResult(true); } - static ScanResult createNotDone(BucketDatabase::Entry entry) { - return ScanResult(entry); + static ScanResult createNotDone(document::BucketSpace bucketSpace, BucketDatabase::Entry entry) { + return ScanResult(bucketSpace, entry); } private: - ScanResult(bool done) : _done(done), _entry() {} - ScanResult(const BucketDatabase::Entry& e) : _done(false), _entry(e) {} + ScanResult(bool done) : _done(done), _bucketSpace(document::BucketSpace::placeHolder()), _entry() {} + ScanResult(document::BucketSpace bucketSpace, const BucketDatabase::Entry& e) : _done(false), _bucketSpace(bucketSpace), _entry(e) {} }; virtual ScanResult scanNext() = 0; diff --git a/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.cpp b/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.cpp index 2bdef7ed320..870dcc25a4b 100644 --- a/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.cpp +++ b/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.cpp @@ -1,8 +1,20 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "simplemaintenancescanner.h" +#include <vespa/storage/distributor/distributor_bucket_space.h> namespace storage::distributor { +SimpleMaintenanceScanner::SimpleMaintenanceScanner(BucketPriorityDatabase& bucketPriorityDb, + const MaintenancePriorityGenerator& priorityGenerator, + const DistributorBucketSpaceRepo& bucketSpaceRepo) + : _bucketPriorityDb(bucketPriorityDb), + _priorityGenerator(priorityGenerator), + _bucketSpaceRepo(bucketSpaceRepo), + _bucketSpaceItr(_bucketSpaceRepo.begin()), + _bucketCursor() +{ +} + SimpleMaintenanceScanner::~SimpleMaintenanceScanner() {} SimpleMaintenanceScanner::PendingMaintenanceStats::PendingMaintenanceStats() {} @@ -14,19 +26,28 @@ SimpleMaintenanceScanner::PendingMaintenanceStats::operator = (const PendingMain MaintenanceScanner::ScanResult SimpleMaintenanceScanner::scanNext() { - BucketDatabase::Entry entry(_bucketDb.getNext(_bucketCursor)); - if (!entry.valid()) { - return ScanResult::createDone(); + for (;;) { + if (_bucketSpaceItr == _bucketSpaceRepo.end()) { + return ScanResult::createDone(); + } + const auto &bucketDb(_bucketSpaceItr->second->getBucketDatabase()); + BucketDatabase::Entry entry(bucketDb.getNext(_bucketCursor)); + if (!entry.valid()) { + ++_bucketSpaceItr; + _bucketCursor = document::BucketId(); + continue; + } + prioritizeBucket(document::Bucket(_bucketSpaceItr->first, entry.getBucketId())); + _bucketCursor = entry.getBucketId(); + return ScanResult::createNotDone(_bucketSpaceItr->first, entry); } - prioritizeBucket(document::Bucket(document::BucketSpace::placeHolder(), entry.getBucketId())); - _bucketCursor = entry.getBucketId(); - return ScanResult::createNotDone(entry); } void SimpleMaintenanceScanner::reset() { _bucketCursor = document::BucketId(); + _bucketSpaceItr = _bucketSpaceRepo.begin(); _pendingMaintenance = PendingMaintenanceStats(); } diff --git a/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.h b/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.h index 05de7674d6a..f4ad53957e9 100644 --- a/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.h +++ b/storage/src/vespa/storage/distributor/maintenance/simplemaintenancescanner.h @@ -5,7 +5,7 @@ #include "bucketprioritydatabase.h" #include "maintenanceprioritygenerator.h" #include "node_maintenance_stats_tracker.h" -#include <vespa/storage/bucketdb/bucketdatabase.h> +#include <vespa/storage/distributor/distributor_bucket_space_repo.h> namespace storage { namespace distributor { @@ -31,18 +31,14 @@ public: private: BucketPriorityDatabase& _bucketPriorityDb; const MaintenancePriorityGenerator& _priorityGenerator; - const BucketDatabase& _bucketDb; + const DistributorBucketSpaceRepo &_bucketSpaceRepo; + DistributorBucketSpaceRepo::BucketSpaceMap::const_iterator _bucketSpaceItr; document::BucketId _bucketCursor; PendingMaintenanceStats _pendingMaintenance; public: SimpleMaintenanceScanner(BucketPriorityDatabase& bucketPriorityDb, const MaintenancePriorityGenerator& priorityGenerator, - const BucketDatabase& bucketDb) - : _bucketPriorityDb(bucketPriorityDb), - _priorityGenerator(priorityGenerator), - _bucketDb(bucketDb), - _bucketCursor() - {} + const DistributorBucketSpaceRepo& bucketSpaceRepo); SimpleMaintenanceScanner(const SimpleMaintenanceScanner&) = delete; SimpleMaintenanceScanner& operator=(const SimpleMaintenanceScanner&) = delete; ~SimpleMaintenanceScanner(); diff --git a/storage/src/vespa/storage/distributor/maintenancebucket.h b/storage/src/vespa/storage/distributor/maintenancebucket.h deleted file mode 100644 index a44381830c5..00000000000 --- a/storage/src/vespa/storage/distributor/maintenancebucket.h +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#pragma once - -#include <vespa/document/bucket/bucketid.h> -#include <vespa/vespalib/util/stringfmt.h> -#include <vespa/storage/distributor/maintenance/maintenancepriority.h> - -namespace storage { - -namespace distributor { - -/** - * Simple container to communicate a bucket that needs to be - * checked for maintenanceoperations. - */ -class MaintenanceBucket { -public: - typedef MaintenancePriority::Priority Priority; - - MaintenanceBucket() - : node(0), - pri(MaintenancePriority::NO_MAINTENANCE_NEEDED) - {} - - MaintenanceBucket(const document::BucketId& bid_, - uint16_t node_, - Priority pri_) - : bid(bid_), - node(node_), - pri(pri_) - { - - } - - // The bucket to be checked. - document::BucketId bid; - - // The primary node of the bucket. - uint16_t node; - - // The priority to check the bucket. - Priority pri; - - bool requiresMaintenance() const { - return pri != MaintenancePriority::NO_MAINTENANCE_NEEDED; - } - - std::string toString() const { - return vespalib::make_string("MaintenanceBucket(%s: Node %d, Pri %s)", - bid.toString().c_str(), - (int)node, - MaintenancePriority::toString(pri).c_str()); - } -}; - -} - -} - diff --git a/storage/src/vespa/storage/distributor/operations/external/putoperation.cpp b/storage/src/vespa/storage/distributor/operations/external/putoperation.cpp index 7ef03cb696a..659a7f1d435 100644 --- a/storage/src/vespa/storage/distributor/operations/external/putoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/putoperation.cpp @@ -283,7 +283,8 @@ PutOperation::onStart(DistributorMessageSender& sender) _bucketSpace.getBucketDatabase(), idealNodeCalculator, _manager.getDistributor().getConfig().getMinimalBucketSplit(), - _bucketSpace.getDistribution().getRedundancy()); + _bucketSpace.getDistribution().getRedundancy(), + _msg->getBucket().getBucketSpace()); OperationTargetList targets(targetResolver.getTargets( OperationTargetResolver::PUT, bid)); diff --git a/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp b/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp index a4cebcc7c3e..9f92d313f1f 100644 --- a/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp @@ -3,6 +3,7 @@ #include "visitoroperation.h" #include <vespa/storage/storageserver/storagemetricsset.h> #include <vespa/storage/distributor/distributor.h> +#include <vespa/storage/distributor/distributor_bucket_space.h> #include <vespa/storage/distributor/bucketownership.h> #include <vespa/storage/distributor/operations/external/visitororder.h> #include <vespa/storage/distributor/visitormetricsset.h> diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/garbagecollectionoperation.cpp b/storage/src/vespa/storage/distributor/operations/idealstate/garbagecollectionoperation.cpp index d78262709e3..53d9cc018f9 100644 --- a/storage/src/vespa/storage/distributor/operations/idealstate/garbagecollectionoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/idealstate/garbagecollectionoperation.cpp @@ -3,6 +3,7 @@ #include "garbagecollectionoperation.h" #include <vespa/storage/distributor/idealstatemanager.h> #include <vespa/storage/distributor/distributor.h> +#include <vespa/storage/distributor/distributor_bucket_space.h> #include <vespa/storageapi/message/removelocation.h> #include <vespa/log/log.h> @@ -21,7 +22,7 @@ GarbageCollectionOperation::~GarbageCollectionOperation() { } void GarbageCollectionOperation::onStart(DistributorMessageSender& sender) { - BucketDatabase::Entry entry = _manager->getDistributorComponent().getBucketDatabase().get(getBucketId()); + BucketDatabase::Entry entry = _bucketSpace->getBucketDatabase().get(getBucketId()); std::vector<uint16_t> nodes = entry->getNodes(); for (uint32_t i = 0; i < nodes.size(); i++) { @@ -62,11 +63,11 @@ GarbageCollectionOperation::onReceive(DistributorMessageSender&, if (_tracker.finished()) { if (_ok) { - BucketDatabase::Entry dbentry = _manager->getDistributorComponent().getBucketDatabase().get(getBucketId()); + BucketDatabase::Entry dbentry = _bucketSpace->getBucketDatabase().get(getBucketId()); if (dbentry.valid()) { dbentry->setLastGarbageCollectionTime( _manager->getDistributorComponent().getClock().getTimeInSeconds().getTime()); - _manager->getDistributorComponent().getBucketDatabase().update(dbentry); + _bucketSpace->getBucketDatabase().update(dbentry); } } diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/idealstateoperation.cpp b/storage/src/vespa/storage/distributor/operations/idealstate/idealstateoperation.cpp index 3095dce7b87..2337129e375 100644 --- a/storage/src/vespa/storage/distributor/operations/idealstate/idealstateoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/idealstate/idealstateoperation.cpp @@ -4,6 +4,7 @@ #include <vespa/storage/distributor/pendingmessagetracker.h> #include <vespa/storage/distributor/idealstatemetricsset.h> #include <vespa/storage/distributor/pendingmessagetracker.h> +#include <vespa/storage/distributor/distributor_bucket_space_repo.h> #include <vespa/storageapi/messageapi/maintenancecommand.h> #include <vespa/log/log.h> @@ -25,7 +26,8 @@ const uint32_t IdealStateOperation::MAINTENANCE_MESSAGE_TYPES[] = }; IdealStateOperation::IdealStateOperation(const BucketAndNodes& bucketAndNodes) - : _manager(NULL), + : _manager(nullptr), + _bucketSpace(nullptr), _bucketAndNodes(bucketAndNodes), _ok(true), _priority(255) @@ -78,6 +80,12 @@ BucketAndNodes::toString() const } void +IdealStateOperation::setIdealStateManager(IdealStateManager* manager) { + _manager = manager; + _bucketSpace = &_manager->getBucketSpaceRepo().get(getBucket().getBucketSpace()); +}; + +void IdealStateOperation::done() { if (_manager != NULL) { @@ -188,19 +196,10 @@ checkNullBucketRequestBucketInfoMessage(uint16_t node, const PendingMessageTracker& tracker) { RequestBucketInfoChecker rchk; - for (;;) { - // Check messages sent to null-bucket (i.e. any bucket) for the node. - document::Bucket nullBucket(bucketSpace, document::BucketId()); - tracker.checkPendingMessages(node, nullBucket, rchk); - if (rchk.blocked) { - return true; - } - if (bucketSpace == BucketSpace::placeHolder()) { - break; - } - bucketSpace = BucketSpace::placeHolder(); - } - return false; + // Check messages sent to null-bucket (i.e. any bucket) for the node. + document::Bucket nullBucket(bucketSpace, document::BucketId()); + tracker.checkPendingMessages(node, nullBucket, rchk); + return rchk.blocked; } } diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/idealstateoperation.h b/storage/src/vespa/storage/distributor/operations/idealstate/idealstateoperation.h index 9824ae0630f..e8480902549 100644 --- a/storage/src/vespa/storage/distributor/operations/idealstate/idealstateoperation.h +++ b/storage/src/vespa/storage/distributor/operations/idealstate/idealstateoperation.h @@ -10,6 +10,7 @@ namespace storage::distributor { +class DistributorBucketSpace; class PendingMessageTracker; class IdealStateManager; @@ -165,9 +166,7 @@ public: @param manager The ideal state manager. */ - void setIdealStateManager(IdealStateManager* manager) { - _manager = manager; - }; + void setIdealStateManager(IdealStateManager* manager); /** Returns the type of operation this is. @@ -224,6 +223,7 @@ protected: friend class IdealStateManager; IdealStateManager* _manager; + DistributorBucketSpace *_bucketSpace; BucketAndNodes _bucketAndNodes; std::string _detailedReason; diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/joinoperation.cpp b/storage/src/vespa/storage/distributor/operations/idealstate/joinoperation.cpp index 77135f56399..52a4a5c195c 100644 --- a/storage/src/vespa/storage/distributor/operations/idealstate/joinoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/idealstate/joinoperation.cpp @@ -2,6 +2,7 @@ #include "joinoperation.h" #include <vespa/storageapi/message/bucketsplitting.h> +#include <vespa/storage/distributor/distributor_bucket_space.h> #include <climits> #include <vespa/log/bufferedlogger.h> LOG_SETUP(".distributor.operation.idealstate.join"); @@ -42,7 +43,7 @@ JoinOperation::NodeToBuckets JoinOperation::resolveSourceBucketsPerTargetNode() const { NodeToBuckets nodeToBuckets; - const auto& db(_manager->getDistributorComponent().getBucketDatabase()); + const auto& db(_bucketSpace->getBucketDatabase()); for (const auto& bucket : _bucketsToJoin) { BucketDatabase::Entry entry(db.get(bucket)); @@ -117,7 +118,7 @@ JoinOperation::onReceive(DistributorMessageSender&, const api::StorageReply::SP& LOG(spam, "Adding joined bucket %s", getBucketId().toString().c_str()); } } else if (rep.getResult().getResult() == api::ReturnCode::BUCKET_NOT_FOUND - && _manager->getDistributorComponent().getBucketDatabase().get(getBucketId())->getNode(node) != 0) + && _bucketSpace->getBucketDatabase().get(getBucketId())->getNode(node) != 0) { _manager->getDistributorComponent().recheckBucketInfo(node, getBucket()); LOGBP(warning, "Join failed to find %s: %s", diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/mergeoperation.cpp b/storage/src/vespa/storage/distributor/operations/idealstate/mergeoperation.cpp index e889dbe279b..271ac35968e 100644 --- a/storage/src/vespa/storage/distributor/operations/idealstate/mergeoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/idealstate/mergeoperation.cpp @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "mergeoperation.h" #include <vespa/storage/distributor/idealstatemanager.h> +#include <vespa/storage/distributor/distributor_bucket_space.h> #include <array> #include <vespa/log/bufferedlogger.h> @@ -104,7 +105,7 @@ struct NodeIndexComparator void MergeOperation::onStart(DistributorMessageSender& sender) { - BucketDatabase::Entry entry = _manager->getDistributorComponent().getBucketDatabase().get(getBucketId()); + BucketDatabase::Entry entry = _bucketSpace->getBucketDatabase().get(getBucketId()); if (!entry.valid()) { LOGBP(debug, "Unable to merge nonexisting bucket %s", getBucketId().toString().c_str()); _ok = false; @@ -126,7 +127,7 @@ MergeOperation::onStart(DistributorMessageSender& sender) } _infoBefore = entry.getBucketInfo(); - generateSortedNodeList(_manager->getDistributorComponent().getDistribution(), + generateSortedNodeList(_bucketSpace->getDistribution(), clusterState, getBucketId(), _limiter, @@ -273,7 +274,7 @@ MergeOperation::onReceive(DistributorMessageSender& sender, _ok = result.success(); if (_ok) { BucketDatabase::Entry entry( - _manager->getDistributorComponent().getBucketDatabase().get(getBucketId())); + _bucketSpace->getBucketDatabase().get(getBucketId())); if (!entry.valid()) { LOG(debug, "Bucket %s no longer exists after merge", getBucketId().toString().c_str()); diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/removebucketoperation.cpp b/storage/src/vespa/storage/distributor/operations/idealstate/removebucketoperation.cpp index 6c0245cb590..9a94a5a62ad 100644 --- a/storage/src/vespa/storage/distributor/operations/idealstate/removebucketoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/idealstate/removebucketoperation.cpp @@ -3,6 +3,7 @@ #include "removebucketoperation.h" #include <vespa/storage/distributor/idealstatemanager.h> #include <vespa/storage/distributor/distributor.h> +#include <vespa/storage/distributor/distributor_bucket_space.h> #include <vespa/log/log.h> @@ -15,7 +16,7 @@ RemoveBucketOperation::onStartInternal(DistributorMessageSender& sender) { std::vector<std::pair<uint16_t, std::shared_ptr<api::DeleteBucketCommand> > > msgs; - BucketDatabase::Entry entry = _manager->getDistributorComponent().getBucketDatabase().get(getBucketId()); + BucketDatabase::Entry entry = _bucketSpace->getBucketDatabase().get(getBucketId()); for (uint32_t i = 0; i < getNodes().size(); ++i) { uint16_t node = getNodes()[i]; diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/setbucketstateoperation.cpp b/storage/src/vespa/storage/distributor/operations/idealstate/setbucketstateoperation.cpp index f3528d30aba..1acb2dcc64b 100644 --- a/storage/src/vespa/storage/distributor/operations/idealstate/setbucketstateoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/idealstate/setbucketstateoperation.cpp @@ -2,6 +2,7 @@ #include "setbucketstateoperation.h" #include <vespa/storage/distributor/idealstatemanager.h> +#include <vespa/storage/distributor/distributor_bucket_space.h> #include <vespa/log/log.h> LOG_SETUP(".distributor.operation.idealstate.setactive"); @@ -83,7 +84,7 @@ SetBucketStateOperation::onReceive(DistributorMessageSender& sender, bool deactivate = false; if (reply->getResult().success()) { BucketDatabase::Entry entry = - _manager->getDistributorComponent().getBucketDatabase().get(rep.getBucketId()); + _bucketSpace->getBucketDatabase().get(rep.getBucketId()); if (entry.valid()) { const BucketCopy* copy = entry->getNode(node); @@ -103,7 +104,7 @@ SetBucketStateOperation::onReceive(DistributorMessageSender& sender, node, bInfo).setTrusted(copy->trusted())); - _manager->getDistributorComponent().getBucketDatabase().update(entry); + _bucketSpace->getBucketDatabase().update(entry); } } else { LOG(debug, "%s did not exist when receiving %s", diff --git a/storage/src/vespa/storage/distributor/operations/idealstate/splitoperation.cpp b/storage/src/vespa/storage/distributor/operations/idealstate/splitoperation.cpp index a8f547afe45..1b40f744a80 100644 --- a/storage/src/vespa/storage/distributor/operations/idealstate/splitoperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/idealstate/splitoperation.cpp @@ -4,6 +4,7 @@ #include <vespa/storage/distributor/idealstatemanager.h> #include <vespa/storage/common/bucketoperationlogger.h> #include <vespa/storageapi/message/bucketsplitting.h> +#include <vespa/storage/distributor/distributor_bucket_space.h> #include <climits> #include <vespa/log/bufferedlogger.h> @@ -26,8 +27,7 @@ SplitOperation::onStart(DistributorMessageSender& sender) { _ok = false; - BucketDatabase::Entry entry = _manager->getDistributorComponent() - .getBucketDatabase().get(getBucketId()); + BucketDatabase::Entry entry = _bucketSpace->getBucketDatabase().get(getBucketId()); for (uint32_t i = 0; i < entry->getNodeCount(); i++) { std::shared_ptr<api::SplitBucketCommand> msg( @@ -66,7 +66,7 @@ SplitOperation::onReceive(DistributorMessageSender&, const api::StorageReply::SP if (rep.getResult().success()) { BucketDatabase::Entry entry = - _manager->getDistributorComponent().getBucketDatabase().get(rep.getBucketId()); + _bucketSpace->getBucketDatabase().get(rep.getBucketId()); if (entry.valid()) { entry->removeNode(node); @@ -74,9 +74,9 @@ SplitOperation::onReceive(DistributorMessageSender&, const api::StorageReply::SP if (entry->getNodeCount() == 0) { LOG(spam, "Removing split bucket %s", getBucketId().toString().c_str()); - _manager->getDistributorComponent().getBucketDatabase().remove(rep.getBucketId()); + _bucketSpace->getBucketDatabase().remove(rep.getBucketId()); } else { - _manager->getDistributorComponent().getBucketDatabase().update(entry); + _bucketSpace->getBucketDatabase().update(entry); } ost << getBucketId() << " => "; @@ -115,7 +115,7 @@ SplitOperation::onReceive(DistributorMessageSender&, const api::StorageReply::SP } } else if ( rep.getResult().getResult() == api::ReturnCode::BUCKET_NOT_FOUND - && _manager->getDistributorComponent().getBucketDatabase().get(rep.getBucketId())->getNode(node) != 0) + && _bucketSpace->getBucketDatabase().get(rep.getBucketId())->getNode(node) != 0) { _manager->getDistributorComponent().recheckBucketInfo(node, getBucket()); LOGBP(debug, "Split failed for %s: bucket not found. Storage and " diff --git a/storage/src/vespa/storage/distributor/operationtargetresolver.cpp b/storage/src/vespa/storage/distributor/operationtargetresolver.cpp index f537c6b67b6..fa4697f7861 100644 --- a/storage/src/vespa/storage/distributor/operationtargetresolver.cpp +++ b/storage/src/vespa/storage/distributor/operationtargetresolver.cpp @@ -6,15 +6,9 @@ namespace storage { namespace distributor { -document::Bucket -OperationTarget::getBucket() const -{ - return document::Bucket(document::BucketSpace::placeHolder(), _bucket); -} - void OperationTarget::print(vespalib::asciistream& out, const PrintProperties&) const { - out << "OperationTarget(" << _bucket << ", " << _node + out << "OperationTarget(" << _bucket.toString() << ", " << _node << (_newCopy ? ", new copy" : ", existing copy") << ")"; } diff --git a/storage/src/vespa/storage/distributor/operationtargetresolver.h b/storage/src/vespa/storage/distributor/operationtargetresolver.h index b9f7537b5f5..23e0fbbcba4 100644 --- a/storage/src/vespa/storage/distributor/operationtargetresolver.h +++ b/storage/src/vespa/storage/distributor/operationtargetresolver.h @@ -15,17 +15,17 @@ namespace distributor { class OperationTarget : public vespalib::AsciiPrintable { - document::BucketId _bucket; + document::Bucket _bucket; lib::Node _node; bool _newCopy; public: OperationTarget() : _newCopy(true) {} - OperationTarget(const document::BucketId& id, const lib::Node& node, bool newCopy) - : _bucket(id), _node(node), _newCopy(newCopy) {} + OperationTarget(const document::Bucket& bucket, const lib::Node& node, bool newCopy) + : _bucket(bucket), _node(node), _newCopy(newCopy) {} - const document::BucketId& getBucketId() const { return _bucket; } - document::Bucket getBucket() const; + document::BucketId getBucketId() const { return _bucket.getBucketId(); } + document::Bucket getBucket() const { return _bucket; } const lib::Node& getNode() const { return _node; } bool isNewCopy() const { return _newCopy; } diff --git a/storage/src/vespa/storage/distributor/operationtargetresolverimpl.cpp b/storage/src/vespa/storage/distributor/operationtargetresolverimpl.cpp index 942ec3705bf..23bb6b1db78 100644 --- a/storage/src/vespa/storage/distributor/operationtargetresolverimpl.cpp +++ b/storage/src/vespa/storage/distributor/operationtargetresolverimpl.cpp @@ -129,12 +129,12 @@ BucketInstanceList::extendToEnoughCopies( } OperationTargetList -BucketInstanceList::createTargets() +BucketInstanceList::createTargets(document::BucketSpace bucketSpace) { OperationTargetList result; for (uint32_t i=0; i<_instances.size(); ++i) { BucketInstance& bi(_instances[i]); - result.push_back(OperationTarget(bi._bucket, bi._node, !bi._exist)); + result.push_back(OperationTarget(document::Bucket(bucketSpace, bi._bucket), bi._node, !bi._exist)); } return result; } diff --git a/storage/src/vespa/storage/distributor/operationtargetresolverimpl.h b/storage/src/vespa/storage/distributor/operationtargetresolverimpl.h index 2f4a3e0117a..73a2c281b18 100644 --- a/storage/src/vespa/storage/distributor/operationtargetresolverimpl.h +++ b/storage/src/vespa/storage/distributor/operationtargetresolverimpl.h @@ -71,7 +71,7 @@ public: std::sort(_instances.begin(), _instances.end(), order); } - OperationTargetList createTargets(); + OperationTargetList createTargets(document::BucketSpace bucketSpace); void print(vespalib::asciistream& out, const PrintProperties& p) const override; }; @@ -81,16 +81,19 @@ class OperationTargetResolverImpl : public OperationTargetResolver { const lib::IdealNodeCalculator& _idealNodeCalculator; uint32_t _minUsedBucketBits; uint16_t _redundancy; + document::BucketSpace _bucketSpace; public: OperationTargetResolverImpl(BucketDatabase& bucketDatabase, const lib::IdealNodeCalculator& idealNodeCalc, uint32_t minUsedBucketBits, - uint16_t redundancy) + uint16_t redundancy, + document::BucketSpace bucketSpace) : _bucketDatabase(bucketDatabase), _idealNodeCalculator(idealNodeCalc), _minUsedBucketBits(minUsedBucketBits), - _redundancy(redundancy) + _redundancy(redundancy), + _bucketSpace(bucketSpace) {} BucketInstanceList getAllInstances(OperationType type, @@ -102,7 +105,7 @@ public: } OperationTargetList getTargets(OperationType type, const document::BucketId& id) override { - return getInstances(type, id).createTargets(); + return getInstances(type, id).createTargets(_bucketSpace); } }; diff --git a/storage/src/vespa/storage/distributor/outdated_nodes.h b/storage/src/vespa/storage/distributor/outdated_nodes.h new file mode 100644 index 00000000000..fddb1806d82 --- /dev/null +++ b/storage/src/vespa/storage/distributor/outdated_nodes.h @@ -0,0 +1,11 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <unordered_set> + +namespace storage::distributor::dbtransition { + +using OutdatedNodes = std::unordered_set<uint16_t>; + +} diff --git a/storage/src/vespa/storage/distributor/outdated_nodes_map.h b/storage/src/vespa/storage/distributor/outdated_nodes_map.h new file mode 100644 index 00000000000..8d08b20732b --- /dev/null +++ b/storage/src/vespa/storage/distributor/outdated_nodes_map.h @@ -0,0 +1,13 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "outdated_nodes.h" +#include <vespa/document/bucket/bucketspace.h> +#include <unordered_map> + +namespace storage::distributor::dbtransition { + +using OutdatedNodesMap = std::unordered_map<document::BucketSpace, OutdatedNodes, document::BucketSpace::hash>; + +} diff --git a/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp new file mode 100644 index 00000000000..ed9c8bc222b --- /dev/null +++ b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.cpp @@ -0,0 +1,421 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "pending_bucket_space_db_transition.h" +#include "clusterinformation.h" +#include "pendingclusterstate.h" +#include "distributor_bucket_space.h" +#include <vespa/storage/common/bucketoperationlogger.h> +#include <algorithm> + +#include <vespa/log/log.h> +LOG_SETUP(".pendingbucketspacedbtransition"); + +namespace storage::distributor { + +using lib::Node; +using lib::NodeType; +using lib::NodeState; + +PendingBucketSpaceDbTransition::PendingBucketSpaceDbTransition(const PendingClusterState &pendingClusterState, + DistributorBucketSpace &distributorBucketSpace, + bool distributionChanged, + const OutdatedNodes &outdatedNodes, + std::shared_ptr<const ClusterInformation> clusterInfo, + const lib::ClusterState &newClusterState, + api::Timestamp creationTimestamp) + : _entries(), + _iter(0), + _removedBuckets(), + _missingEntries(), + _clusterInfo(std::move(clusterInfo)), + _outdatedNodes(newClusterState.getNodeCount(NodeType::STORAGE)), + _prevClusterState(_clusterInfo->getClusterState()), + _newClusterState(newClusterState), + _creationTimestamp(creationTimestamp), + _pendingClusterState(pendingClusterState), + _distributorBucketSpace(distributorBucketSpace), + _distributorIndex(_clusterInfo->getDistributorIndex()), + _bucketOwnershipTransfer(distributionChanged) +{ + if (distributorChanged()) { + _bucketOwnershipTransfer = true; + } + if (_bucketOwnershipTransfer) { + markAllAvailableNodesAsRequiringRequest(); + } else { + updateSetOfNodesThatAreOutdated(); + addAdditionalNodesToOutdatedSet(outdatedNodes); + } +} + +PendingBucketSpaceDbTransition::~PendingBucketSpaceDbTransition() +{ +} + +PendingBucketSpaceDbTransition::Range +PendingBucketSpaceDbTransition::skipAllForSameBucket() +{ + Range r(_iter, _iter); + + for (document::BucketId& bid = _entries[_iter].bucketId; + _iter < _entries.size() && _entries[_iter].bucketId == bid; + ++_iter) + { + } + + r.second = _iter; + return r; +} + +std::vector<BucketCopy> +PendingBucketSpaceDbTransition::getCopiesThatAreNewOrAltered(BucketDatabase::Entry& info, const Range& range) +{ + std::vector<BucketCopy> copiesToAdd; + for (uint32_t i = range.first; i < range.second; ++i) { + const BucketCopy& candidate(_entries[i].copy); + const BucketCopy* cp = info->getNode(candidate.getNode()); + + if (!cp || !(cp->getBucketInfo() == candidate.getBucketInfo())) { + copiesToAdd.push_back(candidate); + } + } + return copiesToAdd; +} + +void +PendingBucketSpaceDbTransition::insertInfo(BucketDatabase::Entry& info, const Range& range) +{ + std::vector<BucketCopy> copiesToAddOrUpdate( + getCopiesThatAreNewOrAltered(info, range)); + + const auto &dist(_distributorBucketSpace.getDistribution()); + std::vector<uint16_t> order( + dist.getIdealStorageNodes( + _newClusterState, + _entries[range.first].bucketId, + _clusterInfo->getStorageUpStates())); + info->addNodes(copiesToAddOrUpdate, order, TrustedUpdate::DEFER); + + LOG_BUCKET_OPERATION_NO_LOCK( + _entries[range.first].bucketId, + vespalib::make_string("insertInfo: %s", + info.toString().c_str())); +} + +std::string +PendingBucketSpaceDbTransition::requestNodesToString() +{ + return _pendingClusterState.requestNodesToString(); +} + +bool +PendingBucketSpaceDbTransition::removeCopiesFromNodesThatWereRequested(BucketDatabase::Entry& e, const document::BucketId& bucketId) +{ + bool updated = false; + for (uint32_t i = 0; i < e->getNodeCount();) { + auto& info(e->getNodeRef(i)); + const uint16_t entryNode(info.getNode()); + // Don't remove an entry if it's been updated in the time after the + // bucket info requests were sent, as this would erase newer state. + // Don't immediately update trusted state, as that could erroneously + // mark a single remaining replica as trusted even though there might + // be one or more additional replicas pending merge into the database. + if (nodeIsOutdated(entryNode) + && (info.getTimestamp() < _creationTimestamp) + && e->removeNode(entryNode, TrustedUpdate::DEFER)) + { + LOG(spam, + "Removed bucket %s from node %d", + bucketId.toString().c_str(), + entryNode); + updated = true; + // After removing current node, getNodeRef(i) will point to the _next_ node, so don't increment `i`. + } else { + ++i; + } + } + return updated; +} + +bool +PendingBucketSpaceDbTransition::databaseIteratorHasPassedBucketInfoIterator(const document::BucketId& bucketId) const +{ + return (_iter < _entries.size() + && _entries[_iter].bucketId.toKey() < bucketId.toKey()); +} + +bool +PendingBucketSpaceDbTransition::bucketInfoIteratorPointsToBucket(const document::BucketId& bucketId) const +{ + return _iter < _entries.size() && _entries[_iter].bucketId == bucketId; +} + +bool +PendingBucketSpaceDbTransition::process(BucketDatabase::Entry& e) +{ + document::BucketId bucketId(e.getBucketId()); + + LOG(spam, + "Before merging info from nodes [%s], bucket %s had info %s", + requestNodesToString().c_str(), + bucketId.toString().c_str(), + e.getBucketInfo().toString().c_str()); + + while (databaseIteratorHasPassedBucketInfoIterator(bucketId)) { + LOG(spam, "Found new bucket %s, adding", + _entries[_iter].bucketId.toString().c_str()); + + _missingEntries.push_back(skipAllForSameBucket()); + } + + bool updated(removeCopiesFromNodesThatWereRequested(e, bucketId)); + + if (bucketInfoIteratorPointsToBucket(bucketId)) { + LOG(spam, "Updating bucket %s", + _entries[_iter].bucketId.toString().c_str()); + + insertInfo(e, skipAllForSameBucket()); + updated = true; + } + + if (updated) { + // Remove bucket if we've previously removed all nodes from it + if (e->getNodeCount() == 0) { + _removedBuckets.push_back(bucketId); + } else { + e.getBucketInfo().updateTrusted(); + } + } + + LOG(spam, + "After merging info from nodes [%s], bucket %s had info %s", + requestNodesToString().c_str(), + bucketId.toString().c_str(), + e.getBucketInfo().toString().c_str()); + + return true; +} + +void +PendingBucketSpaceDbTransition::addToBucketDB(BucketDatabase& db, const Range& range) +{ + LOG(spam, "Adding new bucket %s with %d copies", + _entries[range.first].bucketId.toString().c_str(), + range.second - range.first); + + BucketDatabase::Entry e(_entries[range.first].bucketId, BucketInfo()); + insertInfo(e, range); + if (e->getLastGarbageCollectionTime() == 0) { + e->setLastGarbageCollectionTime( + framework::MicroSecTime(_creationTimestamp) + .getSeconds().getTime()); + } + e.getBucketInfo().updateTrusted(); + db.update(e); +} + +void +PendingBucketSpaceDbTransition::mergeIntoBucketDatabase() +{ + BucketDatabase &db(_distributorBucketSpace.getBucketDatabase()); + std::sort(_entries.begin(), _entries.end()); + + db.forEach(*this); + + for (uint32_t i = 0; i < _removedBuckets.size(); ++i) { + db.remove(_removedBuckets[i]); + } + _removedBuckets.clear(); + + // All of the remaining were not already in the bucket database. + while (_iter < _entries.size()) { + _missingEntries.push_back(skipAllForSameBucket()); + } + + for (uint32_t i = 0; i < _missingEntries.size(); ++i) { + addToBucketDB(db, _missingEntries[i]); + } +} + +void +PendingBucketSpaceDbTransition::onRequestBucketInfoReply(const api::RequestBucketInfoReply &reply, uint16_t node) +{ + for (const auto &entry : reply.getBucketInfo()) { + _entries.emplace_back(entry._bucketId, + BucketCopy(_creationTimestamp, + node, + entry._info)); + } +} + +bool +PendingBucketSpaceDbTransition::distributorChanged() +{ + const auto &oldState(_prevClusterState); + const auto &newState(_newClusterState); + if (newState.getDistributionBitCount() != oldState.getDistributionBitCount()) { + return true; + } + + Node myNode(NodeType::DISTRIBUTOR, _distributorIndex); + if (oldState.getNodeState(myNode).getState() == lib::State::DOWN) { + return true; + } + + uint16_t oldCount = oldState.getNodeCount(NodeType::DISTRIBUTOR); + uint16_t newCount = newState.getNodeCount(NodeType::DISTRIBUTOR); + + uint16_t maxCount = std::max(oldCount, newCount); + + for (uint16_t i = 0; i < maxCount; ++i) { + Node node(NodeType::DISTRIBUTOR, i); + + const lib::State& old(oldState.getNodeState(node).getState()); + const lib::State& nw(newState.getNodeState(node).getState()); + + if (nodeWasUpButNowIsDown(old, nw)) { + if (nodeInSameGroupAsSelf(i) || + nodeNeedsOwnershipTransferFromGroupDown(i, newState)) { + return true; + } + } + } + + return false; +} + +bool +PendingBucketSpaceDbTransition::nodeWasUpButNowIsDown(const lib::State& old, + const lib::State& nw) +{ + return (old.oneOf("uimr") && !nw.oneOf("uimr")); +} + +bool +PendingBucketSpaceDbTransition::nodeInSameGroupAsSelf(uint16_t index) const +{ + const auto &dist(_distributorBucketSpace.getDistribution()); + if (dist.getNodeGraph().getGroupForNode(index) == + dist.getNodeGraph().getGroupForNode(_distributorIndex)) { + LOG(debug, + "Distributor %d state changed, need to request data from all " + "storage nodes", + index); + return true; + } else { + LOG(debug, + "Distributor %d state changed but unrelated to my group.", + index); + return false; + } +} + +bool +PendingBucketSpaceDbTransition::nodeNeedsOwnershipTransferFromGroupDown( + uint16_t nodeIndex, + const lib::ClusterState& state) const +{ + const auto &dist(_distributorBucketSpace.getDistribution()); + if (!dist.distributorAutoOwnershipTransferOnWholeGroupDown()) { + return false; // Not doing anything for downed groups. + } + const lib::Group* group(dist.getNodeGraph().getGroupForNode(nodeIndex)); + // If there is no group information associated with the node (because the + // group has changed or the node has been removed from config), we must + // also invoke ownership transfer of buckets. + if (group == nullptr + || lib::Distribution::allDistributorsDown(*group, state)) + { + LOG(debug, + "Distributor %u state changed and is in a " + "group that now has no distributors remaining", + nodeIndex); + return true; + } + return false; +} + +uint16_t +PendingBucketSpaceDbTransition::newStateStorageNodeCount() const +{ + return _newClusterState.getNodeCount(lib::NodeType::STORAGE); +} + +bool +PendingBucketSpaceDbTransition::storageNodeMayHaveLostData(uint16_t index) +{ + Node node(NodeType::STORAGE, index); + NodeState newState = _newClusterState.getNodeState(node); + NodeState oldState = _prevClusterState.getNodeState(node); + + return (newState.getStartTimestamp() > oldState.getStartTimestamp()); +} + +void +PendingBucketSpaceDbTransition::updateSetOfNodesThatAreOutdated() +{ + const uint16_t nodeCount(newStateStorageNodeCount()); + for (uint16_t index = 0; index < nodeCount; ++index) { + if (storageNodeMayHaveLostData(index) || storageNodeChanged(index)) { + _outdatedNodes.insert(index); + } + } +} + +bool +PendingBucketSpaceDbTransition::storageNodeChanged(uint16_t index) { + Node node(NodeType::STORAGE, index); + NodeState newState = _newClusterState.getNodeState(node); + NodeState oldNodeState = _prevClusterState.getNodeState(node); + + // similarTo() also covers disk states. + if (!(oldNodeState.similarTo(newState))) { + LOG(debug, + "State for storage node %d has changed from '%s' to '%s', " + "updating bucket information", + index, + oldNodeState.toString().c_str(), + newState.toString().c_str()); + return true; + } + + return false; +} + +bool +PendingBucketSpaceDbTransition::storageNodeUpInNewState(uint16_t node) const +{ + return _newClusterState.getNodeState(Node(NodeType::STORAGE, node)) + .getState().oneOf(_clusterInfo->getStorageUpStates()); +} + +void +PendingBucketSpaceDbTransition::markAllAvailableNodesAsRequiringRequest() +{ + const uint16_t nodeCount(newStateStorageNodeCount()); + for (uint16_t i = 0; i < nodeCount; ++i) { + if (storageNodeUpInNewState(i)) { + _outdatedNodes.insert(i); + } + } +} + +void +PendingBucketSpaceDbTransition::addAdditionalNodesToOutdatedSet( + const std::unordered_set<uint16_t>& nodes) +{ + const uint16_t nodeCount(newStateStorageNodeCount()); + for (uint16_t node : nodes) { + if (node < nodeCount) { + _outdatedNodes.insert(node); + } + } +} + +void +PendingBucketSpaceDbTransition::addNodeInfo(const document::BucketId& id, const BucketCopy& copy) +{ + _entries.emplace_back(id, copy); +} + +} diff --git a/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.h b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.h new file mode 100644 index 00000000000..903f9b762fb --- /dev/null +++ b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition.h @@ -0,0 +1,116 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "pending_bucket_space_db_transition_entry.h" +#include "outdated_nodes.h" +#include <vespa/storage/bucketdb/bucketdatabase.h> + +namespace storage::api { class RequestBucketInfoReply; } +namespace storage::lib { class ClusterState; class State; } + +namespace storage::distributor { + +class ClusterInformation; +class PendingClusterState; +class DistributorBucketSpace; + +/** + * Class used by PendingClusterState to track request bucket info + * reply result within a bucket space and apply it to the distributor + * bucket database when switching to the pending cluster state. + */ +class PendingBucketSpaceDbTransition : public BucketDatabase::MutableEntryProcessor +{ +public: + using Entry = dbtransition::Entry; + using EntryList = std::vector<Entry>; + using OutdatedNodes = dbtransition::OutdatedNodes; +private: + using Range = std::pair<uint32_t, uint32_t>; + + EntryList _entries; + uint32_t _iter; + std::vector<document::BucketId> _removedBuckets; + std::vector<Range> _missingEntries; + std::shared_ptr<const ClusterInformation> _clusterInfo; + + // Set for all nodes that may have changed state since that previous + // active cluster state, or that were marked as outdated when the pending + // cluster state was constructed. + // May be a superset of _requestedNodes, as some nodes that are outdated + // may be down and thus cannot get a request. + OutdatedNodes _outdatedNodes; + + const lib::ClusterState &_prevClusterState; + const lib::ClusterState &_newClusterState; + const api::Timestamp _creationTimestamp; + const PendingClusterState &_pendingClusterState; + DistributorBucketSpace &_distributorBucketSpace; + uint16_t _distributorIndex; + bool _bucketOwnershipTransfer; + + // BucketDataBase::MutableEntryProcessor API + bool process(BucketDatabase::Entry& e) override; + + /** + * Skips through all entries for the same bucket and returns + * the range in the entry list for which they were found. + * The range is [from, to> + */ + Range skipAllForSameBucket(); + + std::vector<BucketCopy> getCopiesThatAreNewOrAltered(BucketDatabase::Entry& info, const Range& range); + void insertInfo(BucketDatabase::Entry& info, const Range& range); + void addToBucketDB(BucketDatabase& db, const Range& range); + + bool nodeIsOutdated(uint16_t node) const { + return (_outdatedNodes.find(node) != _outdatedNodes.end()); + } + + // Returns whether at least one replica was removed from the entry. + // Does NOT implicitly update trusted status on remaining replicas; caller must do + // this explicitly. + bool removeCopiesFromNodesThatWereRequested(BucketDatabase::Entry& e, const document::BucketId& bucketId); + + // Helper methods for iterating over _entries + bool databaseIteratorHasPassedBucketInfoIterator(const document::BucketId& bucketId) const; + bool bucketInfoIteratorPointsToBucket(const document::BucketId& bucketId) const; + std::string requestNodesToString(); + + bool distributorChanged(); + static bool nodeWasUpButNowIsDown(const lib::State &old, const lib::State &nw); + bool storageNodeUpInNewState(uint16_t node) const; + bool nodeInSameGroupAsSelf(uint16_t index) const; + bool nodeNeedsOwnershipTransferFromGroupDown(uint16_t nodeIndex, const lib::ClusterState& state) const; + uint16_t newStateStorageNodeCount() const; + bool storageNodeMayHaveLostData(uint16_t index); + bool storageNodeChanged(uint16_t index); + void markAllAvailableNodesAsRequiringRequest(); + void addAdditionalNodesToOutdatedSet(const OutdatedNodes &nodes); + void updateSetOfNodesThatAreOutdated(); + +public: + PendingBucketSpaceDbTransition(const PendingClusterState &pendingClusterState, + DistributorBucketSpace &distributorBucketSpace, + bool distributionChanged, + const OutdatedNodes &outdatedNodes, + std::shared_ptr<const ClusterInformation> clusterInfo, + const lib::ClusterState &newClusterState, + api::Timestamp creationTimestamp); + ~PendingBucketSpaceDbTransition(); + + // Merges all the results with the corresponding bucket database. + void mergeIntoBucketDatabase(); + + // Adds the info from the reply to our list of information. + void onRequestBucketInfoReply(const api::RequestBucketInfoReply &reply, uint16_t node); + + const OutdatedNodes &getOutdatedNodes() { return _outdatedNodes; } + bool getBucketOwnershipTransfer() const { return _bucketOwnershipTransfer; } + + // Methods used by unit tests. + const EntryList& results() const { return _entries; } + void addNodeInfo(const document::BucketId& id, const BucketCopy& copy); +}; + +} diff --git a/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition_entry.h b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition_entry.h new file mode 100644 index 00000000000..ad6e0695be1 --- /dev/null +++ b/storage/src/vespa/storage/distributor/pending_bucket_space_db_transition_entry.h @@ -0,0 +1,24 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <vespa/document/bucket/bucketid.h> +#include <vespa/storage/bucketdb/bucketcopy.h> + +namespace storage::distributor::dbtransition { + +struct Entry { + Entry(const document::BucketId& bid, + const BucketCopy& copy_) + : bucketId(bid), + copy(copy_) + {} + + document::BucketId bucketId; + BucketCopy copy; + + bool operator<(const Entry& other) const { + return bucketId.toKey() < other.bucketId.toKey(); + } +}; + +} diff --git a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp index 9ad803b7b7f..71684db5527 100644 --- a/storage/src/vespa/storage/distributor/pendingclusterstate.cpp +++ b/storage/src/vespa/storage/distributor/pendingclusterstate.cpp @@ -1,7 +1,10 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "pendingclusterstate.h" +#include "pending_bucket_space_db_transition.h" #include "bucketdbupdater.h" +#include "distributor_bucket_space_repo.h" +#include "distributor_bucket_space.h" #include <vespa/storageframework/defaultimplementation/clock/realclock.h> #include <vespa/storage/common/bucketoperationlogger.h> #include <vespa/vespalib/util/xmlstream.hpp> @@ -22,68 +25,82 @@ PendingClusterState::PendingClusterState( const framework::Clock& clock, const ClusterInformation::CSP& clusterInfo, DistributorMessageSender& sender, + DistributorBucketSpaceRepo &bucketSpaceRepo, const std::shared_ptr<api::SetSystemStateCommand>& newStateCmd, - const std::unordered_set<uint16_t>& outdatedNodes, + const OutdatedNodesMap &outdatedNodesMap, api::Timestamp creationTimestamp) : _cmd(newStateCmd), _requestedNodes(newStateCmd->getSystemState().getNodeCount(lib::NodeType::STORAGE)), - _outdatedNodes(newStateCmd->getSystemState().getNodeCount(lib::NodeType::STORAGE)), - _iter(0), _prevClusterState(clusterInfo->getClusterState()), _newClusterState(newStateCmd->getSystemState()), _clock(clock), _clusterInfo(clusterInfo), _creationTimestamp(creationTimestamp), _sender(sender), - _bucketOwnershipTransfer(distributorChanged(_prevClusterState, _newClusterState)) + _bucketSpaceRepo(bucketSpaceRepo), + _bucketOwnershipTransfer(false), + _pendingTransitions() { logConstructionInformation(); - if (hasBucketOwnershipTransfer()) { - markAllAvailableNodesAsRequiringRequest(); - } else { - updateSetOfNodesThatAreOutdated(); - addAdditionalNodesToOutdatedSet(outdatedNodes); - } - if (shouldRequestBucketInfo()) { - requestNodes(); - } + initializeBucketSpaceTransitions(false, outdatedNodesMap); } PendingClusterState::PendingClusterState( const framework::Clock& clock, const ClusterInformation::CSP& clusterInfo, DistributorMessageSender& sender, + DistributorBucketSpaceRepo &bucketSpaceRepo, api::Timestamp creationTimestamp) : _requestedNodes(clusterInfo->getStorageNodeCount()), - _outdatedNodes(clusterInfo->getStorageNodeCount()), - _iter(0), _prevClusterState(clusterInfo->getClusterState()), _newClusterState(clusterInfo->getClusterState()), _clock(clock), _clusterInfo(clusterInfo), _creationTimestamp(creationTimestamp), _sender(sender), - _bucketOwnershipTransfer(true) + _bucketSpaceRepo(bucketSpaceRepo), + _bucketOwnershipTransfer(true), + _pendingTransitions() { logConstructionInformation(); - markAllAvailableNodesAsRequiringRequest(); + initializeBucketSpaceTransitions(true, OutdatedNodesMap()); +} + +PendingClusterState::~PendingClusterState() {} + +void +PendingClusterState::initializeBucketSpaceTransitions(bool distributionChanged, const OutdatedNodesMap &outdatedNodesMap) +{ + OutdatedNodes emptyOutdatedNodes; + for (auto &elem : _bucketSpaceRepo) { + auto onItr = outdatedNodesMap.find(elem.first); + const auto &outdatedNodes = (onItr == outdatedNodesMap.end()) ? emptyOutdatedNodes : onItr->second; + auto pendingTransition = + std::make_unique<PendingBucketSpaceDbTransition> + (*this, *elem.second, distributionChanged, outdatedNodes, + _clusterInfo, _newClusterState, _creationTimestamp); + if (pendingTransition->getBucketOwnershipTransfer()) { + _bucketOwnershipTransfer = true; + } + _pendingTransitions.emplace(elem.first, std::move(pendingTransition)); + } if (shouldRequestBucketInfo()) { requestNodes(); } } -PendingClusterState::~PendingClusterState() {} - void PendingClusterState::logConstructionInformation() const { + const auto &distributorBucketSpace(_bucketSpaceRepo.get(BucketSpace::placeHolder())); + const auto &distribution(distributorBucketSpace.getDistribution()); LOG(debug, "New PendingClusterState constructed with previous cluster " "state '%s', new cluster state '%s', distribution config " "hash: '%s'", _prevClusterState.toString().c_str(), _newClusterState.toString().c_str(), - _clusterInfo->getDistribution().getNodeGraph().getDistributionConfigHash().c_str()); + distribution.getNodeGraph().getDistributionConfigHash().c_str()); } bool @@ -93,33 +110,14 @@ PendingClusterState::storageNodeUpInNewState(uint16_t node) const .getState().oneOf(_clusterInfo->getStorageUpStates()); } -void -PendingClusterState::markAllAvailableNodesAsRequiringRequest() +PendingClusterState::OutdatedNodesMap +PendingClusterState::getOutdatedNodesMap() const { - const uint16_t nodeCount(newStateStorageNodeCount()); - for (uint16_t i = 0; i < nodeCount; ++i) { - if (storageNodeUpInNewState(i)) { - _outdatedNodes.insert(i); - } + OutdatedNodesMap outdatedNodesMap; + for (const auto &elem : _pendingTransitions) { + outdatedNodesMap.emplace(elem.first, elem.second->getOutdatedNodes()); } -} - -void -PendingClusterState::addAdditionalNodesToOutdatedSet( - const std::unordered_set<uint16_t>& nodes) -{ - const uint16_t nodeCount(newStateStorageNodeCount()); - for (uint16_t node : nodes) { - if (node < nodeCount) { - _outdatedNodes.insert(node); - } - } -} - -std::unordered_set<uint16_t> -PendingClusterState::getOutdatedNodeSet() const -{ - return _outdatedNodes; + return outdatedNodesMap; } uint16_t @@ -157,47 +155,6 @@ PendingClusterState::iAmDown() const return myState.getState() == lib::State::DOWN; } -bool -PendingClusterState::storageNodeMayHaveLostData(uint16_t index) -{ - Node node(NodeType::STORAGE, index); - NodeState newState = _newClusterState.getNodeState(node); - NodeState oldState = _prevClusterState.getNodeState(node); - - return (newState.getStartTimestamp() > oldState.getStartTimestamp()); -} - -void -PendingClusterState::updateSetOfNodesThatAreOutdated() -{ - const uint16_t nodeCount(newStateStorageNodeCount()); - for (uint16_t index = 0; index < nodeCount; ++index) { - if (storageNodeMayHaveLostData(index) || storageNodeChanged(index)) { - _outdatedNodes.insert(index); - } - } -} - -bool -PendingClusterState::storageNodeChanged(uint16_t index) { - Node node(NodeType::STORAGE, index); - NodeState newState = _newClusterState.getNodeState(node); - NodeState oldNodeState = _prevClusterState.getNodeState(node); - - // similarTo() also covers disk states. - if (!(oldNodeState.similarTo(newState))) { - LOG(debug, - "State for storage node %d has changed from '%s' to '%s', " - "updating bucket information", - index, - oldNodeState.toString().c_str(), - newState.toString().c_str()); - return true; - } - - return false; -} - void PendingClusterState::requestNodes() { @@ -212,114 +169,33 @@ PendingClusterState::requestNodes() void PendingClusterState::requestBucketInfoFromStorageNodesWithChangedState() { - for (uint16_t idx : _outdatedNodes) { - if (storageNodeUpInNewState(idx)) { - requestNode(idx); - } - } -} - -bool -PendingClusterState::distributorChanged( - const lib::ClusterState& oldState, - const lib::ClusterState& newState) -{ - if (newState.getDistributionBitCount() != - oldState.getDistributionBitCount()) - { - return true; - } - - Node myNode(NodeType::DISTRIBUTOR, _sender.getDistributorIndex()); - if (oldState.getNodeState(myNode).getState() == - lib::State::DOWN) - { - return true; - } - - uint16_t oldCount = oldState.getNodeCount(NodeType::DISTRIBUTOR); - uint16_t newCount = newState.getNodeCount(NodeType::DISTRIBUTOR); - - uint16_t maxCount = std::max(oldCount, newCount); - - for (uint16_t i = 0; i < maxCount; ++i) { - Node node(NodeType::DISTRIBUTOR, i); - - const lib::State& old(oldState.getNodeState(node).getState()); - const lib::State& nw(newState.getNodeState(node).getState()); - - if (nodeWasUpButNowIsDown(old, nw)) { - return (nodeInSameGroupAsSelf(i) - || nodeNeedsOwnershipTransferFromGroupDown(i, newState)); + for (auto &elem : _pendingTransitions) { + const OutdatedNodes &outdatedNodes(elem.second->getOutdatedNodes()); + for (uint16_t idx : outdatedNodes) { + if (storageNodeUpInNewState(idx)) { + requestNode(BucketSpaceAndNode(elem.first, idx)); + } } } - - return false; -} - -bool -PendingClusterState::nodeWasUpButNowIsDown(const lib::State& old, - const lib::State& nw) const -{ - return (old.oneOf("uimr") && !nw.oneOf("uimr")); -} - -bool -PendingClusterState::nodeInSameGroupAsSelf(uint16_t index) const -{ - if (_clusterInfo->nodeInSameGroupAsSelf(index)) { - LOG(debug, - "Distributor %d state changed, need to request data from all " - "storage nodes", - index); - return true; - } else { - LOG(debug, - "Distributor %d state changed but unrelated to my group.", - index); - return false; - } -} - -bool -PendingClusterState::nodeNeedsOwnershipTransferFromGroupDown( - uint16_t nodeIndex, - const lib::ClusterState& state) const -{ - const lib::Distribution& dist(_clusterInfo->getDistribution()); - if (!dist.distributorAutoOwnershipTransferOnWholeGroupDown()) { - return false; // Not doing anything for downed groups. - } - const lib::Group* group(dist.getNodeGraph().getGroupForNode(nodeIndex)); - // If there is no group information associated with the node (because the - // group has changed or the node has been removed from config), we must - // also invoke ownership transfer of buckets. - if (group == nullptr - || lib::Distribution::allDistributorsDown(*group, state)) - { - LOG(debug, - "Distributor %u state changed and is in a " - "group that now has no distributors remaining", - nodeIndex); - return true; - } - return false; } void -PendingClusterState::requestNode(uint16_t node) +PendingClusterState::requestNode(BucketSpaceAndNode bucketSpaceAndNode) { - vespalib::string distributionHash(_clusterInfo->getDistributionHash()); + const auto &distributorBucketSpace(_bucketSpaceRepo.get(bucketSpaceAndNode.bucketSpace)); + const auto &distribution(distributorBucketSpace.getDistribution()); + vespalib::string distributionHash(distribution.getNodeGraph().getDistributionConfigHash()); LOG(debug, - "Requesting bucket info for node %d with cluster state '%s' " + "Requesting bucket info for bucket space %" PRIu64 " node %d with cluster state '%s' " "and distribution hash '%s'", - node, + bucketSpaceAndNode.bucketSpace.getId(), + bucketSpaceAndNode.node, _newClusterState.toString().c_str(), distributionHash.c_str()); std::shared_ptr<api::RequestBucketInfoCommand> cmd( new api::RequestBucketInfoCommand( - BucketSpace::placeHolder(), + bucketSpaceAndNode.bucketSpace, _sender.getDistributorIndex(), _newClusterState, distributionHash)); @@ -327,9 +203,9 @@ PendingClusterState::requestNode(uint16_t node) cmd->setPriority(api::StorageMessage::HIGH); cmd->setTimeout(INT_MAX); - _sentMessages[cmd->getMsgId()] = node; + _sentMessages.emplace(cmd->getMsgId(), bucketSpaceAndNode); - _sender.sendToNode(NodeType::STORAGE, node, cmd); + _sender.sendToNode(NodeType::STORAGE, bucketSpaceAndNode.node, cmd); } @@ -353,25 +229,20 @@ PendingClusterState::onRequestBucketInfoReply(const std::shared_ptr<api::Request if (iter == _sentMessages.end()) { return false; } - const uint16_t node = iter->second; + const BucketSpaceAndNode bucketSpaceAndNode = iter->second; if (!reply->getResult().success()) { framework::MilliSecTime resendTime(_clock); resendTime += framework::MilliSecTime(100); - _delayedRequests.push_back(std::make_pair(resendTime, node)); + _delayedRequests.emplace_back(resendTime, bucketSpaceAndNode); _sentMessages.erase(iter); return true; } - setNodeReplied(node); - - for (uint32_t i = 0; i < reply->getBucketInfo().size(); ++i) { - addNodeInfo(reply->getBucketInfo()[i]._bucketId, - BucketCopy(_creationTimestamp, - node, - reply->getBucketInfo()[i]._info)); - } - + setNodeReplied(bucketSpaceAndNode.node); + auto transitionIter = _pendingTransitions.find(bucketSpaceAndNode.bucketSpace); + assert(transitionIter != _pendingTransitions.end()); + transitionIter->second->onRequestBucketInfoReply(*reply, bucketSpaceAndNode.node); _sentMessages.erase(iter); return true; @@ -389,68 +260,8 @@ PendingClusterState::resendDelayedMessages() { } } -void -PendingClusterState::addNodeInfo( - const document::BucketId& id, - const BucketCopy& copy) -{ - _entries.push_back(Entry(id, copy)); -} - -PendingClusterState::Range -PendingClusterState::skipAllForSameBucket() -{ - Range r(_iter, _iter); - - for (document::BucketId& bid = _entries[_iter].bucketId; - _iter < _entries.size() && _entries[_iter].bucketId == bid; - ++_iter) - { - } - - r.second = _iter; - return r; -} - -void -PendingClusterState::insertInfo( - BucketDatabase::Entry& info, - const Range& range) -{ - std::vector<BucketCopy> copiesToAddOrUpdate( - getCopiesThatAreNewOrAltered(info, range)); - - std::vector<uint16_t> order( - _clusterInfo->getIdealStorageNodesForState( - _newClusterState, - _entries[range.first].bucketId)); - info->addNodes(copiesToAddOrUpdate, order, TrustedUpdate::DEFER); - - LOG_BUCKET_OPERATION_NO_LOCK( - _entries[range.first].bucketId, - vespalib::make_string("insertInfo: %s", - info.toString().c_str())); -} - -std::vector<BucketCopy> -PendingClusterState::getCopiesThatAreNewOrAltered( - BucketDatabase::Entry& info, - const Range& range) -{ - std::vector<BucketCopy> copiesToAdd; - for (uint32_t i = range.first; i < range.second; ++i) { - const BucketCopy& candidate(_entries[i].copy); - const BucketCopy* cp = info->getNode(candidate.getNode()); - - if (!cp || !(cp->getBucketInfo() == candidate.getBucketInfo())) { - copiesToAdd.push_back(candidate); - } - } - return copiesToAdd; -} - std::string -PendingClusterState::requestNodesToString() +PendingClusterState::requestNodesToString() const { std::ostringstream ost; for (uint32_t i = 0; i < _requestedNodes.size(); ++i) { @@ -464,136 +275,11 @@ PendingClusterState::requestNodesToString() return ost.str(); } -bool -PendingClusterState::removeCopiesFromNodesThatWereRequested( - BucketDatabase::Entry& e, - const document::BucketId& bucketId) -{ - bool updated = false; - for (uint32_t i = 0; i < e->getNodeCount();) { - auto& info(e->getNodeRef(i)); - const uint16_t entryNode(info.getNode()); - // Don't remove an entry if it's been updated in the time after the - // bucket info requests were sent, as this would erase newer state. - // Don't immediately update trusted state, as that could erroneously - // mark a single remaining replica as trusted even though there might - // be one or more additional replicas pending merge into the database. - if (nodeIsOutdated(entryNode) - && (info.getTimestamp() < _creationTimestamp) - && e->removeNode(entryNode, TrustedUpdate::DEFER)) - { - LOG(spam, - "Removed bucket %s from node %d", - bucketId.toString().c_str(), - entryNode); - updated = true; - // After removing current node, getNodeRef(i) will point to the _next_ node, so don't increment `i`. - } else { - ++i; - } - } - return updated; -} - -bool -PendingClusterState::databaseIteratorHasPassedBucketInfoIterator( - const document::BucketId& bucketId) const -{ - return (_iter < _entries.size() - && _entries[_iter].bucketId.toKey() < bucketId.toKey()); -} - -bool -PendingClusterState::bucketInfoIteratorPointsToBucket( - const document::BucketId& bucketId) const -{ - return _iter < _entries.size() && _entries[_iter].bucketId == bucketId; -} - -bool -PendingClusterState::process(BucketDatabase::Entry& e) -{ - document::BucketId bucketId(e.getBucketId()); - - LOG(spam, - "Before merging info from nodes [%s], bucket %s had info %s", - requestNodesToString().c_str(), - bucketId.toString().c_str(), - e.getBucketInfo().toString().c_str()); - - while (databaseIteratorHasPassedBucketInfoIterator(bucketId)) { - LOG(spam, "Found new bucket %s, adding", - _entries[_iter].bucketId.toString().c_str()); - - _missingEntries.push_back(skipAllForSameBucket()); - } - - bool updated(removeCopiesFromNodesThatWereRequested(e, bucketId)); - - if (bucketInfoIteratorPointsToBucket(bucketId)) { - LOG(spam, "Updating bucket %s", - _entries[_iter].bucketId.toString().c_str()); - - insertInfo(e, skipAllForSameBucket()); - updated = true; - } - - if (updated) { - // Remove bucket if we've previously removed all nodes from it - if (e->getNodeCount() == 0) { - _removedBuckets.push_back(bucketId); - } else { - e.getBucketInfo().updateTrusted(); - } - } - - LOG(spam, - "After merging info from nodes [%s], bucket %s had info %s", - requestNodesToString().c_str(), - bucketId.toString().c_str(), - e.getBucketInfo().toString().c_str()); - - return true; -} - void -PendingClusterState::addToBucketDB(BucketDatabase& db, - const Range& range) +PendingClusterState::mergeIntoBucketDatabases() { - LOG(spam, "Adding new bucket %s with %d copies", - _entries[range.first].bucketId.toString().c_str(), - range.second - range.first); - - BucketDatabase::Entry e(_entries[range.first].bucketId, BucketInfo()); - insertInfo(e, range); - if (e->getLastGarbageCollectionTime() == 0) { - e->setLastGarbageCollectionTime( - framework::MicroSecTime(_creationTimestamp) - .getSeconds().getTime()); - } - e.getBucketInfo().updateTrusted(); - db.update(e); -} - -void -PendingClusterState::mergeInto(BucketDatabase& db) -{ - std::sort(_entries.begin(), _entries.end()); - - db.forEach(*this); - - for (uint32_t i = 0; i < _removedBuckets.size(); ++i) { - db.remove(_removedBuckets[i]); - } - _removedBuckets.clear(); - - // All of the remaining were not already in the bucket database. - while (_iter < _entries.size()) { - _missingEntries.push_back(skipAllForSameBucket()); - } - - for (uint32_t i = 0; i < _missingEntries.size(); ++i) { - addToBucketDB(db, _missingEntries[i]); + for (auto &elem : _pendingTransitions) { + elem.second->mergeIntoBucketDatabase(); } } @@ -603,11 +289,9 @@ PendingClusterState::printXml(vespalib::XmlOutputStream& xos) const using namespace vespalib::xml; xos << XmlTag("systemstate_pending") << XmlAttribute("state", _newClusterState); - for (std::map<uint64_t, uint16_t>::const_iterator iter - = _sentMessages.begin(); iter != _sentMessages.end(); ++iter) - { + for (auto &elem : _sentMessages) { xos << XmlTag("pending") - << XmlAttribute("node", iter->second) + << XmlAttribute("node", elem.second.node) << XmlEndTag(); } xos << XmlEndTag(); @@ -621,4 +305,12 @@ PendingClusterState::getSummary() const (_clock.getTimeInMicros().getTime() - _creationTimestamp)); } +PendingBucketSpaceDbTransition & +PendingClusterState::getPendingBucketSpaceDbTransition(document::BucketSpace bucketSpace) +{ + auto transitionIter = _pendingTransitions.find(bucketSpace); + assert(transitionIter != _pendingTransitions.end()); + return *transitionIter->second; +} + } diff --git a/storage/src/vespa/storage/distributor/pendingclusterstate.h b/storage/src/vespa/storage/distributor/pendingclusterstate.h index 316d7996d81..2d75c795745 100644 --- a/storage/src/vespa/storage/distributor/pendingclusterstate.h +++ b/storage/src/vespa/storage/distributor/pendingclusterstate.h @@ -1,43 +1,32 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once +#include "pending_bucket_space_db_transition_entry.h" #include "clusterinformation.h" -#include <vespa/storage/bucketdb/bucketdatabase.h> #include <vespa/storage/common/storagelink.h> #include <vespa/storageapi/message/bucket.h> #include <vespa/storageapi/message/state.h> #include <vespa/storageframework/generic/clock/clock.h> #include <vespa/vdslib/distribution/distribution.h> #include <vespa/vespalib/util/xmlserializable.h> -#include <unordered_set> +#include "outdated_nodes_map.h" +#include <unordered_map> #include <deque> namespace storage::distributor { class DistributorMessageSender; +class PendingBucketSpaceDbTransition; +class DistributorBucketSpaceRepo; /** * Class used by BucketDBUpdater to track request bucket info * messages sent to the storage nodes. */ -class PendingClusterState : public vespalib::XmlSerializable, - public BucketDatabase::MutableEntryProcessor { +class PendingClusterState : public vespalib::XmlSerializable { public: - struct Entry { - Entry(const document::BucketId& bid, - const BucketCopy& copy_) - : bucketId(bid), - copy(copy_) - {} - - document::BucketId bucketId; - BucketCopy copy; - - bool operator<(const Entry& other) const { - return bucketId.toKey() < other.bucketId.toKey(); - } - }; - + using OutdatedNodes = dbtransition::OutdatedNodes; + using OutdatedNodesMap = dbtransition::OutdatedNodesMap; struct Summary { Summary(const std::string& prevClusterState, const std::string& newClusterState, uint32_t processingTime); Summary(const Summary &); @@ -51,19 +40,18 @@ public: uint32_t _processingTime; }; - typedef std::vector<Entry> EntryList; - static std::unique_ptr<PendingClusterState> createForClusterStateChange( const framework::Clock& clock, const ClusterInformation::CSP& clusterInfo, DistributorMessageSender& sender, + DistributorBucketSpaceRepo &bucketSpaceRepo, const std::shared_ptr<api::SetSystemStateCommand>& newStateCmd, - const std::unordered_set<uint16_t>& outdatedNodes, + const OutdatedNodesMap &outdatedNodesMap, api::Timestamp creationTimestamp) { return std::unique_ptr<PendingClusterState>( - new PendingClusterState(clock, clusterInfo, sender, newStateCmd, - outdatedNodes, + new PendingClusterState(clock, clusterInfo, sender, bucketSpaceRepo, newStateCmd, + outdatedNodesMap, creationTimestamp)); } @@ -75,10 +63,11 @@ public: const framework::Clock& clock, const ClusterInformation::CSP& clusterInfo, DistributorMessageSender& sender, + DistributorBucketSpaceRepo &bucketSpaceRepo, api::Timestamp creationTimestamp) { return std::unique_ptr<PendingClusterState>( - new PendingClusterState(clock, clusterInfo, sender, creationTimestamp)); + new PendingClusterState(clock, clusterInfo, sender, bucketSpaceRepo, creationTimestamp)); } PendingClusterState(const PendingClusterState &) = delete; @@ -92,18 +81,13 @@ public: bool onRequestBucketInfoReply(const std::shared_ptr<api::RequestBucketInfoReply>& reply); /** - * Tags the given node as having replied to the - * request bucket info command. + * Tags the given node as having replied to at least one of the + * request bucket info commands. Only used for debug logging. */ void setNodeReplied(uint16_t nodeIdx) { _requestedNodes[nodeIdx] = true; } - /** - * Adds info from a node to our list of information. - */ - void addNodeInfo(const document::BucketId& id, const BucketCopy& copy); - /** Called to resend delayed resends due to failures. */ void resendDelayedMessages(); @@ -129,9 +113,6 @@ public: const lib::ClusterState& getPrevClusterState() const { return _prevClusterState; } - const lib::Distribution& getDistribution() const { - return _clusterInfo->getDistribution(); - } /** * Returns the union set of the outdated node set provided at construction @@ -140,22 +121,18 @@ public: * state was constructed for a distribution config change, this set will * be equal to the set of all available storage nodes. */ - std::unordered_set<uint16_t> getOutdatedNodeSet() const; + OutdatedNodesMap getOutdatedNodesMap() const; /** - * Merges all the results with the given bucket database. + * Merges all the results with the corresponding bucket databases. */ - void mergeInto(BucketDatabase& db); - bool process(BucketDatabase::Entry& e) override; - const EntryList& results() const { return _entries; } + void mergeIntoBucketDatabases(); + // Get pending transition for a specific bucket space. Only used by unit test. + PendingBucketSpaceDbTransition &getPendingBucketSpaceDbTransition(document::BucketSpace bucketSpace); - /** - * Returns true if this pending state was due to a distribution bit - * change rather than an actual state change. - */ - bool distributionChange() const { return _distributionChange; } void printXml(vespalib::XmlOutputStream&) const override; Summary getSummary() const; + std::string requestNodesToString() const; private: /** @@ -166,8 +143,9 @@ private: const framework::Clock&, const ClusterInformation::CSP& clusterInfo, DistributorMessageSender& sender, + DistributorBucketSpaceRepo &bucketSpaceRepo, const std::shared_ptr<api::SetSystemStateCommand>& newStateCmd, - const std::unordered_set<uint16_t>& outdatedNodes, + const OutdatedNodesMap &outdatedNodesMap, api::Timestamp creationTimestamp); /** @@ -178,16 +156,23 @@ private: const framework::Clock&, const ClusterInformation::CSP& clusterInfo, DistributorMessageSender& sender, + DistributorBucketSpaceRepo &bucketSpaceRepo, api::Timestamp creationTimestamp); + struct BucketSpaceAndNode { + document::BucketSpace bucketSpace; + uint16_t node; + BucketSpaceAndNode(document::BucketSpace bucketSpace_, + uint16_t node_) + : bucketSpace(bucketSpace_), + node(node_) + { + } + }; + + void initializeBucketSpaceTransitions(bool distributionChanged, const OutdatedNodesMap &outdatedNodesMap); void logConstructionInformation() const; - void requestNode(uint16_t node); - bool distributorChanged(const lib::ClusterState& oldState, const lib::ClusterState& newState); - bool storageNodeMayHaveLostData(uint16_t index); - bool storageNodeChanged(uint16_t index); - void markAllAvailableNodesAsRequiringRequest(); - void addAdditionalNodesToOutdatedSet(const std::unordered_set<uint16_t>& nodes); - void updateSetOfNodesThatAreOutdated(); + void requestNode(BucketSpaceAndNode bucketSpaceAndNode); void requestNodes(); void requestBucketInfoFromStorageNodesWithChangedState(); @@ -198,58 +183,14 @@ private: bool shouldRequestBucketInfo() const; bool clusterIsDown() const; bool iAmDown() const; - bool nodeInSameGroupAsSelf(uint16_t index) const; - bool nodeNeedsOwnershipTransferFromGroupDown(uint16_t nodeIndex, const lib::ClusterState& state) const; - bool nodeWasUpButNowIsDown(const lib::State& old, const lib::State& nw) const; - - typedef std::pair<uint32_t, uint32_t> Range; - - /** - * Skips through all entries for the same bucket and returns - * the range in the entry list for which they were found. - * The range is [from, to> - */ - Range skipAllForSameBucket(); - - void insertInfo(BucketDatabase::Entry& info, const Range& range); - void addToBucketDB(BucketDatabase& db, const Range& range); - - std::vector<BucketCopy> getCopiesThatAreNewOrAltered(BucketDatabase::Entry& info, const Range& range); - - std::string requestNodesToString(); - - // Returns whether at least one replica was removed from the entry. - // Does NOT implicitly update trusted status on remaining replicas; caller must do - // this explicitly. - bool removeCopiesFromNodesThatWereRequested(BucketDatabase::Entry& e, const document::BucketId& bucketId); - - bool databaseIteratorHasPassedBucketInfoIterator(const document::BucketId& bucketId) const; - bool bucketInfoIteratorPointsToBucket(const document::BucketId& bucketId) const; - - bool nodeIsOutdated(uint16_t node) const { - return (_outdatedNodes.find(node) != _outdatedNodes.end()); - } bool storageNodeUpInNewState(uint16_t node) const; std::shared_ptr<api::SetSystemStateCommand> _cmd; - std::map<uint64_t, uint16_t> _sentMessages; + std::map<uint64_t, BucketSpaceAndNode> _sentMessages; std::vector<bool> _requestedNodes; - std::vector<document::BucketId> _removedBuckets; - std::deque<std::pair<framework::MilliSecTime, uint16_t> > _delayedRequests; - - // Set for all nodes that may have changed state since that previous - // active cluster state, or that were marked as outdated when the pending - // cluster state was constructed. - // May be a superset of _requestedNodes, as some nodes that are outdated - // may be down and thus cannot get a request. - std::unordered_set<uint16_t> _outdatedNodes; - - EntryList _entries; - uint32_t _iter; - - std::vector<Range> _missingEntries; + std::deque<std::pair<framework::MilliSecTime, BucketSpaceAndNode> > _delayedRequests; lib::ClusterState _prevClusterState; lib::ClusterState _newClusterState; @@ -259,9 +200,10 @@ private: api::Timestamp _creationTimestamp; DistributorMessageSender& _sender; + DistributorBucketSpaceRepo &_bucketSpaceRepo; - bool _distributionChange; bool _bucketOwnershipTransfer; + std::unordered_map<document::BucketSpace, std::unique_ptr<PendingBucketSpaceDbTransition>, document::BucketSpace::hash> _pendingTransitions; }; } diff --git a/storage/src/vespa/storage/distributor/pendingmessagetracker.cpp b/storage/src/vespa/storage/distributor/pendingmessagetracker.cpp index 6d562510f23..f7e207d7b8c 100644 --- a/storage/src/vespa/storage/distributor/pendingmessagetracker.cpp +++ b/storage/src/vespa/storage/distributor/pendingmessagetracker.cpp @@ -77,7 +77,7 @@ pairAsRange(Pair pair) std::vector<uint64_t> PendingMessageTracker::clearMessagesForNode(uint16_t node) { - vespalib::LockGuard guard(_lock); + std::lock_guard<std::mutex> guard(_lock); MessagesByNodeAndBucket& idx(boost::multi_index::get<1>(_messages)); auto range = pairAsRange(idx.equal_range(boost::make_tuple(node))); @@ -95,7 +95,7 @@ void PendingMessageTracker::insert( const std::shared_ptr<api::StorageMessage>& msg) { - vespalib::LockGuard guard(_lock); + std::lock_guard<std::mutex> guard(_lock); if (msg->getAddress()) { _messages.insert( MessageEntry(currentTime(), @@ -118,7 +118,7 @@ PendingMessageTracker::insert( document::Bucket PendingMessageTracker::reply(const api::StorageReply& r) { - vespalib::LockGuard guard(_lock); + std::lock_guard<std::mutex> guard(_lock); document::Bucket bucket; LOG(debug, "Got reply: %s", r.toString().c_str()); @@ -171,7 +171,7 @@ PendingMessageTracker::updateOperationStats(OperationStats& opStats, NodeStatsSnapshot PendingMessageTracker::getLatencyStatistics() const { - vespalib::LockGuard guard(_lock); + std::lock_guard<std::mutex> guard(_lock); NodeStatsSnapshot snapshot; // Conveniently, snapshot data structure is exactly the same as our own. snapshot.nodeToStats = _nodeIndexToStats; @@ -205,7 +205,7 @@ PendingMessageTracker::checkPendingMessages(uint16_t node, const document::Bucket &bucket, Checker& checker) const { - vespalib::LockGuard guard(_lock); + std::lock_guard<std::mutex> guard(_lock); const MessagesByNodeAndBucket& msgs(boost::multi_index::get<1>(_messages)); auto range = pairAsRange(msgs.equal_range(boost::make_tuple(node, bucket))); @@ -216,7 +216,7 @@ void PendingMessageTracker::checkPendingMessages(const document::Bucket &bucket, Checker& checker) const { - vespalib::LockGuard guard(_lock); + std::lock_guard<std::mutex> guard(_lock); const MessagesByBucketAndType& msgs(boost::multi_index::get<2>(_messages)); auto range = pairAsRange(msgs.equal_range(boost::make_tuple(bucket))); @@ -228,7 +228,7 @@ PendingMessageTracker::hasPendingMessage(uint16_t node, const document::Bucket &bucket, uint32_t messageType) const { - vespalib::LockGuard guard(_lock); + std::lock_guard<std::mutex> guard(_lock); const MessagesByNodeAndBucket& msgs(boost::multi_index::get<1>(_messages)); auto range = msgs.equal_range(boost::make_tuple(node, bucket, messageType)); @@ -247,7 +247,7 @@ PendingMessageTracker::getStatusStartPage(std::ostream& out) const void PendingMessageTracker::getStatusPerBucket(std::ostream& out) const { - vespalib::LockGuard guard(_lock); + std::lock_guard<std::mutex> guard(_lock); const MessagesByNodeAndBucket& msgs = boost::multi_index::get<1>(_messages); using BucketMap = std::map<document::Bucket, std::vector<vespalib::string>>; @@ -285,7 +285,7 @@ PendingMessageTracker::getStatusPerBucket(std::ostream& out) const void PendingMessageTracker::getStatusPerNode(std::ostream& out) const { - vespalib::LockGuard guard(_lock); + std::lock_guard<std::mutex> guard(_lock); const MessagesByNodeAndBucket& msgs = boost::multi_index::get<1>(_messages); int lastNode = -1; for (MessagesByNodeAndBucket::const_iterator iter = @@ -337,7 +337,7 @@ PendingMessageTracker::print(std::ostream& /*out*/, NodeStats PendingMessageTracker::getNodeStats(uint16_t node) const { - vespalib::LockGuard guard(_lock); + std::lock_guard<std::mutex> guard(_lock); auto nodeIter = _nodeIndexToStats.find(node); return (nodeIter != _nodeIndexToStats.end() ? nodeIter->second : NodeStats()); diff --git a/storage/src/vespa/storage/distributor/pendingmessagetracker.h b/storage/src/vespa/storage/distributor/pendingmessagetracker.h index 48999cc837b..e7bcf85a38d 100644 --- a/storage/src/vespa/storage/distributor/pendingmessagetracker.h +++ b/storage/src/vespa/storage/distributor/pendingmessagetracker.h @@ -10,7 +10,6 @@ #include <vespa/storageapi/messageapi/returncode.h> #include <vespa/storageapi/message/bucket.h> #include <vespa/vespalib/stllike/hash_set.h> -#include <vespa/vespalib/util/sync.h> #include <boost/multi_index_container.hpp> #include <boost/multi_index/identity.hpp> #include <boost/multi_index/member.hpp> @@ -22,6 +21,7 @@ #include <set> #include <unordered_map> #include <chrono> +#include <mutex> namespace storage { namespace distributor { @@ -220,7 +220,7 @@ private: // Since distributor is currently single-threaded, this will only // contend when status page is being accessed. It is, however, required // to be present for that exact purpose. - vespalib::Lock _lock; + mutable std::mutex _lock; /** * Increment latency and operation count stats for the node the message diff --git a/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp b/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp index a1b43149963..1519a9183ba 100644 --- a/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp +++ b/storage/src/vespa/storage/distributor/persistencemessagetracker.cpp @@ -4,6 +4,8 @@ #include <vespa/storage/common/vectorprinter.h> #include <vespa/storage/common/bucketoperationlogger.h> #include <vespa/storageapi/message/persistence.h> +#include "distributor_bucket_space_repo.h" +#include "distributor_bucket_space.h" #include <vespa/log/log.h> @@ -123,8 +125,9 @@ PersistenceMessageTrackerImpl::canSendReplyEarly() const LOG(spam, "Can't return early because we have already replied or failed"); return false; } - - const lib::Distribution& distribution = _manager.getDistribution(); + auto &bucketSpaceRepo(_manager.getBucketSpaceRepo()); + auto &bucketSpace(bucketSpaceRepo.get(_reply->getBucket().getBucketSpace())); + const lib::Distribution& distribution = bucketSpace.getDistribution(); if (distribution.getInitialRedundancy() == 0) { LOG(spam, "Not returning early because initial redundancy wasn't set"); @@ -163,12 +166,14 @@ PersistenceMessageTrackerImpl::checkCopiesDeleted() // Don't check the buckets that have been remapped here, as we will // create them. + const auto &bucketSpaceRepo(_manager.getBucketSpaceRepo()); for (BucketInfoMap::const_iterator iter = _bucketInfo.begin(); iter != _bucketInfo.end(); iter++) { - BucketDatabase::Entry dbentry = - _manager.getBucketDatabase().get(iter->first.getBucketId()); + const auto &bucketSpace(bucketSpaceRepo.get(iter->first.getBucketSpace())); + const auto &bucketDb(bucketSpace.getBucketDatabase()); + BucketDatabase::Entry dbentry = bucketDb.get(iter->first.getBucketId()); if (!dbentry.valid()) { continue; diff --git a/storage/src/vespa/storage/distributor/simpleclusterinformation.h b/storage/src/vespa/storage/distributor/simpleclusterinformation.h index e6e46890c3f..2946abf620c 100644 --- a/storage/src/vespa/storage/distributor/simpleclusterinformation.h +++ b/storage/src/vespa/storage/distributor/simpleclusterinformation.h @@ -11,11 +11,9 @@ class SimpleClusterInformation : public ClusterInformation { public: SimpleClusterInformation(uint16_t myIndex, - const lib::Distribution& distribution, const lib::ClusterState& clusterState, const char* storageUpStates) : _myIndex(myIndex), - _distribution(distribution.serialize()), _clusterState(clusterState), _storageUpStates(storageUpStates) {} @@ -24,10 +22,6 @@ public: return _myIndex; } - const lib::Distribution& getDistribution() const override { - return _distribution; - } - const lib::ClusterState& getClusterState() const override { return _clusterState; } @@ -38,7 +32,6 @@ public: private: uint16_t _myIndex; - lib::Distribution _distribution; lib::ClusterState _clusterState; const char* _storageUpStates; }; diff --git a/storage/src/vespa/storage/distributor/statechecker.cpp b/storage/src/vespa/storage/distributor/statechecker.cpp index 0107430bb96..f959e5a80fb 100644 --- a/storage/src/vespa/storage/distributor/statechecker.cpp +++ b/storage/src/vespa/storage/distributor/statechecker.cpp @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "statechecker.h" #include "distributorcomponent.h" +#include "distributor_bucket_space.h" #include <vespa/log/log.h> LOG_SETUP(".distributor.statechecker"); @@ -59,22 +60,23 @@ StateChecker::Result::createStoredResult( } StateChecker::Context::Context(const DistributorComponent& c, + const DistributorBucketSpace &distributorBucketSpace, NodeMaintenanceStatsTracker& statsTracker, - const document::BucketId& bid) - : bucketId(bid), - siblingBucket(c.getSibling(bid)), + const document::Bucket &bucket_) + : bucket(bucket_), + siblingBucket(c.getSibling(bucket.getBucketId())), systemState(c.getClusterState()), distributorConfig(c.getDistributor().getConfig()), - distribution(c.getDistribution()), + distribution(distributorBucketSpace.getDistribution()), gcTimeCalculator(c.getDistributor().getBucketIdHasher(), std::chrono::seconds(distributorConfig .getGarbageCollectionInterval())), component(c), - db(c.getBucketDatabase()), + db(distributorBucketSpace.getBucketDatabase()), stats(statsTracker) { idealState = - distribution.getIdealStorageNodes(systemState, bucketId); + distribution.getIdealStorageNodes(systemState, bucket.getBucketId()); unorderedIdealState.insert(idealState.begin(), idealState.end()); } diff --git a/storage/src/vespa/storage/distributor/statechecker.h b/storage/src/vespa/storage/distributor/statechecker.h index fbadd5642d4..e204cf5325a 100644 --- a/storage/src/vespa/storage/distributor/statechecker.h +++ b/storage/src/vespa/storage/distributor/statechecker.h @@ -2,7 +2,7 @@ #pragma once #include "bucketgctimecalculator.h" -#include "maintenancebucket.h" +#include <vespa/storage/distributor/maintenance/maintenancepriority.h> #include <vespa/storage/distributor/operations/idealstate/idealstateoperation.h> #include <vespa/storage/common/storagecomponent.h> #include <vespa/storage/bucketdb/bucketdatabase.h> @@ -19,6 +19,7 @@ class DistributorConfiguration; namespace distributor { class DistributorComponent; +class DistributorBucketSpace; class NodeMaintenanceStatsTracker; /** @@ -45,15 +46,16 @@ public: struct Context { Context(const DistributorComponent&, + const DistributorBucketSpace &distributorBucketSpace, NodeMaintenanceStatsTracker&, - const document::BucketId& bid); + const document::Bucket &bucket_); ~Context(); Context(const Context &) = delete; Context & operator =(const Context &) = delete; // Per bucket - document::BucketId bucketId; + document::Bucket bucket; document::BucketId siblingBucket; BucketDatabase::Entry entry; @@ -82,7 +84,8 @@ public: return siblingEntry; } - document::Bucket getBucket() const { return document::Bucket(document::BucketSpace::placeHolder(), bucketId); } + document::Bucket getBucket() const { return bucket; } + document::BucketId getBucketId() const { return bucket.getBucketId(); } std::string toString() const; }; diff --git a/storage/src/vespa/storage/distributor/statecheckers.cpp b/storage/src/vespa/storage/distributor/statecheckers.cpp index 35d111a8c38..1f0cb19ef93 100644 --- a/storage/src/vespa/storage/distributor/statecheckers.cpp +++ b/storage/src/vespa/storage/distributor/statecheckers.cpp @@ -27,12 +27,12 @@ SplitBucketStateChecker::validForSplit(StateChecker::Context& c) if (c.entry->getNodeCount() == 0) { LOG(spam, "Can't split bucket %s, since it has no copies", - c.bucketId.toString().c_str()); + c.bucket.toString().c_str()); return false; } // Can't split anymore if we already used 58 bits. - if (c.bucketId.getUsedBits() >= 58) { + if (c.getBucketId().getUsedBits() >= 58) { return false; } @@ -145,7 +145,7 @@ SplitBucketStateChecker::check(StateChecker::Context& c) { } // Always split it if it has less used bits than the minimum. - if (c.bucketId.getUsedBits() < c.distributorConfig.getMinimalBucketSplit()) { + if (c.getBucketId().getUsedBits() < c.distributorConfig.getMinimalBucketSplit()) { return generateMinimumBucketSplitOperation(c); } return Result::noMaintenanceNeeded(); @@ -217,7 +217,7 @@ JoinBucketsStateChecker::siblingsAreInSync(const Context& context) const LOG(spam, "Not joining bucket %s because sibling bucket %s had different " "node count", - context.bucketId.toString().c_str(), + context.bucket.toString().c_str(), context.siblingBucket.toString().c_str()); return false; } @@ -238,7 +238,7 @@ JoinBucketsStateChecker::siblingsAreInSync(const Context& context) const "does not have the same node set, or inconsistent joins cannot be " "performed either due to config or because replicas were not in " "their ideal location", - context.bucketId.toString().c_str(), + context.bucket.toString().c_str(), context.siblingBucket.toString().c_str()); return false; } @@ -247,7 +247,7 @@ JoinBucketsStateChecker::siblingsAreInSync(const Context& context) const LOG(spam, "Not joining bucket %s because it or %s is out of sync " "and syncing it may cause it to become too large", - context.bucketId.toString().c_str(), + context.bucket.toString().c_str(), context.siblingBucket.toString().c_str()); return false; } @@ -258,8 +258,8 @@ JoinBucketsStateChecker::siblingsAreInSync(const Context& context) const bool JoinBucketsStateChecker::singleBucketJoinIsConsistent(const Context& c) const { - document::BucketId joinTarget(c.bucketId.getUsedBits() - 1, - c.bucketId.getRawId()); + document::BucketId joinTarget(c.getBucketId().getUsedBits() - 1, + c.getBucketId().getRawId()); // If there are 2 children under the potential join target bucket, joining // would cause the bucket tree to become inconsistent. The reason for this // being that "moving" a bucket one bit up in the tree (and into @@ -305,30 +305,30 @@ JoinBucketsStateChecker::shouldJoin(const Context& c) const { if (c.entry->getNodeCount() == 0) { LOG(spam, "Not joining bucket %s because it has no nodes", - c.bucketId.toString().c_str()); + c.bucket.toString().c_str()); return false; } if (contextBucketHasTooManyReplicas(c)) { LOG(spam, "Not joining %s because it has too high replication level", - c.bucketId.toString().c_str()); + c.bucket.toString().c_str()); return false; } if (c.distributorConfig.getJoinSize() == 0 && c.distributorConfig.getJoinCount() == 0) { LOG(spam, "Not joining bucket %s because join is disabled", - c.bucketId.toString().c_str()); + c.bucket.toString().c_str()); return false; } - if (bucketAtDistributionBitLimit(c.bucketId, c)) { + if (bucketAtDistributionBitLimit(c.getBucketId(), c)) { LOG(spam, "Not joining bucket %s because it is below the min split " "count (config: %u, cluster state: %u, bucket has: %u)", - c.bucketId.toString().c_str(), + c.bucket.toString().c_str(), c.distributorConfig.getMinimalBucketSplit(), c.systemState.getDistributionBitCount(), - c.bucketId.getUsedBits()); + c.getBucketId().getUsedBits()); return false; } @@ -337,11 +337,11 @@ JoinBucketsStateChecker::shouldJoin(const Context& c) const } if (c.getSiblingEntry().valid()) { - if (!isFirstSibling(c.bucketId)) { + if (!isFirstSibling(c.getBucketId())) { LOG(spam, "Not joining bucket %s because it is the second sibling of " "%s and not the first", - c.bucketId.toString().c_str(), + c.bucket.toString().c_str(), c.siblingBucket.toString().c_str()); return false; } @@ -427,8 +427,8 @@ JoinBucketsStateChecker::computeJoinBucket(const Context& c) const { // Always decrease by at least 1 bit, as we could not get here unless this // were a valid outcome. - unsigned int level = c.bucketId.getUsedBits() - 1; - document::BucketId target(level, c.bucketId.getRawId()); + unsigned int level = c.getBucketId().getUsedBits() - 1; + document::BucketId target(level, c.getBucketId().getRawId()); // Push bucket up the tree as long as it gets no siblings. This means // joins involving 2 source buckets will currently only be decreased by 1 @@ -436,7 +436,7 @@ JoinBucketsStateChecker::computeJoinBucket(const Context& c) const // be decreased by multiple bits. We may want to optimize joins for cases // with 2 source buckets in the future. while (true) { - document::BucketId candidate(level, c.bucketId.getRawId()); + document::BucketId candidate(level, c.getBucketId().getRawId()); if (bucketHasMultipleChildren(candidate, c) || !legalBucketSplitLevel(candidate, c)) { @@ -445,7 +445,7 @@ JoinBucketsStateChecker::computeJoinBucket(const Context& c) const --level; target = candidate; } - return document::Bucket(BucketSpace::placeHolder(), target); + return document::Bucket(c.getBucket().getBucketSpace(), target); } StateChecker::Result @@ -458,15 +458,15 @@ JoinBucketsStateChecker::check(StateChecker::Context& c) } document::Bucket joinedBucket(computeJoinBucket(c)); - assert(joinedBucket.getBucketId().getUsedBits() < c.bucketId.getUsedBits()); + assert(joinedBucket.getBucketId().getUsedBits() < c.getBucketId().getUsedBits()); std::vector<document::BucketId> sourceBuckets; if (c.getSiblingEntry().valid()) { sourceBuckets.push_back(c.siblingBucket); } else { - sourceBuckets.push_back(c.bucketId); + sourceBuckets.push_back(c.getBucketId()); } - sourceBuckets.push_back(c.bucketId); + sourceBuckets.push_back(c.getBucketId()); IdealStateOperation::UP op(new JoinOperation( c.component.getClusterName(), BucketAndNodes(joinedBucket, c.entry->getNodes()), @@ -568,7 +568,7 @@ SplitInconsistentStateChecker::check(StateChecker::Context& c) return Result::noMaintenanceNeeded(); } - if (!isLeastSplitBucket(c.bucketId, c.entries)) { + if (!isLeastSplitBucket(c.getBucketId(), c.entries)) { return Result::noMaintenanceNeeded(); } @@ -581,7 +581,7 @@ SplitInconsistentStateChecker::check(StateChecker::Context& c) op->setPriority(c.distributorConfig.getMaintenancePriorities() .splitInconsistentBucket); - op->setDetailedReason(getReason(c.bucketId, c.entries)); + op->setDetailedReason(getReason(c.getBucketId(), c.entries)); return Result::createStoredResult(std::move(op), MaintenancePriority::HIGH); } @@ -849,14 +849,14 @@ SynchronizeAndMoveStateChecker::check(StateChecker::Context& c) op->setPriority(result.priority()); op->setDetailedReason(result.reason()); MaintenancePriority::Priority schedPri( - result.needsMoveOnly() ? MaintenancePriority::VERY_LOW + result.needsMoveOnly() ? MaintenancePriority::LOW : MaintenancePriority::MEDIUM); return Result::createStoredResult(std::move(op), schedPri); } else { LOG(spam, "Bucket %s: No need for merge, as bucket is in consistent state " "(or inconsistent buckets are empty) %s", - c.bucketId.toString().c_str(), + c.bucket.toString().c_str(), c.entry->toString().c_str()); return Result::noMaintenanceNeeded(); } @@ -1119,7 +1119,7 @@ GarbageCollectionStateChecker::needsGarbageCollection(const Context& c) const std::chrono::seconds currentTime( c.component.getClock().getTimeInSeconds().getTime()); - return c.gcTimeCalculator.shouldGc(c.bucketId, currentTime, lastRunAt); + return c.gcTimeCalculator.shouldGc(c.getBucketId(), currentTime, lastRunAt); } StateChecker::Result @@ -1142,7 +1142,7 @@ GarbageCollectionStateChecker::check(Context& c) op->setPriority(c.distributorConfig.getMaintenancePriorities() .garbageCollection); op->setDetailedReason(reason.c_str()); - return Result::createStoredResult(std::move(op), MaintenancePriority::LOW); + return Result::createStoredResult(std::move(op), MaintenancePriority::VERY_LOW); } else { return Result::noMaintenanceNeeded(); } diff --git a/storage/src/vespa/storage/frameworkimpl/component/servicelayercomponentregisterimpl.cpp b/storage/src/vespa/storage/frameworkimpl/component/servicelayercomponentregisterimpl.cpp index da734c07c2d..6fd75e06d9d 100644 --- a/storage/src/vespa/storage/frameworkimpl/component/servicelayercomponentregisterimpl.cpp +++ b/storage/src/vespa/storage/frameworkimpl/component/servicelayercomponentregisterimpl.cpp @@ -36,4 +36,14 @@ ServiceLayerComponentRegisterImpl::setDiskCount(uint16_t count) } } +void +ServiceLayerComponentRegisterImpl::setDistribution(lib::Distribution::SP distribution) +{ + // For now, copy distribution to all content bucket spaces + for (const auto &elem : _bucketSpaceRepo) { + elem.second->setDistribution(distribution); + } + StorageComponentRegisterImpl::setDistribution(distribution); +} + } // storage diff --git a/storage/src/vespa/storage/frameworkimpl/component/servicelayercomponentregisterimpl.h b/storage/src/vespa/storage/frameworkimpl/component/servicelayercomponentregisterimpl.h index 5b3e54e3831..df4047c92c3 100644 --- a/storage/src/vespa/storage/frameworkimpl/component/servicelayercomponentregisterimpl.h +++ b/storage/src/vespa/storage/frameworkimpl/component/servicelayercomponentregisterimpl.h @@ -37,6 +37,7 @@ public: void registerServiceLayerComponent(ServiceLayerManagedComponent&) override; void setDiskCount(uint16_t count); + virtual void setDistribution(lib::Distribution::SP distribution) override; }; } // storage diff --git a/storage/src/vespa/storage/persistence/bucketownershipnotifier.cpp b/storage/src/vespa/storage/persistence/bucketownershipnotifier.cpp index ae028dd20bd..900fa71d7a0 100644 --- a/storage/src/vespa/storage/persistence/bucketownershipnotifier.cpp +++ b/storage/src/vespa/storage/persistence/bucketownershipnotifier.cpp @@ -3,6 +3,7 @@ #include "bucketownershipnotifier.h" #include <vespa/storage/common/nodestateupdater.h> #include <vespa/storage/common/bucketoperationlogger.h> +#include <vespa/storage/common/content_bucket_space_repo.h> #include <vespa/storageapi/message/bucket.h> #include <vespa/vdslib/distribution/distribution.h> #include <vespa/vespalib/util/backtrace.h> @@ -19,7 +20,8 @@ BucketOwnershipNotifier::getOwnerDistributorForBucket( const document::Bucket &bucket) const { try { - return (_component.getDistribution()->getIdealDistributorNode( + auto distribution(_component.getBucketSpaceRepo().get(bucket.getBucketSpace()).getDistribution()); + return (distribution->getIdealDistributorNode( *_component.getStateUpdater().getSystemState(), bucket.getBucketId())); // If we get exceptions there aren't any distributors, so they'll have // to explicitly fetch all bucket info eventually anyway. diff --git a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp index fffd559866d..31f712faea2 100644 --- a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp +++ b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp @@ -742,7 +742,7 @@ FileStorHandlerImpl::calculateTargetBasedOnDocId( std::vector<RemapInfo*>& targets) { document::DocumentId id(getDocId(msg)); - document::Bucket bucket(BucketSpace::placeHolder(), _bucketIdFactory.getBucketId(id)); + document::Bucket bucket(msg.getBucket().getBucketSpace(), _bucketIdFactory.getBucketId(id)); for (uint32_t i = 0; i < targets.size(); i++) { if (targets[i]->bucket.getBucketId().getRawId() != 0 && diff --git a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp index 88a7343f8c8..cf41a297541 100644 --- a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp +++ b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp @@ -966,7 +966,6 @@ void FileStorManager::updateState() { lib::ClusterState::CSP state(_component.getStateUpdater().getSystemState()); - spi::ClusterState spiState(*state, _component.getIndex(), *_component.getDistribution()); lib::Node node(_component.getNodeType(), _component.getIndex()); bool nodeUp = state->getNodeState(node).getState().oneOf("uir"); @@ -977,7 +976,11 @@ FileStorManager::updateState() Deactivator deactivator; _component.getBucketSpaceRepo().forEachBucket(deactivator, "FileStorManager::updateState"); } - _provider->setClusterState(spiState); + for (const auto &elem : _component.getBucketSpaceRepo()) { + BucketSpace bucketSpace(elem.first); + spi::ClusterState spiState(*state, _component.getIndex(), *elem.second->getDistribution()); + _provider->setClusterState(bucketSpace, spiState); + } _nodeUpInLastNodeStateSeenByProvider = nodeUp; } diff --git a/storage/src/vespa/storage/persistence/filestorage/modifiedbucketchecker.cpp b/storage/src/vespa/storage/persistence/filestorage/modifiedbucketchecker.cpp index 093576622db..1834c93209d 100644 --- a/storage/src/vespa/storage/persistence/filestorage/modifiedbucketchecker.cpp +++ b/storage/src/vespa/storage/persistence/filestorage/modifiedbucketchecker.cpp @@ -12,6 +12,32 @@ using document::BucketSpace; namespace storage { +ModifiedBucketChecker::CyclicBucketSpaceIterator:: +CyclicBucketSpaceIterator(const ContentBucketSpaceRepo::BucketSpaces &bucketSpaces) + : _bucketSpaces(bucketSpaces), + _idx(0) +{ + std::sort(_bucketSpaces.begin(), _bucketSpaces.end()); +} + +ModifiedBucketChecker::BucketIdListResult::BucketIdListResult() + : _bucketSpace(document::BucketSpace::placeHolder()), + _buckets() +{ +} + +void +ModifiedBucketChecker::BucketIdListResult::reset(document::BucketSpace bucketSpace, + document::bucket::BucketIdList &buckets) +{ + _bucketSpace = bucketSpace; + assert(_buckets.empty()); + _buckets.swap(buckets); + // We pick chunks from the end of the list, so reverse it to get + // the same send order as order received. + std::reverse(_buckets.begin(), _buckets.end()); +} + ModifiedBucketChecker::ModifiedBucketChecker( ServiceLayerComponentRegister& compReg, spi::PersistenceProvider& provider, @@ -23,6 +49,8 @@ ModifiedBucketChecker::ModifiedBucketChecker( _configFetcher(configUri.getContext()), _monitor(), _stateLock(), + _bucketSpaces(), + _rechecksNotStarted(), _pendingRequests(0), _maxPendingChunkSize(100), _singleThreadMode(false) @@ -33,6 +61,7 @@ ModifiedBucketChecker::ModifiedBucketChecker( std::ostringstream threadName; threadName << "Modified bucket checker " << static_cast<void*>(this); _component.reset(new ServiceLayerComponent(compReg, threadName.str())); + _bucketSpaces = std::make_unique<CyclicBucketSpaceIterator>(_component->getBucketSpaceRepo().getBucketSpaces()); } ModifiedBucketChecker::~ModifiedBucketChecker() @@ -120,9 +149,9 @@ ModifiedBucketChecker::onInternalReply( } bool -ModifiedBucketChecker::requestModifiedBucketsFromProvider() +ModifiedBucketChecker::requestModifiedBucketsFromProvider(document::BucketSpace bucketSpace) { - spi::BucketIdListResult result(_provider.getModifiedBuckets(document::BucketSpace::placeHolder())); + spi::BucketIdListResult result(_provider.getModifiedBuckets(bucketSpace)); if (result.hasError()) { LOG(debug, "getModifiedBuckets() failed: %s", result.toString().c_str()); @@ -130,11 +159,7 @@ ModifiedBucketChecker::requestModifiedBucketsFromProvider() } { vespalib::LockGuard guard(_stateLock); - assert(_rechecksNotStarted.empty()); - _rechecksNotStarted.swap(result.getList()); - // We pick chunks from the end of the list, so reverse it to get - // the same send order as order received. - std::reverse(_rechecksNotStarted.begin(), _rechecksNotStarted.end()); + _rechecksNotStarted.reset(bucketSpace, result.getList()); } return true; } @@ -148,7 +173,7 @@ ModifiedBucketChecker::nextRecheckChunk( size_t n = std::min(_maxPendingChunkSize, _rechecksNotStarted.size()); for (size_t i = 0; i < n; ++i) { - document::Bucket bucket(BucketSpace::placeHolder(), _rechecksNotStarted.back()); + document::Bucket bucket(_rechecksNotStarted.bucketSpace(), _rechecksNotStarted.back()); commandsToSend.emplace_back(new RecheckBucketInfoCommand(bucket)); _rechecksNotStarted.pop_back(); } @@ -184,7 +209,7 @@ ModifiedBucketChecker::tick() shouldRequestFromProvider = !moreChunksRemaining(); } if (shouldRequestFromProvider) { - if (!requestModifiedBucketsFromProvider()) { + if (!requestModifiedBucketsFromProvider(_bucketSpaces->next())) { return false; } } diff --git a/storage/src/vespa/storage/persistence/filestorage/modifiedbucketchecker.h b/storage/src/vespa/storage/persistence/filestorage/modifiedbucketchecker.h index 3e43481bf49..c6f13ce1a4c 100644 --- a/storage/src/vespa/storage/persistence/filestorage/modifiedbucketchecker.h +++ b/storage/src/vespa/storage/persistence/filestorage/modifiedbucketchecker.h @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once +#include <vespa/storage/common/content_bucket_space_repo.h> #include <vespa/storage/common/storagecomponent.h> #include <vespa/storage/common/servicelayercomponent.h> #include <vespa/storage/common/storagelink.h> @@ -49,17 +50,45 @@ private: bool moreChunksRemaining() const { return !_rechecksNotStarted.empty(); } - bool requestModifiedBucketsFromProvider(); + bool requestModifiedBucketsFromProvider(document::BucketSpace bucketSpace); void nextRecheckChunk(std::vector<RecheckBucketInfoCommand::SP>&); void dispatchAllToPersistenceQueues(const std::vector<RecheckBucketInfoCommand::SP>&); + class CyclicBucketSpaceIterator { + private: + ContentBucketSpaceRepo::BucketSpaces _bucketSpaces; + size_t _idx; + public: + using UP = std::unique_ptr<CyclicBucketSpaceIterator>; + CyclicBucketSpaceIterator(const ContentBucketSpaceRepo::BucketSpaces &bucketSpaces); + document::BucketSpace next() { + return _bucketSpaces[(_idx++)%_bucketSpaces.size()]; + } + }; + + class BucketIdListResult { + private: + document::BucketSpace _bucketSpace; + document::bucket::BucketIdList _buckets; + public: + BucketIdListResult(); + void reset(document::BucketSpace bucketSpace, + document::bucket::BucketIdList &buckets); + const document::BucketSpace &bucketSpace() const { return _bucketSpace; } + size_t size() const { return _buckets.size(); } + bool empty() const { return _buckets.empty(); } + const document::BucketId &back() const { return _buckets.back(); } + void pop_back() { _buckets.pop_back(); } + }; + spi::PersistenceProvider& _provider; ServiceLayerComponent::UP _component; framework::Thread::UP _thread; config::ConfigFetcher _configFetcher; vespalib::Monitor _monitor; vespalib::Lock _stateLock; - document::bucket::BucketIdList _rechecksNotStarted; + CyclicBucketSpaceIterator::UP _bucketSpaces; + BucketIdListResult _rechecksNotStarted; size_t _pendingRequests; size_t _maxPendingChunkSize; bool _singleThreadMode; // For unit testing only diff --git a/storage/src/vespa/storage/persistence/provider_error_wrapper.cpp b/storage/src/vespa/storage/persistence/provider_error_wrapper.cpp index 15b0a469b35..056561e8e21 100644 --- a/storage/src/vespa/storage/persistence/provider_error_wrapper.cpp +++ b/storage/src/vespa/storage/persistence/provider_error_wrapper.cpp @@ -55,9 +55,9 @@ ProviderErrorWrapper::listBuckets(BucketSpace bucketSpace, spi::PartitionId part } spi::Result -ProviderErrorWrapper::setClusterState(const spi::ClusterState& state) +ProviderErrorWrapper::setClusterState(BucketSpace bucketSpace, const spi::ClusterState& state) { - return checkResult(_impl.setClusterState(state)); + return checkResult(_impl.setClusterState(bucketSpace, state)); } spi::Result diff --git a/storage/src/vespa/storage/persistence/provider_error_wrapper.h b/storage/src/vespa/storage/persistence/provider_error_wrapper.h index 122837e75ed..3b5ace90d13 100644 --- a/storage/src/vespa/storage/persistence/provider_error_wrapper.h +++ b/storage/src/vespa/storage/persistence/provider_error_wrapper.h @@ -44,7 +44,7 @@ public: spi::Result initialize() override; spi::PartitionStateListResult getPartitionStates() const override; spi::BucketIdListResult listBuckets(BucketSpace bucketSpace, spi::PartitionId) const override; - spi::Result setClusterState(const spi::ClusterState&) override; + spi::Result setClusterState(BucketSpace bucketSpace, const spi::ClusterState&) override; spi::Result setActiveState(const spi::Bucket& bucket, spi::BucketInfo::ActiveState newState) override; spi::BucketInfoResult getBucketInfo(const spi::Bucket&) const override; spi::Result put(const spi::Bucket&, spi::Timestamp, const spi::DocumentSP&, spi::Context&) override; diff --git a/storage/src/vespa/storage/storageserver/bucketintegritychecker.cpp b/storage/src/vespa/storage/storageserver/bucketintegritychecker.cpp index 5a3c7e5c35c..b4c7d1e3e80 100644 --- a/storage/src/vespa/storage/storageserver/bucketintegritychecker.cpp +++ b/storage/src/vespa/storage/storageserver/bucketintegritychecker.cpp @@ -19,31 +19,6 @@ using document::BucketSpace; namespace storage { namespace { - /* - std::string printDate(time_t time) { - char date[26]; - struct tm datestruct; - struct tm* datestructptr = gmtime_r(&time, &datestruct); - assert(datestructptr); - char* result = asctime_r(&datestruct, date); - size_t size = strlen(result); - while (size > 0) { - bool stop = false; - switch (result[size - 1]) { - case '\n': - case '\r': - case '\f': - case '\t': - --size; - default: - stop = true; - break; - } - if (stop) break; - } - return std::string(result, size); - } - */ std::string printMinutesOfDay(uint32_t minutesOfDay) { std::ostringstream ost; @@ -131,7 +106,7 @@ struct NextEntryFinder { _first = false; return StorBucketDatabase::CONTINUE; } else { - _next.reset(new document::BucketId(bucket)); + _next = std::make_unique<document::BucketId>(bucket); return StorBucketDatabase::ABORT; } } @@ -145,48 +120,58 @@ std::unique_ptr<document::BucketId> getNextId(StorBucketDatabase& database, database.each(proc, "BucketIntegrityChecker::getNextId", last.toKey()); return std::move(proc._next); } + +bool allBucketSpacesExhausted(size_t index, const ContentBucketSpaceRepo::BucketSpaces& bucketSpaces) noexcept { + return (index == bucketSpaces.size() - 1); +} + } // End of anonymous namespace -document::BucketId -BucketIntegrityChecker::DiskData::iterate(StorBucketDatabase& bucketDatabase) +document::Bucket +BucketIntegrityChecker::DiskData::iterate(const ContentBucketSpaceRepo::BucketSpaces& bucketSpaces, + const ContentBucketSpaceRepo& bucketSpaceRepo) { static uint32_t i=0; - // Resend failed buckets once in a while - if (failedRepairs.size() > 0 && ++i % 10 == 9) - { - document::BucketId bid(failedRepairs.front()); + // Resend failed buckets once in a while + if (!failedRepairs.empty() && ++i % 10 == 9) { + document::Bucket bucket(failedRepairs.front()); LOG(spam, "Scheduling next bucket %s from failed repairs list", - bid.toString().c_str()); + bucket.toString().c_str()); failedRepairs.pop_front(); ++retriedBuckets; - return bid; + return bucket; } if (state == NOT_STARTED) { - // Guarantueed to be before all buckets. + // Guaranteed to be before all buckets. currentBucket = document::BucketId(0, 0); + currentBucketSpaceIndex = 0; } - if (state != DONE) { - std::unique_ptr<document::BucketId> bid( - getNextId(bucketDatabase, currentBucket, disk)); - if (bid.get()) { + while (state != DONE) { + const auto currentSpace = bucketSpaces[currentBucketSpaceIndex]; + const auto bid = getNextId(bucketSpaceRepo.get(currentSpace).bucketDatabase(), currentBucket, disk); + if (bid) { state = IN_PROGRESS; currentBucket = *bid; - return currentBucket; - } else { + return document::Bucket(currentSpace, currentBucket); + } else if (allBucketSpacesExhausted(currentBucketSpaceIndex, bucketSpaces)) { state = DONE; + break; + } else { + ++currentBucketSpaceIndex; + currentBucket = document::BucketId(0, 0); } } - // If we didn't schedule repaired, but we ended up not having any other, - // take repaired once anyways - if (failedRepairs.size() > 0) { - document::BucketId bid(failedRepairs.front()); + // If we didn't schedule repaired, but we ended up not having any other, + // take repaired once anyways + if (!failedRepairs.empty()) { + document::Bucket bucket(failedRepairs.front()); LOG(spam, "Done iterating, scheduling next bucket %s from failed " - "repairs list", bid.toString().c_str()); + "repairs list", bucket.toString().c_str()); failedRepairs.pop_front(); ++retriedBuckets; - return bid; + return bucket; } - return document::BucketId(0, 0); + return document::Bucket(bucketSpaces[currentBucketSpaceIndex], document::BucketId(0, 0)); } BucketIntegrityChecker::BucketIntegrityChecker( @@ -196,7 +181,9 @@ BucketIntegrityChecker::BucketIntegrityChecker( Runnable(), framework::HtmlStatusReporter("bucketintegritychecker", "Bucket integrity checker"), + _component(compReg, "bucketintegritychecker"), _cycleCount(0), + _bucketSpaces(_component.getBucketSpaceRepo().getBucketSpaces()), _status(), _lastCycleStart(0), _cycleStartBucketCount(0), @@ -205,19 +192,18 @@ BucketIntegrityChecker::BucketIntegrityChecker( _currentRunWithFullVerification(false), _verifyAllRepairs(false), _scheduleOptions(), - _systemState(), _wait(), _configFetcher(configUri.getContext()), - _maxThreadWaitTime(60 * 1000), - _component(compReg, "bucketintegritychecker") + _maxThreadWaitTime(60 * 1000) { + assert(!_bucketSpaces.empty()); LOG(debug, "Configuring bucket integrity checker to work with %u disks.", _component.getDiskCount()); _status.resize(_component.getDiskCount()); for (uint16_t i=0; i<_component.getDiskCount(); ++i) { _status[i].disk = i; } - if (_status.size() == 0) { + if (_status.empty()) { throw vespalib::IllegalStateException( "Cannot have storage with no disks.", VESPA_STRLOC); } @@ -254,10 +240,10 @@ BucketIntegrityChecker::~BucketIntegrityChecker() void BucketIntegrityChecker::onClose() { - // Avoid getting config during shutdown + // Avoid getting config during shutdown _configFetcher.close(); - // Close thread to ensure we don't send anything more down after - if (_thread.get() != 0) { + // Close thread to ensure we don't send anything more down after + if (_thread) { LOG(debug, "Waiting for bucket integrity worker thread to close."); _thread->interruptAndJoin(&_wait); LOG(debug, "Bucket integrity worker thread closed."); @@ -367,18 +353,20 @@ bool BucketIntegrityChecker::onInternalReply( const std::shared_ptr<api::InternalReply>& internalReply) { - // We only care about repair bucket replies - shared_ptr<RepairBucketReply> reply( - std::dynamic_pointer_cast<RepairBucketReply>(internalReply)); - if (!reply.get()) return false; + // We only care about repair bucket replies + auto reply = std::dynamic_pointer_cast<RepairBucketReply>(internalReply); + if (!reply) { + return false; + } vespalib::MonitorGuard monitor(_wait); _lastResponseTime = _component.getClock().getTimeInSeconds(); uint8_t disk = reply->getDisk(); + assert(disk < _status.size()); --_status[disk].pendingCount; LOG(spam, "Got repair reply for bucket %s: %s. %u messages still pending " "for disk %u. Bucket altered ? %s", - reply->getBucketId().toString().c_str(), + reply->getBucket().toString().c_str(), reply->getResult().toString().c_str(), _status[disk].pendingCount, disk, (reply->bucketAltered() ? "true" : "false")); @@ -399,13 +387,13 @@ BucketIntegrityChecker::onInternalReply( ++_status[disk].checkedBuckets; LOGBP(debug, "Failed to repair bucket %s due to aborting request. " "Likely bucket split/join or storage shutting down: %s", - reply->getBucketId().toString().c_str(), + reply->getBucket().toString().c_str(), reply->getResult().toString().c_str()); } else { - _status[disk].failedRepairs.push_back(reply->getBucketId()); + _status[disk].failedRepairs.push_back(reply->getBucket()); LOGBP(warning, "Failed to perform maintenance on bucket %s, " "scheduled to be retried: %s", - reply->getBucketId().toString().c_str(), + reply->getBucket().toString().c_str(), reply->getResult().toString().c_str()); } if (_lastCycleCompleted) { @@ -415,16 +403,6 @@ BucketIntegrityChecker::onInternalReply( return true; } -bool -BucketIntegrityChecker::onSetSystemState( - const std::shared_ptr<api::SetSystemStateCommand>& cmd) -{ - vespalib::MonitorGuard monitor(_wait); - _systemState = cmd->getSystemState(); - return false; -} - - SchedulingOptions::RunState BucketIntegrityChecker::getCurrentRunState( framework::SecondTime currentTime) const @@ -449,9 +427,7 @@ BucketIntegrityChecker::getCurrentRunState( ) ) { // If we're within region in day that we can run. -//std::cerr << "We're inside time boundary. Current time: " << minutesOfDay << " (" << printMinutesOfDay(minutesOfDay) << "). Running between " << _scheduleOptions._dailyCycleStart << " (" << printMinutesOfDay(_scheduleOptions._dailyCycleStart) << ") - " << _scheduleOptions._dailyCycleStop << " (" << printMinutesOfDay(_scheduleOptions._dailyCycleStop) << ")\n"; if (state == SchedulingOptions::CONTINUE) { -//std::cerr << "We're in continue state.\n"; // If we're in a continue state, set runstate if there's a current // run active that isn't completed yet, don't run otherwise. state = (_lastCycleCompleted @@ -471,18 +447,13 @@ BucketIntegrityChecker::getCurrentRunState( if (_currentRunWithFullVerification || state == SchedulingOptions::RUN_CHEAP) { -//std::cerr << "Tagging dont run since too little time passed since last run\n" << "current time: " << currentTime << ", last start " << _lastCycleStart << ", min cycle time " << _scheduleOptions._minCycleTime << "\n"; state = SchedulingOptions::DONT_RUN; } else { -//std::cerr << "We can start new run. Last cycle started at " << _lastCycleStart.toString() << " current time is " << currentTime.toString() << " and min cycle time is " << _scheduleOptions._minCycleTime << "\n"; } - } else { -//std::cerr << "Enough time passed? " << currentTime.toString() << " - " << _lastCycleStart.toString() << " >= " << _scheduleOptions._minCycleTime << "\n"; } } } else { // If we're outside of time of day boundaries, don't run -//std::cerr << "We're outside time boundary. Current time: " << minutesOfDay << " (" << printMinutesOfDay(minutesOfDay) << "). Only running between " << _scheduleOptions._dailyCycleStart << " (" << printMinutesOfDay(_scheduleOptions._dailyCycleStart) << ") - " << _scheduleOptions._dailyCycleStop << " (" << printMinutesOfDay(_scheduleOptions._dailyCycleStop) << ")\n"; state = SchedulingOptions::DONT_RUN; } return state; @@ -493,7 +464,7 @@ BucketIntegrityChecker::run(framework::ThreadHandle& thread) { while (!thread.interrupted()) { thread.registerTick(framework::PROCESS_CYCLE); - // Get the state based on the current time. + // Get the state based on the current time. framework::SecondTime currentTime( _component.getClock().getTimeInSeconds()); @@ -543,8 +514,7 @@ BucketIntegrityChecker::run(framework::ThreadHandle& thread) (_scheduleOptions._requestDelay - (currentTime - _lastResponseTime)).getMillis()); if (delay > _maxThreadWaitTime) delay = _maxThreadWaitTime; - monitor.wait(std::min(_maxThreadWaitTime.getTime(), - delay.getTime())); + monitor.wait(std::min(_maxThreadWaitTime.getTime(), delay.getTime())); thread.registerTick(framework::WAIT_CYCLE); } else if (_lastCycleCompleted && getTotalPendingCount() > 0) { LOG(spam, "Completed last cycle. Waiting until we have 0 pending " @@ -558,31 +528,29 @@ BucketIntegrityChecker::run(framework::ThreadHandle& thread) _scheduleOptions._maxPendingCount); // Else we send up to max pending and wait for responses. if (_lastCycleCompleted) { - for (uint32_t i=0; i<_status.size(); ++i) { - _status[i].state = DiskData::NOT_STARTED; - _status[i].failedRepairs.clear(); - _status[i].checkedBuckets = 0; - _status[i].retriedBuckets = 0; + for (auto& disk : _status) { + disk.state = DiskData::NOT_STARTED; + disk.failedRepairs.clear(); + disk.checkedBuckets = 0; + disk.retriedBuckets = 0; } LOG(info, "Starting new verification/repair cycle at time %s.", currentTime.toString().c_str()); _lastCycleStart = currentTime; - _cycleStartBucketCount = _component.getBucketDatabase(BucketSpace::placeHolder()).size(); + _cycleStartBucketCount = 0; + for (auto space : _bucketSpaces) { + _cycleStartBucketCount += _component.getBucketDatabase(space).size(); + } _lastCycleCompleted = false; - _currentRunWithFullVerification - = (state == SchedulingOptions::RUN_FULL); + _currentRunWithFullVerification = (state == SchedulingOptions::RUN_FULL); ++_cycleCount; } - for (uint32_t i=0; i<_status.size(); ++i) { - while (_status[i].pendingCount - < _scheduleOptions._maxPendingCount) - { - document::BucketId bid(_status[i].iterate( - _component.getBucketDatabase(BucketSpace::placeHolder()))); - if (bid == document::BucketId(0, 0)) { - LOG(debug, "Completed repair cycle for disk %u.", i); - // If there is no next bucket, we might have completed - // run + for (auto& disk : _status) { + while (disk.pendingCount < _scheduleOptions._maxPendingCount) { + auto bucket = disk.iterate(_bucketSpaces, _component.getBucketSpaceRepo()); + if (bucket.getBucketId() == document::BucketId(0, 0)) { + LOG(debug, "Completed repair cycle for disk %u.", disk.disk); + // If there is no next bucket, we might have completed run bool completed = true; for (uint32_t j=0; j<_status.size(); ++j) { if (!_status[j].done()) { @@ -596,17 +564,15 @@ BucketIntegrityChecker::run(framework::ThreadHandle& thread) } break; } - document::Bucket bucket(BucketSpace::placeHolder(), bid); - std::shared_ptr<RepairBucketCommand> cmd( - new RepairBucketCommand(bucket, _status[i].disk)); + auto cmd = std::make_shared<RepairBucketCommand>(bucket, disk.disk); cmd->verifyBody(_currentRunWithFullVerification); cmd->moveToIdealDisk(true); cmd->setPriority(230); LOG(spam, "Sending new repair command for bucket %s. " "After this, there will be %u pending on disk %u", - bid.toString().c_str(), - _status[i].pendingCount + 1, _status[i].disk); - ++_status[i].pendingCount; + bucket.toString().c_str(), + disk.pendingCount + 1, disk.disk); + ++disk.pendingCount; dispatchDown(cmd); } } @@ -620,8 +586,8 @@ uint32_t BucketIntegrityChecker::getTotalPendingCount() const { uint32_t total = 0; - for (uint32_t i=0; i<_status.size(); ++i) { - total += _status[i].pendingCount; + for (auto& disk : _status) { + total += disk.pendingCount; } return total; } diff --git a/storage/src/vespa/storage/storageserver/bucketintegritychecker.h b/storage/src/vespa/storage/storageserver/bucketintegritychecker.h index b169090ab3c..fb619a84c46 100644 --- a/storage/src/vespa/storage/storageserver/bucketintegritychecker.h +++ b/storage/src/vespa/storage/storageserver/bucketintegritychecker.h @@ -10,6 +10,7 @@ #pragma once +#include <vespa/storage/common/content_bucket_space_repo.h> #include <vespa/storage/common/servicelayercomponent.h> #include <vespa/storage/common/storagelinkqueued.h> #include <vespa/storage/config/config-stor-integritychecker.h> @@ -67,17 +68,20 @@ public: */ enum State { NOT_STARTED, IN_PROGRESS, DONE }; + size_t currentBucketSpaceIndex; document::BucketId currentBucket; uint32_t pendingCount; State state; uint8_t disk; - std::list<document::BucketId> failedRepairs; + std::list<document::Bucket> failedRepairs; uint32_t checkedBuckets; uint32_t retriedBuckets; - DiskData() : currentBucket(0), pendingCount(0), - state(NOT_STARTED), disk(255), - checkedBuckets(0), retriedBuckets(0) {} + DiskData() + : currentBucketSpaceIndex(0), currentBucket(0), + pendingCount(0), state(NOT_STARTED), disk(255), + checkedBuckets(0), retriedBuckets(0) + {} bool done() const; // Whether we're still working on this disk bool working() const; // Whether we've stated and not finished @@ -85,11 +89,14 @@ public: * Get the next bucket to repair. If no more to iterate, random bucket * is returned. Check if done() afterwards. */ - document::BucketId iterate(StorBucketDatabase&); + document::Bucket iterate(const ContentBucketSpaceRepo::BucketSpaces& bucketSpaces, + const ContentBucketSpaceRepo& bucketSpaceRepo); }; private: + ServiceLayerComponent _component; uint32_t _cycleCount; + ContentBucketSpaceRepo::BucketSpaces _bucketSpaces; std::vector<DiskData> _status; framework::SecondTime _lastCycleStart; uint32_t _cycleStartBucketCount; @@ -98,11 +105,9 @@ private: bool _currentRunWithFullVerification; bool _verifyAllRepairs; SchedulingOptions _scheduleOptions; - lib::ClusterState _systemState; vespalib::Monitor _wait; config::ConfigFetcher _configFetcher; framework::MilliSecTime _maxThreadWaitTime; - ServiceLayerComponent _component; framework::Thread::UP _thread; BucketIntegrityChecker(const BucketIntegrityChecker &); @@ -130,7 +135,6 @@ private: void configure(std::unique_ptr<vespa::config::content::core::StorIntegritycheckerConfig>) override; void onDoneInit() override; bool onInternalReply(const std::shared_ptr<api::InternalReply>&) override; - bool onSetSystemState(const std::shared_ptr<api::SetSystemStateCommand>&) override; bool onNotifyBucketChangeReply(const std::shared_ptr<api::NotifyBucketChangeReply>&) override { return true; } SchedulingOptions::RunState getCurrentRunState(framework::SecondTime time) const; void run(framework::ThreadHandle&) override; diff --git a/storage/src/vespa/storage/storageserver/changedbucketownershiphandler.cpp b/storage/src/vespa/storage/storageserver/changedbucketownershiphandler.cpp index 4b624199459..1a8be145470 100644 --- a/storage/src/vespa/storage/storageserver/changedbucketownershiphandler.cpp +++ b/storage/src/vespa/storage/storageserver/changedbucketownershiphandler.cpp @@ -5,6 +5,7 @@ #include <vespa/storage/bucketdb/storbucketdb.h> #include <vespa/storage/common/messagebucket.h> #include <vespa/storage/common/nodestateupdater.h> +#include <vespa/storage/common/content_bucket_space_repo.h> #include <vespa/vespalib/util/exceptions.h> #include <vespa/log/bufferedlogger.h> @@ -20,10 +21,9 @@ ChangedBucketOwnershipHandler::ChangedBucketOwnershipHandler( _metrics(), _configFetcher(configUri.getContext()), _stateLock(), - _currentDistribution(_component.getDistribution()), _currentState(), // Not set yet, so ownership will not be valid _currentOwnership(std::make_shared<OwnershipState>( - _currentDistribution, _currentState)), + _component.getBucketSpaceRepo(), _currentState)), _abortQueuedAndPendingOnStateChange(false), _abortMutatingIdealStateOps(false), _abortMutatingExternalLoadOps(false) @@ -66,7 +66,7 @@ ChangedBucketOwnershipHandler::setCurrentOwnershipWithStateNoLock( { _currentState = std::make_shared<lib::ClusterState>(newState); _currentOwnership = std::make_shared<OwnershipState>( - _currentDistribution, _currentState); + _component.getBucketSpaceRepo(), _currentState); } namespace { @@ -94,21 +94,32 @@ ChangedBucketOwnershipHandler::Metrics::Metrics(metrics::MetricSet* owner) {} ChangedBucketOwnershipHandler::Metrics::~Metrics() { } -ChangedBucketOwnershipHandler::OwnershipState::OwnershipState(const lib::Distribution::SP& distribution, +ChangedBucketOwnershipHandler::OwnershipState::OwnershipState(const ContentBucketSpaceRepo &contentBucketSpaceRepo, const lib::ClusterState::CSP& state) - : _distribution(distribution), + : _distributions(), _state(state) { + for (const auto &elem : contentBucketSpaceRepo) { + auto distribution = elem.second->getDistribution(); + if (distribution) { + _distributions.emplace(elem.first, std::move(distribution)); + } + } } + + ChangedBucketOwnershipHandler::OwnershipState::~OwnershipState() {} uint16_t ChangedBucketOwnershipHandler::OwnershipState::ownerOf( - const document::BucketId& bucket) const + const document::Bucket& bucket) const { + auto distributionItr = _distributions.find(bucket.getBucketSpace()); + assert(distributionItr != _distributions.end()); + const auto &distribution = *distributionItr->second; try { - return _distribution->getIdealDistributorNode(*_state, bucket); + return distribution.getIdealDistributorNode(*_state, bucket.getBucketId()); } catch (lib::TooFewBucketBitsInUseException& e) { LOGBP(debug, "Too few bucket bits used for %s to be assigned to " @@ -121,7 +132,7 @@ ChangedBucketOwnershipHandler::OwnershipState::ownerOf( "for available distributors before reaching this code path! " "Cluster state is '%s', distribution is '%s'", _state->toString().c_str(), - _distribution->toString().c_str()); + distribution.toString().c_str()); } catch (const std::exception& e) { LOG(error, "Got unknown exception while resolving distributor: %s", @@ -159,8 +170,8 @@ class StateDiffLazyAbortPredicate if (_allDistributorsHaveGoneDown) { return true; } - uint16_t oldOwner(_oldState.ownerOf(bucket.getBucketId())); - uint16_t newOwner(_newState.ownerOf(bucket.getBucketId())); + uint16_t oldOwner(_oldState.ownerOf(bucket)); + uint16_t newOwner(_newState.ownerOf(bucket)); if (oldOwner != newOwner) { LOG(spam, "Owner of %s was %u, now %u. Operation should be aborted", bucket.toString().c_str(), oldOwner, newOwner); @@ -262,9 +273,8 @@ void ChangedBucketOwnershipHandler::storageDistributionChanged() { vespalib::LockGuard guard(_stateLock); - _currentDistribution = _component.getDistribution(); _currentOwnership = std::make_shared<OwnershipState>( - _currentDistribution, _currentState); + _component.getBucketSpaceRepo(), _currentState); } bool @@ -324,7 +334,7 @@ ChangedBucketOwnershipHandler::sendingDistributorOwnsBucketInCurrentState( try { document::Bucket opBucket(getStorageMessageBucket(cmd)); - return (current->ownerOf(opBucket.getBucketId()) == cmd.getSourceIndex()); + return (current->ownerOf(opBucket) == cmd.getSourceIndex()); } catch (vespalib::IllegalArgumentException& e) { LOG(error, "Precondition violation: unable to get bucket from " diff --git a/storage/src/vespa/storage/storageserver/changedbucketownershiphandler.h b/storage/src/vespa/storage/storageserver/changedbucketownershiphandler.h index b9508fc91b2..9c6e4256db6 100644 --- a/storage/src/vespa/storage/storageserver/changedbucketownershiphandler.h +++ b/storage/src/vespa/storage/storageserver/changedbucketownershiphandler.h @@ -13,6 +13,7 @@ #include <vespa/storage/persistence/messages.h> #include <atomic> #include <vector> +#include <unordered_map> namespace storage { @@ -63,7 +64,7 @@ public: }; /** - * Wrapper around the distribution & state pair that decides how to + * Wrapper around the distribution & state pairs that decides how to * compute the owner distributor for a bucket. It's possible to have * an ownership state with a nullptr cluster state when the node * initially starts up, which is why no owership state must be used unless @@ -71,21 +72,21 @@ public: */ class OwnershipState { - lib::Distribution::SP _distribution; + using BucketSpace = document::BucketSpace; + std::unordered_map<BucketSpace, std::shared_ptr<const lib::Distribution>, BucketSpace::hash> _distributions; lib::ClusterState::CSP _state; public: using SP = std::shared_ptr<OwnershipState>; using CSP = std::shared_ptr<const OwnershipState>; - OwnershipState(const lib::Distribution::SP& distribution, + OwnershipState(const ContentBucketSpaceRepo &contentBucketSpaceRepo, const lib::ClusterState::CSP& state); ~OwnershipState(); static const uint16_t FAILED_TO_RESOLVE = 0xffff; bool valid() const { - return ((_distribution.get() != nullptr) - && (_state.get() != nullptr)); + return (!_distributions.empty() && _state); } /** @@ -96,7 +97,7 @@ public: return *_state; } - uint16_t ownerOf(const document::BucketId& bucket) const; + uint16_t ownerOf(const document::Bucket& bucket) const; }; /** @@ -111,7 +112,6 @@ private: Metrics _metrics; config::ConfigFetcher _configFetcher; vespalib::Lock _stateLock; - lib::Distribution::SP _currentDistribution; lib::ClusterState::CSP _currentState; OwnershipState::CSP _currentOwnership; diff --git a/storage/src/vespa/storage/storageserver/communicationmanager.cpp b/storage/src/vespa/storage/storageserver/communicationmanager.cpp index c90b18038c2..eae51b90165 100644 --- a/storage/src/vespa/storage/storageserver/communicationmanager.cpp +++ b/storage/src/vespa/storage/storageserver/communicationmanager.cpp @@ -1,18 +1,20 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + #include "communicationmanager.h" #include "fnetlistener.h" #include "rpcrequestwrapper.h" -#include <vespa/storage/config/config-stor-server.h> -#include <vespa/storage/common/nodestateupdater.h> -#include <vespa/storageframework/generic/clock/timer.h> #include <vespa/documentapi/messagebus/messages/wrongdistributionreply.h> -#include <vespa/storageapi/message/state.h> -#include <vespa/messagebus/rpcmessagebus.h> -#include <vespa/messagebus/network/rpcnetworkparams.h> #include <vespa/messagebus/emptyreply.h> +#include <vespa/messagebus/network/rpcnetworkparams.h> +#include <vespa/messagebus/rpcmessagebus.h> +#include <vespa/storage/common/bucket_resolver.h> +#include <vespa/storage/common/nodestateupdater.h> +#include <vespa/storage/config/config-stor-server.h> +#include <vespa/storageapi/message/state.h> +#include <vespa/storageframework/generic/clock/timer.h> #include <vespa/vespalib/stllike/asciistream.h> -#include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/stllike/hash_map.hpp> +#include <vespa/vespalib/util/stringfmt.h> #include <vespa/log/bufferedlogger.h> LOG_SETUP(".communication.manager"); @@ -267,6 +269,22 @@ CommunicationManager::handleReply(std::unique_ptr<mbus::Reply> reply) } } +namespace { + +struct PlaceHolderBucketResolver : public BucketResolver { + virtual document::Bucket bucketFromId(const document::DocumentId &) const override { + return document::Bucket(document::BucketSpace::placeHolder(), document::BucketId(0)); + } + virtual document::BucketSpace bucketSpaceFromName(const vespalib::string &) const override { + return document::BucketSpace::placeHolder(); + } + virtual vespalib::string nameFromBucketSpace(const document::BucketSpace &) const override { + return ""; + } +}; + +} + CommunicationManager::CommunicationManager(StorageComponentRegister& compReg, const config::ConfigUri & configUri) : StorageLink("Communication manager"), _component(compReg, "communicationmanager"), @@ -277,7 +295,8 @@ CommunicationManager::CommunicationManager(StorageComponentRegister& compReg, co _count(0), _configUri(configUri), _closed(false), - _docApiConverter(configUri), + _bucketResolver(std::make_unique<PlaceHolderBucketResolver>()), + _docApiConverter(configUri, *_bucketResolver), _messageAllocTypes(_component.getMemoryManager()) { _component.registerMetricUpdateHook(*this, framework::SecondTime(5)); diff --git a/storage/src/vespa/storage/storageserver/communicationmanager.h b/storage/src/vespa/storage/storageserver/communicationmanager.h index e1f7888ac67..4cf3f33e6ea 100644 --- a/storage/src/vespa/storage/storageserver/communicationmanager.h +++ b/storage/src/vespa/storage/storageserver/communicationmanager.h @@ -35,6 +35,7 @@ namespace mbus { } namespace storage { +class BucketResolver; class VisitorMbusSession; class Visitor; class VisitorThread; @@ -167,6 +168,7 @@ private: config::ConfigUri _configUri; std::atomic<bool> _closed; + std::unique_ptr<BucketResolver> _bucketResolver; DocumentApiConverter _docApiConverter; framework::Thread::UP _thread; MessageAllocationTypes _messageAllocTypes; diff --git a/storage/src/vespa/storage/storageserver/documentapiconverter.cpp b/storage/src/vespa/storage/storageserver/documentapiconverter.cpp index ddc11e9ad77..c2761b3d832 100644 --- a/storage/src/vespa/storage/storageserver/documentapiconverter.cpp +++ b/storage/src/vespa/storage/storageserver/documentapiconverter.cpp @@ -1,18 +1,20 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + #include "documentapiconverter.h" #include "priorityconverter.h" +#include <vespa/document/bucket/bucketidfactory.h> #include <vespa/documentapi/documentapi.h> -#include <vespa/storageapi/message/visitor.h> +#include <vespa/storage/common/bucket_resolver.h> +#include <vespa/storageapi/message/batch.h> #include <vespa/storageapi/message/datagram.h> -#include <vespa/storageapi/message/persistence.h> -#include <vespa/storageapi/message/searchresult.h> -#include <vespa/storageapi/message/queryresult.h> #include <vespa/storageapi/message/documentsummary.h> #include <vespa/storageapi/message/multioperation.h> +#include <vespa/storageapi/message/persistence.h> +#include <vespa/storageapi/message/queryresult.h> #include <vespa/storageapi/message/removelocation.h> +#include <vespa/storageapi/message/searchresult.h> #include <vespa/storageapi/message/stat.h> -#include <vespa/storageapi/message/batch.h> -#include <vespa/document/bucket/bucketidfactory.h> +#include <vespa/storageapi/message/visitor.h> #include <vespa/log/log.h> LOG_SETUP(".documentapiconverter"); @@ -21,8 +23,10 @@ using document::BucketSpace; namespace storage { -DocumentApiConverter::DocumentApiConverter(const config::ConfigUri & configUri) - : _priConverter(std::make_unique<PriorityConverter>(configUri)) +DocumentApiConverter::DocumentApiConverter(const config::ConfigUri &configUri, + const BucketResolver &bucketResolver) + : _priConverter(std::make_unique<PriorityConverter>(configUri)), + _bucketResolver(bucketResolver) {} DocumentApiConverter::~DocumentApiConverter() {} @@ -38,7 +42,8 @@ DocumentApiConverter::toStorageAPI(documentapi::DocumentMessage& fromMsg, case DocumentProtocol::MESSAGE_PUTDOCUMENT: { documentapi::PutDocumentMessage& from(static_cast<documentapi::PutDocumentMessage&>(fromMsg)); - auto to = std::make_unique<api::PutCommand>(document::Bucket(BucketSpace::placeHolder(), document::BucketId(0)), from.stealDocument(), from.getTimestamp()); + document::Bucket bucket = _bucketResolver.bucketFromId(from.getDocument().getId()); + auto to = std::make_unique<api::PutCommand>(bucket, from.stealDocument(), from.getTimestamp()); to->setCondition(from.getCondition()); toMsg = std::move(to); break; @@ -46,8 +51,8 @@ DocumentApiConverter::toStorageAPI(documentapi::DocumentMessage& fromMsg, case DocumentProtocol::MESSAGE_UPDATEDOCUMENT: { documentapi::UpdateDocumentMessage& from(static_cast<documentapi::UpdateDocumentMessage&>(fromMsg)); - auto to = std::make_unique<api::UpdateCommand>(document::Bucket(BucketSpace::placeHolder(), document::BucketId(0)), from.stealDocumentUpdate(), - from.getNewTimestamp()); + document::Bucket bucket = _bucketResolver.bucketFromId(from.getDocumentUpdate().getId()); + auto to = std::make_unique<api::UpdateCommand>(bucket, from.stealDocumentUpdate(), from.getNewTimestamp()); to->setOldTimestamp(from.getOldTimestamp()); to->setCondition(from.getCondition()); toMsg = std::move(to); @@ -56,7 +61,7 @@ DocumentApiConverter::toStorageAPI(documentapi::DocumentMessage& fromMsg, case DocumentProtocol::MESSAGE_REMOVEDOCUMENT: { documentapi::RemoveDocumentMessage& from(static_cast<documentapi::RemoveDocumentMessage&>(fromMsg)); - auto to = std::make_unique<api::RemoveCommand>(document::Bucket(BucketSpace::placeHolder(), document::BucketId(0)), from.getDocumentId(), 0); + auto to = std::make_unique<api::RemoveCommand>(_bucketResolver.bucketFromId(from.getDocumentId()), from.getDocumentId(), 0); to->setCondition(from.getCondition()); toMsg = std::move(to); break; @@ -64,14 +69,14 @@ DocumentApiConverter::toStorageAPI(documentapi::DocumentMessage& fromMsg, case DocumentProtocol::MESSAGE_GETDOCUMENT: { documentapi::GetDocumentMessage& from(static_cast<documentapi::GetDocumentMessage&>(fromMsg)); - auto to = std::make_unique<api::GetCommand>(document::Bucket(BucketSpace::placeHolder(), document::BucketId(0)), from.getDocumentId(), from.getFieldSet()); + auto to = std::make_unique<api::GetCommand>(_bucketResolver.bucketFromId(from.getDocumentId()), from.getDocumentId(), from.getFieldSet()); toMsg.reset(to.release()); break; } case DocumentProtocol::MESSAGE_CREATEVISITOR: { documentapi::CreateVisitorMessage& from(static_cast<documentapi::CreateVisitorMessage&>(fromMsg)); - auto to = std::make_unique<api::CreateVisitorCommand>(BucketSpace::placeHolder(), + auto to = std::make_unique<api::CreateVisitorCommand>(_bucketResolver.bucketSpaceFromName(from.getBucketSpace()), from.getLibraryName(), from.getInstanceId(), from.getDocumentSelection()); @@ -113,13 +118,15 @@ DocumentApiConverter::toStorageAPI(documentapi::DocumentMessage& fromMsg, case DocumentProtocol::MESSAGE_STATBUCKET: { documentapi::StatBucketMessage& from(static_cast<documentapi::StatBucketMessage&>(fromMsg)); - toMsg = std::make_unique<api::StatBucketCommand>(from.getBucket(), from.getDocumentSelection()); + document::Bucket bucket(_bucketResolver.bucketSpaceFromName(from.getBucketSpace()), from.getBucketId()); + toMsg = std::make_unique<api::StatBucketCommand>(bucket, from.getDocumentSelection()); break; } case DocumentProtocol::MESSAGE_GETBUCKETLIST: { documentapi::GetBucketListMessage& from(static_cast<documentapi::GetBucketListMessage&>(fromMsg)); - toMsg = std::make_unique<api::GetBucketListCommand>(from.getBucket()); + document::Bucket bucket(_bucketResolver.bucketSpaceFromName(from.getBucketSpace()), from.getBucketId()); + toMsg = std::make_unique<api::GetBucketListCommand>(bucket); break; } case DocumentProtocol::MESSAGE_VISITORINFO: @@ -138,7 +145,8 @@ DocumentApiConverter::toStorageAPI(documentapi::DocumentMessage& fromMsg, case DocumentProtocol::MESSAGE_REMOVELOCATION: { documentapi::RemoveLocationMessage& from(static_cast<documentapi::RemoveLocationMessage&>(fromMsg)); - api::RemoveLocationCommand::UP to(new api::RemoveLocationCommand(from.getDocumentSelection(), document::Bucket(BucketSpace::placeHolder(), document::BucketId(0)))); + document::Bucket bucket(_bucketResolver.bucketSpaceFromName(from.getBucketSpace()), document::BucketId(0)); + api::RemoveLocationCommand::UP to(new api::RemoveLocationCommand(from.getDocumentSelection(), bucket)); toMsg.reset(to.release()); break; } @@ -290,6 +298,7 @@ DocumentApiConverter::toDocumentAPI(api::StorageCommand& fromMsg, const document documentapi::CreateVisitorMessage::UP to( new documentapi::CreateVisitorMessage(from.getLibraryName(), from.getInstanceId(), from.getControlDestination(), from.getDataDestination())); + to->setBucketSpace(_bucketResolver.nameFromBucketSpace(from.getBucketSpace())); to->setDocumentSelection(from.getDocumentSelection()); to->setMaximumPendingReplyCount(from.getMaximumPendingReplyCount()); to->setParameters(from.getParameters()); @@ -315,7 +324,9 @@ DocumentApiConverter::toDocumentAPI(api::StorageCommand& fromMsg, const document case api::MessageType::STATBUCKET_ID: { api::StatBucketCommand& from(static_cast<api::StatBucketCommand&>(fromMsg)); - toMsg = std::make_unique<documentapi::StatBucketMessage>(from.getBucket(), from.getDocumentSelection()); + auto statMsg = std::make_unique<documentapi::StatBucketMessage>(from.getBucket().getBucketId(), from.getDocumentSelection()); + statMsg->setBucketSpace(_bucketResolver.nameFromBucketSpace(from.getBucket().getBucketSpace())); + toMsg = std::move(statMsg); break; } default: diff --git a/storage/src/vespa/storage/storageserver/documentapiconverter.h b/storage/src/vespa/storage/storageserver/documentapiconverter.h index 0cc2f3f3b9c..5310bcd0127 100644 --- a/storage/src/vespa/storage/storageserver/documentapiconverter.h +++ b/storage/src/vespa/storage/storageserver/documentapiconverter.h @@ -13,6 +13,7 @@ namespace api { class StorageReply; } +class BucketResolver; class PriorityConverter; /** Converts messages from storageapi to documentapi and @@ -21,7 +22,8 @@ class PriorityConverter; class DocumentApiConverter { public: - DocumentApiConverter(const config::ConfigUri & configUri); + DocumentApiConverter(const config::ConfigUri &configUri, + const BucketResolver &bucketResolver); ~DocumentApiConverter(); std::unique_ptr<api::StorageCommand> toStorageAPI(documentapi::DocumentMessage& msg, const document::DocumentTypeRepo::SP &repo); @@ -31,6 +33,7 @@ public: const PriorityConverter& getPriorityConverter() const { return *_priConverter; } private: std::unique_ptr<PriorityConverter> _priConverter; + const BucketResolver &_bucketResolver; }; } // namespace storage diff --git a/storage/src/vespa/storage/storageserver/mergethrottler.cpp b/storage/src/vespa/storage/storageserver/mergethrottler.cpp index be30c459bdf..60dedab5ce8 100644 --- a/storage/src/vespa/storage/storageserver/mergethrottler.cpp +++ b/storage/src/vespa/storage/storageserver/mergethrottler.cpp @@ -431,7 +431,7 @@ bool MergeThrottler::isMergeAlreadyKnown(const api::StorageMessage::SP& msg) const { auto& mergeCmd = static_cast<const api::MergeBucketCommand&>(*msg); - return _merges.find(mergeCmd.getBucketId()) != _merges.end(); + return _merges.find(mergeCmd.getBucket()) != _merges.end(); } bool @@ -830,10 +830,8 @@ MergeThrottler::processNewMergeCommand( // and that we can fit it into our window. // Register the merge now so that it will contribute to filling up our // merge throttling window. - assert(_merges.find(mergeCmd.getBucketId()) == _merges.end()); - auto state = _merges.insert( - std::make_pair(mergeCmd.getBucketId(), - ChainedMergeState(msg))).first; + assert(_merges.find(mergeCmd.getBucket()) == _merges.end()); + auto state = _merges.emplace(mergeCmd.getBucket(), ChainedMergeState(msg)).first; LOG(debug, "Added merge %s to internal state", mergeCmd.toString().c_str()); @@ -911,7 +909,7 @@ MergeThrottler::processCycledMergeCommand( MergeNodeSequence nodeSeq(mergeCmd, _component.getIndex()); - auto mergeIter = _merges.find(mergeCmd.getBucketId()); + auto mergeIter = _merges.find(mergeCmd.getBucket()); assert(mergeIter != _merges.end()); if (mergeIter->second.isAborted()) { @@ -964,7 +962,7 @@ MergeThrottler::processMergeReply( { auto& mergeReply = dynamic_cast<const api::MergeBucketReply&>(*msg); - auto mergeIter = _merges.find(mergeReply.getBucketId()); + auto mergeIter = _merges.find(mergeReply.getBucket()); if (mergeIter == _merges.end()) { LOG(warning, "Received %s, which has no command mapped " "for it. Cannot send chained reply!", @@ -1075,7 +1073,7 @@ MergeThrottler::onDown(const std::shared_ptr<api::StorageMessage>& msg) } else if (isDiffCommand(*msg)) { vespalib::LockGuard lock(_stateLock); auto& cmd = static_cast<api::StorageCommand&>(*msg); - if (bucketIsUnknownOrAborted(cmd.getBucketId())) { + if (bucketIsUnknownOrAborted(cmd.getBucket())) { sendUp(makeAbortReply(cmd, "no state recorded for bucket in merge " "throttler, source merge probably aborted earlier")); return true; @@ -1104,7 +1102,7 @@ MergeThrottler::isMergeReply(const api::StorageMessage& msg) const } bool -MergeThrottler::bucketIsUnknownOrAborted(const document::BucketId& bucket) const +MergeThrottler::bucketIsUnknownOrAborted(const document::Bucket& bucket) const { auto it = _merges.find(bucket); if (it == _merges.end()) { diff --git a/storage/src/vespa/storage/storageserver/mergethrottler.h b/storage/src/vespa/storage/storageserver/mergethrottler.h index 69fdfdc1b95..d62e9a042b2 100644 --- a/storage/src/vespa/storage/storageserver/mergethrottler.h +++ b/storage/src/vespa/storage/storageserver/mergethrottler.h @@ -13,7 +13,7 @@ #include <vespa/storage/distributor/messageguard.h> #include <vespa/storageframework/generic/status/htmlstatusreporter.h> #include <vespa/storageapi/message/bucket.h> -#include <vespa/document/bucket/bucketid.h> +#include <vespa/document/bucket/bucket.h> #include <vespa/vespalib/util/document_runnable.h> #include <vespa/messagebus/staticthrottlepolicy.h> #include <vespa/metrics/metrics.h> @@ -134,7 +134,7 @@ private: const std::string& getMergeCmdString() const { return _cmdString; } }; - typedef std::map<document::BucketId, ChainedMergeState> ActiveMergeMap; + typedef std::map<document::Bucket, ChainedMergeState> ActiveMergeMap; // Use a set rather than a priority_queue, since we want to be // able to iterate over the collection during status rendering @@ -371,7 +371,7 @@ private: bool isDiffCommand(const api::StorageMessage& msg) const; bool isMergeCommand(const api::StorageMessage& msg) const; bool isMergeReply(const api::StorageMessage& msg) const; - bool bucketIsUnknownOrAborted(const document::BucketId& bucket) const; + bool bucketIsUnknownOrAborted(const document::Bucket& bucket) const; std::shared_ptr<api::StorageMessage> makeAbortReply( api::StorageCommand& cmd, |