From 9a23880768d1045917d635d2783b68268dca1047 Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Mon, 13 Mar 2023 10:20:57 +0000 Subject: Be explicit about lbound/ubound for bucket DB iteration and add lbound variant The DB API was rather coy about whether `forEach` had lower or upper bound semantics with regards to the bucket ID passed in as a starting point. Be explicit and add a lower-bound variant. --- .../src/tests/distributor/bucketdatabasetest.cpp | 85 +++++++++++++++------- .../top_level_bucket_db_updater_test.cpp | 4 +- .../storage/bucketdb/btree_bucket_database.cpp | 13 +++- .../vespa/storage/bucketdb/btree_bucket_database.h | 3 +- .../src/vespa/storage/bucketdb/bucketdatabase.h | 12 ++- .../storage/distributor/idealstatemanager.cpp | 2 +- .../operations/external/visitoroperation.cpp | 22 +++--- 7 files changed, 90 insertions(+), 51 deletions(-) (limited to 'storage') diff --git a/storage/src/tests/distributor/bucketdatabasetest.cpp b/storage/src/tests/distributor/bucketdatabasetest.cpp index 661fd7fee72..fcc64e0cccf 100644 --- a/storage/src/tests/distributor/bucketdatabasetest.cpp +++ b/storage/src/tests/distributor/bucketdatabasetest.cpp @@ -87,7 +87,7 @@ struct ListAllProcessor : public BucketDatabase::EntryProcessor { std::string dump_db(const BucketDatabase& db) { ListAllProcessor proc; - db.forEach(proc, document::BucketId()); + db.for_each_upper_bound(proc, document::BucketId()); return proc.ost.str(); } @@ -122,41 +122,70 @@ TEST_P(BucketDatabaseTest, iterating) { { ListAllProcessor proc; - db().forEach(proc, document::BucketId()); - - EXPECT_EQ( - std::string( - "BucketId(0x4000000000000010) : " - "node(idx=1,crc=0x0,docs=0/0,bytes=1/1,trusted=false,active=false,ready=false)\n" - "BucketId(0x400000000000002a) : " - "node(idx=3,crc=0x0,docs=0/0,bytes=1/1,trusted=false,active=false,ready=false)\n" - "BucketId(0x400000000000000b) : " - "node(idx=2,crc=0x0,docs=0/0,bytes=1/1,trusted=false,active=false,ready=false)\n"), - proc.ost.str()); + db().for_each_upper_bound(proc, document::BucketId()); + + EXPECT_EQ("BucketId(0x4000000000000010) : " + "node(idx=1,crc=0x0,docs=0/0,bytes=1/1,trusted=false,active=false,ready=false)\n" + "BucketId(0x400000000000002a) : " + "node(idx=3,crc=0x0,docs=0/0,bytes=1/1,trusted=false,active=false,ready=false)\n" + "BucketId(0x400000000000000b) : " + "node(idx=2,crc=0x0,docs=0/0,bytes=1/1,trusted=false,active=false,ready=false)\n", + proc.ost.str()); } { ListAllProcessor proc; - db().forEach(proc, document::BucketId(16, 0x2a)); + db().for_each_lower_bound(proc, document::BucketId()); // lbound (in practice) equal to ubound when starting at zero + + EXPECT_EQ("BucketId(0x4000000000000010) : " + "node(idx=1,crc=0x0,docs=0/0,bytes=1/1,trusted=false,active=false,ready=false)\n" + "BucketId(0x400000000000002a) : " + "node(idx=3,crc=0x0,docs=0/0,bytes=1/1,trusted=false,active=false,ready=false)\n" + "BucketId(0x400000000000000b) : " + "node(idx=2,crc=0x0,docs=0/0,bytes=1/1,trusted=false,active=false,ready=false)\n", + proc.ost.str()); + } + + { + ListAllProcessor proc; + db().for_each_upper_bound(proc, document::BucketId(16, 0x2a)); + + EXPECT_EQ("BucketId(0x400000000000000b) : " + "node(idx=2,crc=0x0,docs=0/0,bytes=1/1,trusted=false,active=false,ready=false)\n", + proc.ost.str()); + } + + { + ListAllProcessor proc; + db().for_each_lower_bound(proc, document::BucketId(16, 0x2a)); + // Includes 0x2a + EXPECT_EQ("BucketId(0x400000000000002a) : " + "node(idx=3,crc=0x0,docs=0/0,bytes=1/1,trusted=false,active=false,ready=false)\n" + "BucketId(0x400000000000000b) : " + "node(idx=2,crc=0x0,docs=0/0,bytes=1/1,trusted=false,active=false,ready=false)\n", + proc.ost.str()); + } - EXPECT_EQ( - std::string( - "BucketId(0x400000000000000b) : " - "node(idx=2,crc=0x0,docs=0/0,bytes=1/1,trusted=false,active=false,ready=false)\n"), - proc.ost.str()); + { + StoppingProcessor proc; + db().for_each_upper_bound(proc, document::BucketId()); + + EXPECT_EQ("BucketId(0x4000000000000010) : " + "node(idx=1,crc=0x0,docs=0/0,bytes=1/1,trusted=false,active=false,ready=false)\n" + "BucketId(0x400000000000002a) : " + "node(idx=3,crc=0x0,docs=0/0,bytes=1/1,trusted=false,active=false,ready=false)\n", + proc.ost.str()); } { StoppingProcessor proc; - db().forEach(proc, document::BucketId()); - - EXPECT_EQ( - std::string( - "BucketId(0x4000000000000010) : " - "node(idx=1,crc=0x0,docs=0/0,bytes=1/1,trusted=false,active=false,ready=false)\n" - "BucketId(0x400000000000002a) : " - "node(idx=3,crc=0x0,docs=0/0,bytes=1/1,trusted=false,active=false,ready=false)\n"), - proc.ost.str()); + db().for_each_lower_bound(proc, document::BucketId()); + + EXPECT_EQ("BucketId(0x4000000000000010) : " + "node(idx=1,crc=0x0,docs=0/0,bytes=1/1,trusted=false,active=false,ready=false)\n" + "BucketId(0x400000000000002a) : " + "node(idx=3,crc=0x0,docs=0/0,bytes=1/1,trusted=false,active=false,ready=false)\n", + proc.ost.str()); } } @@ -761,7 +790,7 @@ TEST_P(BucketDatabaseTest, DISABLED_benchmark_const_iteration) { auto elapsed = vespalib::BenchmarkTimer::benchmark([&] { DummyProcessor proc; - db().forEach(proc, document::BucketId()); + db().for_each_upper_bound(proc, document::BucketId()); }, 5); fprintf(stderr, "Full DB iteration of %s takes %g seconds\n", db().toString(false).c_str(), elapsed); diff --git a/storage/src/tests/distributor/top_level_bucket_db_updater_test.cpp b/storage/src/tests/distributor/top_level_bucket_db_updater_test.cpp index 7b4f688b253..d5d33a178fe 100644 --- a/storage/src/tests/distributor/top_level_bucket_db_updater_test.cpp +++ b/storage/src/tests/distributor/top_level_bucket_db_updater_test.cpp @@ -1697,7 +1697,7 @@ TopLevelBucketDBUpdaterTest::merge_bucket_lists( BucketDumper dumper_tmp(true); for (auto* s : distributor_stripes()) { auto& db = s->getBucketSpaceRepo().get(document::FixedBucketSpaces::default_space()).getBucketDatabase(); - db.forEach(dumper_tmp); + db.for_each_upper_bound(dumper_tmp); } { @@ -1717,7 +1717,7 @@ TopLevelBucketDBUpdaterTest::merge_bucket_lists( BucketDumper dumper(include_bucket_info); for (auto* s : distributor_stripes()) { auto& db = s->getBucketSpaceRepo().get(document::FixedBucketSpaces::default_space()).getBucketDatabase(); - db.forEach(dumper); + db.for_each_upper_bound(dumper); db.clear(); } return dumper.ost.str(); diff --git a/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp b/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp index 23421e724a2..baec5494b36 100644 --- a/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp +++ b/storage/src/vespa/storage/bucketdb/btree_bucket_database.cpp @@ -151,9 +151,16 @@ BTreeBucketDatabase::process_update(const document::BucketId& bucket, EntryUpdat } // TODO need snapshot read with guarding -// FIXME semantics of for-each in judy and bit tree DBs differ, former expects lbound, latter ubound..! -// FIXME but bit-tree code says "lowerBound" in impl and "after" in declaration??? -void BTreeBucketDatabase::forEach(EntryProcessor& proc, const BucketId& after) const { +void BTreeBucketDatabase::for_each_lower_bound(EntryProcessor& proc, const BucketId& at_or_after) const { + for (auto iter = _impl->lower_bound(at_or_after.toKey()); iter.valid(); ++iter) { + if (!proc.process(_impl->const_value_ref_from_valid_iterator(iter))) { + break; + } + } +} + +// TODO need snapshot read with guarding +void BTreeBucketDatabase::for_each_upper_bound(EntryProcessor& proc, const BucketId& after) const { for (auto iter = _impl->upper_bound(after.toKey()); iter.valid(); ++iter) { if (!proc.process(_impl->const_value_ref_from_valid_iterator(iter))) { break; diff --git a/storage/src/vespa/storage/bucketdb/btree_bucket_database.h b/storage/src/vespa/storage/bucketdb/btree_bucket_database.h index c20dad13618..3cf77b5444b 100644 --- a/storage/src/vespa/storage/bucketdb/btree_bucket_database.h +++ b/storage/src/vespa/storage/bucketdb/btree_bucket_database.h @@ -44,7 +44,8 @@ public: std::vector& entries) const override; void update(const Entry& newEntry) override; void process_update(const document::BucketId& bucket, EntryUpdateProcessor &processor, bool create_if_nonexisting) override; - void forEach(EntryProcessor&, const document::BucketId& after) const override; + void for_each_lower_bound(EntryProcessor&, const document::BucketId& at_or_after) const override; + void for_each_upper_bound(EntryProcessor&, const document::BucketId& after) const override; Entry upperBound(const document::BucketId& value) const override; uint64_t size() const override; void clear() override; diff --git a/storage/src/vespa/storage/bucketdb/bucketdatabase.h b/storage/src/vespa/storage/bucketdb/bucketdatabase.h index d3d9c34c7fc..4e0b727036a 100644 --- a/storage/src/vespa/storage/bucketdb/bucketdatabase.h +++ b/storage/src/vespa/storage/bucketdb/bucketdatabase.h @@ -99,9 +99,15 @@ public: virtual void process_update(const document::BucketId& bucket, EntryUpdateProcessor &processor, bool create_if_nonexisting) = 0; - virtual void forEach( - EntryProcessor&, - const document::BucketId& after = document::BucketId()) const = 0; + virtual void for_each_lower_bound(EntryProcessor&, const document::BucketId& at_or_after) const = 0; + void for_each_lower_bound(EntryProcessor& proc) const { + for_each_lower_bound(proc, document::BucketId()); + } + + virtual void for_each_upper_bound(EntryProcessor&, const document::BucketId& after) const = 0; + void for_each_upper_bound(EntryProcessor& proc) const { + for_each_upper_bound(proc, document::BucketId()); + } using TrailingInserter = bucketdb::TrailingInserter; using Merger = bucketdb::Merger; diff --git a/storage/src/vespa/storage/distributor/idealstatemanager.cpp b/storage/src/vespa/storage/distributor/idealstatemanager.cpp index cf255b5ec18..2c33bc490fe 100644 --- a/storage/src/vespa/storage/distributor/idealstatemanager.cpp +++ b/storage/src/vespa/storage/distributor/idealstatemanager.cpp @@ -265,7 +265,7 @@ IdealStateManager::getBucketStatus( void IdealStateManager::dump_bucket_space_db_status(document::BucketSpace bucket_space, std::ostream& out) const { StatusBucketVisitor proc(*this, bucket_space, out); auto& distributorBucketSpace = _op_ctx.bucket_space_repo().get(bucket_space); - distributorBucketSpace.getBucketDatabase().forEach(proc); + distributorBucketSpace.getBucketDatabase().for_each_upper_bound(proc); } void IdealStateManager::getBucketStatus(std::ostream& out) const { diff --git a/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp b/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp index 9e9196dbee7..ca47ab7478c 100644 --- a/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp +++ b/storage/src/vespa/storage/distributor/operations/external/visitoroperation.cpp @@ -388,23 +388,19 @@ VisitorOperation::pickBucketsToVisit(const std::vector& b std::vector bucketVisitOrder; - for (uint32_t i = 0; i < buckets.size(); ++i) { - bucketVisitOrder.push_back(buckets[i].getBucketId()); + for (const auto& bucket : buckets) { + bucketVisitOrder.push_back(bucket.getBucketId()); } VisitorOrder bucketLessThan; std::sort(bucketVisitOrder.begin(), bucketVisitOrder.end(), bucketLessThan); - std::vector::const_iterator iter(bucketVisitOrder.begin()); - std::vector::const_iterator end(bucketVisitOrder.end()); + auto iter = bucketVisitOrder.begin(); + auto end = bucketVisitOrder.end(); for (; iter != end; ++iter) { - if (bucketLessThan(*iter, _lastBucket) || - *iter == _lastBucket) - { - LOG(spam, - "Skipping bucket %s because it is lower than or equal to progress bucket %s", - iter->toString().c_str(), - _lastBucket.toString().c_str()); + if (bucketLessThan(*iter, _lastBucket) || *iter == _lastBucket) { + LOG(spam, "Skipping bucket %s because it is lower than or equal to progress bucket %s", + iter->toString().c_str(), _lastBucket.toString().c_str()); continue; } LOG(spam, "Iterating: Found in db: %s", iter->toString().c_str()); @@ -460,11 +456,11 @@ getBucketIdAndLast(BucketDatabase& database, { if (!super.contains(last)) { NextEntryFinder proc(super); - database.forEach(proc, super); + database.for_each_upper_bound(proc, super); return proc._next; } else { NextEntryFinder proc(last); - database.forEach(proc, last); + database.for_each_upper_bound(proc, last); return proc._next; } } -- cgit v1.2.3