diff options
7 files changed, 177 insertions, 40 deletions
diff --git a/searchcore/src/tests/proton/bucketdb/bucketdb/bucketdb_test.cpp b/searchcore/src/tests/proton/bucketdb/bucketdb/bucketdb_test.cpp index 8f10f4b8045..6d14c8b81df 100644 --- a/searchcore/src/tests/proton/bucketdb/bucketdb/bucketdb_test.cpp +++ b/searchcore/src/tests/proton/bucketdb/bucketdb/bucketdb_test.cpp @@ -94,9 +94,12 @@ struct Fixture void add(const GlobalId &gid, const Timestamp ×tamp, uint32_t docSize, SubDbType subDbType) { BucketId bucket(bucket_bits, gid.convertToBucketId().getRawId()); _db.add(gid, bucket, timestamp, docSize, subDbType); + ASSERT_TRUE(_db.validateIntegrity()); } const BucketState &add(const Timestamp ×tamp, uint32_t docSize, SubDbType subDbType) { - return _db.add(GID_1, BUCKET_1, timestamp, docSize, subDbType); + const auto & state = _db.add(GID_1, BUCKET_1, timestamp, docSize, subDbType); + ASSERT_TRUE(_db.validateIntegrity()); + return state; } const BucketState &add(const Timestamp ×tamp, SubDbType subDbType) { return add(timestamp, DOCSIZE_1, subDbType); @@ -104,9 +107,11 @@ struct Fixture void remove(const GlobalId& gid, const Timestamp ×tamp, uint32_t docSize, SubDbType subDbType) { BucketId bucket(bucket_bits, gid.convertToBucketId().getRawId()); _db.remove(gid, bucket, timestamp, docSize, subDbType); + ASSERT_TRUE(_db.validateIntegrity()); } BucketState remove(const Timestamp ×tamp, uint32_t docSize, SubDbType subDbType) { _db.remove(GID_1, BUCKET_1, timestamp, docSize, subDbType); + ASSERT_TRUE(_db.validateIntegrity()); return get(); } BucketState remove(const Timestamp ×tamp, SubDbType subDbType) { @@ -114,8 +119,10 @@ struct Fixture } void remove_batch(const std::vector<RemoveBatchEntry> &removed, SubDbType sub_db_type) { _db.remove_batch(removed, sub_db_type); + ASSERT_TRUE(_db.validateIntegrity()); } BucketState get(BucketId bucket_id) const { + ASSERT_TRUE(_db.validateIntegrity()); return _db.get(bucket_id); } BucketState get() const { @@ -344,4 +351,58 @@ TEST("test that xxhash64 checksum complies") { EXPECT_EQUAL(0xd26fca9au, cksum); } +TEST("test that BucketState can count active Documents") { + GlobalId gid1("aaaaaaaaaaaa"); + GlobalId gid2("bbbbbbbbbbbb"); + GlobalId gid3("cccccccccccc"); + Timestamp t1; + BucketState bs; + EXPECT_FALSE(bs.isActive()); + EXPECT_EQUAL(0u, bs.getDocumentCount()); + EXPECT_EQUAL(0u, bs.getActiveDocumentCount()); + bs.add(gid1, t1, 1, SubDbType::READY); + EXPECT_EQUAL(1u, bs.getDocumentCount()); + EXPECT_EQUAL(0u, bs.getActiveDocumentCount()); + bs.setActive(true); + EXPECT_EQUAL(1u, bs.getActiveDocumentCount()); + bs.add(gid2, t1, 1, SubDbType::NOTREADY); + EXPECT_EQUAL(2u, bs.getDocumentCount()); + EXPECT_EQUAL(2u, bs.getActiveDocumentCount()); + bs.add(gid3, t1, 1, SubDbType::REMOVED); + EXPECT_EQUAL(2u, bs.getDocumentCount()); + EXPECT_EQUAL(2u, bs.getActiveDocumentCount()); + bs.remove(gid2, t1, 1, SubDbType::NOTREADY); + EXPECT_EQUAL(1u, bs.getDocumentCount()); + EXPECT_EQUAL(1u, bs.getActiveDocumentCount()); + bs.setActive(false); + EXPECT_EQUAL(1u, bs.getDocumentCount()); + EXPECT_EQUAL(0u, bs.getActiveDocumentCount()); +} + +TEST_F("test BucketDB active document tracking", Fixture) { + Timestamp t1; + EXPECT_EQUAL(0u, f._db.getNumActiveDocs()); + f.add(make_gid(4,1), t1, 3, SubDbType::READY); + EXPECT_EQUAL(0u, f._db.getNumActiveDocs()); + f._db.setBucketState(make_bucket_id(4), true); + EXPECT_EQUAL(1u, f._db.getNumActiveDocs()); + + BucketState bs; + bs.add(make_gid(5,1), Timestamp(1), 3, SubDbType::NOTREADY); + bs.add(make_gid(5,2), Timestamp(2), 3, SubDbType::NOTREADY); + f._db.add(make_bucket_id(5), bs); + EXPECT_EQUAL(1u, f._db.getNumActiveDocs()); + f._db.setBucketState(make_bucket_id(5), true); + EXPECT_EQUAL(3u, f._db.getNumActiveDocs()); + BucketState * writeableBS = f._db.getBucketStatePtr(make_bucket_id(5)); + writeableBS->setActive(false); + EXPECT_EQUAL(3u, f._db.getNumActiveDocs()); // Incorrect until integrity restored + f._db.restoreIntegrity(); + EXPECT_EQUAL(1u, f._db.getNumActiveDocs()); + + f.remove(make_gid(4,1), t1, 3, SubDbType::READY); + f._db.unloadBucket(make_bucket_id(5), bs); + EXPECT_EQUAL(0u, f._db.getNumActiveDocs()); +} + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp b/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp index 9a2655cdd8a..f5ac3cd8c13 100644 --- a/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp +++ b/searchcore/src/tests/proton/documentmetastore/documentmetastore_test.cpp @@ -70,14 +70,14 @@ static constexpr uint64_t timestampBias = UINT64_C(2000000000000); class DummyTlsSyncer : public ITlsSyncer { public: - virtual ~DummyTlsSyncer() = default; + ~DummyTlsSyncer() override = default; - virtual void sync() override { } + void sync() override { } }; struct BoolVector : public std::vector<bool> { BoolVector() : std::vector<bool>() {} - BoolVector(size_t sz) : std::vector<bool>(sz) {} + explicit BoolVector(size_t sz) : std::vector<bool>(sz) {} BoolVector &T() { push_back(true); return *this; } BoolVector &F() { push_back(false); return *this; } @@ -478,9 +478,9 @@ TEST(DocumentMetaStoreTest, gids_can_be_saved_and_loaded) uint32_t addLid = addGid(dms1, gid, bucketId, Timestamp(lid + timestampBias)); EXPECT_EQ(lid, addLid); } - for (size_t i = 0; i < removeLids.size(); ++i) { - dms1.remove(removeLids[i], 0u); - dms1.removes_complete({ removeLids[i] }); + for (uint32_t lid : removeLids) { + dms1.remove(lid, 0u); + dms1.removes_complete({ lid }); } uint64_t expSaveBytesSize = DocumentMetaStore::minHeaderLen + (1000 - 4) * DocumentMetaStore::entrySize; @@ -675,17 +675,17 @@ requireThatBasicBucketInfoWorks() uint32_t cnt = 0u; uint32_t maxcnt = 0u; bucketdb::Guard bucketDB = dms.getBucketDB().takeGuard(); - for (Map::const_iterator i = m.begin(), ie = m.end(); i != ie; ++i) { - if (i->first.first == prevBucket) { - cksum.add(i->first.second, i->second, 1, SubDbType::READY); + for (const auto & e : m) { + if (e.first.first == prevBucket) { + cksum.add(e.first.second, e.second, 1, SubDbType::READY); ++cnt; } else { BucketInfo bi = bucketDB->get(prevBucket); EXPECT_EQ(cnt, bi.getDocumentCount()); EXPECT_EQ(cksum.getChecksum(), bi.getChecksum()); - prevBucket = i->first.first; + prevBucket = e.first.first; cksum = BucketState(); - cksum.add(i->first.second, i->second, 1, SubDbType::READY); + cksum.add(e.first.second, e.second, 1, SubDbType::READY); maxcnt = std::max(maxcnt, cnt); cnt = 1u; } @@ -727,26 +727,26 @@ TEST(DocumentMetaStoreTest, can_retrieve_list_of_lids_from_bucket_id) // Verify that bucket id x has y lids EXPECT_EQ(4u, m.size()); - for (Map::const_iterator itr = m.begin(); itr != m.end(); ++itr) { - const BucketId &bucketId = itr->first; - const LidVector &expLids = itr->second; + for (const auto & e : m) { + const BucketId &bucketId = e.first; + const LidVector &expLids = e.second; LOG(info, "Verify that bucket id '%s' has %zu lids", bucketId.toString().c_str(), expLids.size()); LidVector actLids; dms.getLids(bucketId, actLids); EXPECT_EQ(expLids.size(), actLids.size()); - for (size_t i = 0; i < expLids.size(); ++i) { - EXPECT_TRUE(std::find(actLids.begin(), actLids.end(), expLids[i]) != actLids.end()); + for (uint32_t lid : expLids) { + EXPECT_TRUE(std::find(actLids.begin(), actLids.end(), lid) != actLids.end()); } } // Remove and verify empty buckets - for (Map::const_iterator itr = m.begin(); itr != m.end(); ++itr) { - const BucketId &bucketId = itr->first; - const LidVector &expLids = itr->second; - for (size_t i = 0; i < expLids.size(); ++i) { - EXPECT_TRUE(dms.remove(expLids[i], 0u)); - dms.removes_complete({ expLids[i] }); + for (const auto & e : m) { + const BucketId &bucketId = e.first; + const LidVector &expLids = e.second; + for (uint32_t lid : expLids) { + EXPECT_TRUE(dms.remove(lid, 0u)); + dms.removes_complete({ lid }); } LOG(info, "Verify that bucket id '%s' has 0 lids", bucketId.toString().c_str()); LidVector actLids; @@ -801,7 +801,7 @@ UserDocFixture::UserDocFixture() bid2 = BucketId(minNumBits, gids[2].convertToBucketId().getRawId()); bid3 = BucketId(minNumBits, gids[7].convertToBucketId().getRawId()); } -UserDocFixture::~UserDocFixture() {} +UserDocFixture::~UserDocFixture() = default; void UserDocFixture::addGlobalIds(size_t numGids) { @@ -990,7 +990,7 @@ struct GlobalIdEntry { BucketId bid1; BucketId bid2; BucketId bid3; - GlobalIdEntry(uint32_t lid_) : + explicit GlobalIdEntry(uint32_t lid_) : lid(lid_), gid(createGid(lid_)), bid1(1, gid.convertToBucketId().getRawId()), @@ -1005,7 +1005,7 @@ struct MyBucketCreateListener : public IBucketCreateListener { std::vector<document::BucketId> _buckets; MyBucketCreateListener(); - ~MyBucketCreateListener(); + ~MyBucketCreateListener() override; void notifyCreateBucket(const bucketdb::Guard & guard, const document::BucketId &bucket) override; }; @@ -1041,9 +1041,12 @@ struct SplitAndJoinEmptyFixture { return dms.getBucketDB().takeGuard()->get(bid); } - void assertNotifyCreateBuckets(std::vector<document::BucketId> expBuckets) { + void assertNotifyCreateBuckets(const std::vector<document::BucketId> & expBuckets) { EXPECT_EQ(expBuckets, _bucketCreateListener._buckets); } + void assertBucketDBIntegrity() { + ASSERT_TRUE(dms.getBucketDB().takeGuard()->validateIntegrity()); + } }; SplitAndJoinEmptyFixture::SplitAndJoinEmptyFixture() @@ -1252,7 +1255,9 @@ TEST(DocumentMetaStoreTest, active_state_is_preserved_after_split) assertActiveLids(getBoolVector(*f.bid30Gids, 31), f.dms.getActiveLids()); EXPECT_EQ(f.bid30Gids->size(), f.dms.getNumActiveLids()); + f.assertBucketDBIntegrity(); f._bucketDBHandler.handleSplit(10, f.bid10, f.bid20, f.bid22); + f.assertBucketDBIntegrity(); EXPECT_FALSE(f.getInfo(f.bid20).isActive()); EXPECT_FALSE(f.getInfo(f.bid22).isActive()); assertActiveLids(BoolVector(31), f.dms.getActiveLids()); diff --git a/searchcore/src/vespa/searchcore/proton/bucketdb/bucketdb.cpp b/searchcore/src/vespa/searchcore/proton/bucketdb/bucketdb.cpp index 0fee6e494fe..5b85cdc53b1 100644 --- a/searchcore/src/vespa/searchcore/proton/bucketdb/bucketdb.cpp +++ b/searchcore/src/vespa/searchcore/proton/bucketdb/bucketdb.cpp @@ -17,6 +17,7 @@ using bucketdb::RemoveBatchEntry; BucketDB::BucketDB() : _map(), + _numActiveDocs(0), _cachedBucketId(), _cachedBucketState() { @@ -28,9 +29,22 @@ BucketDB::~BucketDB() clear(); } +size_t +BucketDB::countActiveDocs() const { + size_t activeDocs = 0; + for (const auto & state : _map) { + activeDocs += state.second.getActiveDocumentCount(); + } + return activeDocs; +} + void -BucketDB::add(BucketId bucketId, const BucketState & state) { - _map[bucketId] += state; +BucketDB::add(BucketId bucketId, const BucketState & delta) { + auto & state = _map[bucketId]; + state += delta; + if (state.isActive()) { + addActive(delta.getDocumentCount()); + } } bucketdb::BucketState * @@ -41,11 +55,20 @@ BucketDB::getBucketStatePtr(BucketId bucket) } void +BucketDB::checkActiveCount() const { + assert(getNumActiveDocs() == countActiveDocs()); +} + +void BucketDB::unloadBucket(BucketId bucket, const BucketState &delta) { + checkActiveCount(); BucketState *state = getBucketStatePtr(bucket); assert(state); *state -= delta; + if (state->isActive()) { + subActive(delta.getDocumentCount()); + } } const bucketdb::BucketState & @@ -55,6 +78,9 @@ BucketDB::add(const GlobalId &gid, { BucketState &state = _map[bucketId]; state.add(gid, timestamp, docSize, subDbType); + if (state.isActive() && subDbType != SubDbType::REMOVED) { + addActive(1); + } return state; } @@ -65,6 +91,9 @@ BucketDB::remove(const GlobalId &gid, { BucketState &state = _map[bucketId]; state.remove(gid, timestamp, docSize, subDbType); + if (state.isActive() && subDbType != SubDbType::REMOVED) { + subActive(1); + } } void @@ -78,6 +107,9 @@ BucketDB::remove_batch(const std::vector<RemoveBatchEntry> &removed, SubDbType s prev_bucket_id = entry.get_bucket_id(); } state->remove(entry.get_gid(), entry.get_timestamp(), entry.get_doc_size(), sub_db_type); + if (state->isActive() && sub_db_type != SubDbType::REMOVED) { + subActive(1); + } } } @@ -167,16 +199,12 @@ BucketDB::getBuckets() const return buckets; } -bool -BucketDB::empty() const -{ - return _map.empty(); -} - void BucketDB::clear() { + checkActiveCount(); _map.clear(); + _numActiveDocs = 0ul; } void @@ -187,6 +215,7 @@ BucketDB::checkEmpty() const assert(state.empty()); (void) state; } + assert(getNumActiveDocs() == 0ul); } @@ -194,7 +223,13 @@ void BucketDB::setBucketState(BucketId bucketId, bool active) { BucketState &state = _map[bucketId]; + if (active == state.isActive()) return; state.setActive(active); + if (active) { + addActive(state.getDocumentCount()); + } else { + subActive(state.getDocumentCount()); + } } @@ -264,4 +299,16 @@ BucketDB::populateActiveBuckets(BucketId::List buckets) return fixupBuckets; } +void BucketDB::restoreIntegrity() { + uncacheBucket(); + _numActiveDocs = countActiveDocs(); +} + +bool +BucketDB::validateIntegrity() const { + checkActiveCount(); + return true; +} + + } diff --git a/searchcore/src/vespa/searchcore/proton/bucketdb/bucketdb.h b/searchcore/src/vespa/searchcore/proton/bucketdb/bucketdb.h index 7c1c336ab48..286c1e3aa45 100644 --- a/searchcore/src/vespa/searchcore/proton/bucketdb/bucketdb.h +++ b/searchcore/src/vespa/searchcore/proton/bucketdb/bucketdb.h @@ -6,6 +6,7 @@ #include <vespa/document/bucket/bucketid.h> #include <vespa/persistence/spi/result.h> #include <vespa/vespalib/stllike/hash_map.h> +#include <atomic> namespace proton::bucketdb { class RemoveBatchEntry; } @@ -21,12 +22,21 @@ private: using BucketState = bucketdb::BucketState; using Map = vespalib::hash_map<BucketId, BucketState, document::BucketId::hash>; - Map _map; - BucketId _cachedBucketId; - BucketState _cachedBucketState; + Map _map; + std::atomic<size_t> _numActiveDocs; + BucketId _cachedBucketId; + BucketState _cachedBucketState; void clear(); void checkEmpty() const; + size_t countActiveDocs() const; + void checkActiveCount() const; + void addActive(size_t value) { + _numActiveDocs.store(getNumActiveDocs() + value, std::memory_order_relaxed); + } + void subActive(size_t value) { + _numActiveDocs.store(getNumActiveDocs() - value, std::memory_order_relaxed); + } public: BucketDB(); ~BucketDB(); @@ -55,7 +65,7 @@ public: BucketState cachedGet(BucketId bucketId) const; bool hasBucket(BucketId bucketId) const; BucketId::List getBuckets() const; - bool empty() const; + bool empty() const { return _map.empty(); } void setBucketState(BucketId bucketId, bool active); void createBucket(BucketId bucketId); void deleteEmptyBucket(BucketId bucketId); @@ -63,8 +73,15 @@ public: BucketId::List populateActiveBuckets(BucketId::List buckets); size_t size() const { return _map.size(); } bool isActiveBucket(BucketId bucketId) const; - BucketState *getBucketStatePtr(BucketId bucket); void unloadBucket(BucketId bucket, const BucketState &delta); + size_t getNumActiveDocs() const { return _numActiveDocs.load(std::memory_order_relaxed); } + + // Avoid using this one as it breaks encapsulation + BucketState *getBucketStatePtr(BucketId bucket); + // Must be called if buckets state aquired with getBucketStatePtr has been modified. + void restoreIntegrity(); + bool validateIntegrity() const; + }; } diff --git a/searchcore/src/vespa/searchcore/proton/bucketdb/bucketsessionbase.cpp b/searchcore/src/vespa/searchcore/proton/bucketdb/bucketsessionbase.cpp index b46fd0a7a3e..7785e62d9cc 100644 --- a/searchcore/src/vespa/searchcore/proton/bucketdb/bucketsessionbase.cpp +++ b/searchcore/src/vespa/searchcore/proton/bucketdb/bucketsessionbase.cpp @@ -10,6 +10,9 @@ BucketSessionBase::BucketSessionBase(BucketDBOwner &bucketDB, IBucketCreateNotif { } +BucketSessionBase::~BucketSessionBase() { + _bucketDB->restoreIntegrity(); +} bool BucketSessionBase::extractInfo(const BucketId &bucket, BucketState *&state) diff --git a/searchcore/src/vespa/searchcore/proton/bucketdb/bucketsessionbase.h b/searchcore/src/vespa/searchcore/proton/bucketdb/bucketsessionbase.h index cc1b2be6ada..db7470eae7c 100644 --- a/searchcore/src/vespa/searchcore/proton/bucketdb/bucketsessionbase.h +++ b/searchcore/src/vespa/searchcore/proton/bucketdb/bucketsessionbase.h @@ -25,6 +25,9 @@ protected: public: BucketSessionBase(BucketDBOwner &bucketDB, IBucketCreateNotifier &bucketCreateNotifier); + BucketSessionBase(const BucketSessionBase &) = delete; + BucketSessionBase & operator =(const BucketSessionBase &) = delete; + ~BucketSessionBase(); bool extractInfo(const BucketId &bucket, BucketState *&info); static bool calcFixupNeed(BucketState *state, bool wantActive, bool fixup); diff --git a/searchcore/src/vespa/searchcore/proton/bucketdb/bucketstate.h b/searchcore/src/vespa/searchcore/proton/bucketdb/bucketstate.h index 53d57e27509..1f82f29315d 100644 --- a/searchcore/src/vespa/searchcore/proton/bucketdb/bucketstate.h +++ b/searchcore/src/vespa/searchcore/proton/bucketdb/bucketstate.h @@ -66,6 +66,7 @@ public: size_t getRemovedDocSizes() const { return _docSizes[REMOVED]; } size_t getNotReadyDocSizes() const { return _docSizes[NOTREADY]; } uint32_t getDocumentCount() const { return getReadyCount() + getNotReadyCount(); } + uint32_t getActiveDocumentCount() const { return isActive() ? getDocumentCount() : 0u;} uint32_t getEntryCount() const { return getDocumentCount() + getRemovedCount(); } BucketChecksum getChecksum() const; bool empty() const; |