summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author: Henning Baldersheim <balder@yahoo-inc.com> 2023-10-09 10:40:58 +0000
committer: Henning Baldersheim <balder@yahoo-inc.com> 2023-10-09 10:40:58 +0000
commit: 1d66f293f81f947953d31a3aa2e19620e5f5d567 (patch)
tree: 5aa16395daa3aa014e01569c4be95400c891f844 /searchlib
parent: 0bfa8f44c1d7fe8d419800e1ea568710cc582356 (diff)
Add test for BucketIndexStore and the iterator.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/docstore/store_by_bucket/store_by_bucket_test.cpp 48
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/compacter.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/compacter.h 4
3 files changed, 44 insertions, 10 deletions
diff --git a/searchlib/src/tests/docstore/store_by_bucket/store_by_bucket_test.cpp b/searchlib/src/tests/docstore/store_by_bucket/store_by_bucket_test.cpp
index 50e99b15fb2..5684f4a4070 100644
--- a/searchlib/src/tests/docstore/store_by_bucket/store_by_bucket_test.cpp
+++ b/searchlib/src/tests/docstore/store_by_bucket/store_by_bucket_test.cpp
@@ -4,7 +4,7 @@
#include <vespa/document/bucket/bucketid.h>
#include <vespa/document/base/documentid.h>
-#include <vespa/searchlib/docstore/storebybucket.h>
+#include <vespa/searchlib/docstore/compacter.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/vespalib/stllike/hash_set.h>
#include <vespa/vespalib/util/threadstackexecutor.h>
@@ -25,8 +25,8 @@ createPayload(BucketId b) {
}
uint32_t userId(size_t i) { return i%100; }
-void
-add(StoreByBucket & sbb, size_t i) {
+BucketId
+createBucketId(size_t i) {
constexpr size_t USED_BITS=5;
vespalib::asciistream os;
os << "id:a:b:n=" << userId(i) << ":" << i;
@@ -34,6 +34,11 @@ add(StoreByBucket & sbb, size_t i) {
BucketId b = docId.getGlobalId().convertToBucketId();
EXPECT_EQUAL(userId(i), docId.getGlobalId().getLocationSpecificBits());
b.setUsedBits(USED_BITS);
+ return b;
+}
+void
+add(StoreByBucket & sbb, size_t i) {
+ BucketId b = createBucketId(i);
vespalib::string s = createPayload(b);
sbb.add(b, i%10, i, {s.c_str(), s.size()});
}
@@ -78,7 +83,7 @@ struct StoreIndex : public StoreByBucket::StoreIndex {
StoreIndex::~StoreIndex() = default;
struct Iterator : public StoreByBucket::IndexIterator {
- Iterator(const std::vector<StoreByBucket::Index> & where) : _where(where), _current(0) {}
+ explicit Iterator(const std::vector<StoreByBucket::Index> & where) : _where(where), _current(0) {}
bool has_next() noexcept override {
return _current < _where.size();
@@ -99,19 +104,48 @@ TEST("require that StoreByBucket gives bucket by bucket and ordered within")
vespalib::ThreadStackExecutor executor(8);
StoreIndex storeIndex;
StoreByBucket sbb(storeIndex, backing, executor, CompressionConfig::LZ4);
- for (size_t i(1); i <=500; i++) {
+ for (size_t i(1); i <= 500u; i++) {
add(sbb, i);
}
- for (size_t i(1000); i > 500; i--) {
+ for (size_t i(1000); i > 500u; i--) {
add(sbb, i);
}
sbb.close();
std::sort(storeIndex._where.begin(), storeIndex._where.end());
- //EXPECT_EQUAL(32u, sbb.getBucketCount());
EXPECT_EQUAL(1000u, storeIndex._where.size());
VerifyBucketOrder vbo;
Iterator all(storeIndex._where);
sbb.drain(vbo, all);
}
+constexpr uint32_t NUM_PARTS = 3;
+
+void
+verifyIter(BucketIndexStore &store, uint32_t partId, uint32_t expected_count) {
+ auto iter = store.createIterator(partId);
+ uint32_t count(0);
+ while (iter->has_next()) {
+ StoreByBucket::Index idx = iter->next();
+ EXPECT_EQUAL(store.toPartitionId(idx._bucketId), partId);
+ count++;
+ }
+ EXPECT_EQUAL(expected_count, count);
+}
+
+TEST("test that iterators cover the whole corpus and maps to correct partid") {
+
+ BucketIndexStore bucketIndexStore(32, NUM_PARTS);
+ for (size_t i(1); i <= 500u; i++) {
+ bucketIndexStore.store(StoreByBucket::Index(createBucketId(i), 1, 2, i));
+ }
+ bucketIndexStore.prepareForIterate();
+ EXPECT_EQUAL(500u, bucketIndexStore.getLidCount());
+ EXPECT_EQUAL(32u, bucketIndexStore.getBucketCount());
+ constexpr uint32_t COUNT_0 = 175, COUNT_1 = 155, COUNT_2 = 170;
+ verifyIter(bucketIndexStore, 0, COUNT_0);
+ verifyIter(bucketIndexStore, 1, COUNT_1);
+ verifyIter(bucketIndexStore, 2, COUNT_2);
+ EXPECT_EQUAL(500u, COUNT_0 + COUNT_1 + COUNT_2);
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/docstore/compacter.cpp b/searchlib/src/vespa/searchlib/docstore/compacter.cpp
index 44fd88e7994..ebef684479f 100644
--- a/searchlib/src/vespa/searchlib/docstore/compacter.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/compacter.cpp
@@ -25,7 +25,7 @@ Compacter::write(LockGuard guard, uint32_t chunkId, uint32_t lid, ConstBufferRef
}
BucketIndexStore::BucketIndexStore(size_t maxSignificantBucketBits, uint32_t numPartitions) noexcept
- : _unSignificantBucketBits((maxSignificantBucketBits > 8) ? (maxSignificantBucketBits - 8) : 0),
+ : _inSignificantBucketBits((maxSignificantBucketBits > 8) ? (maxSignificantBucketBits - 8) : 0),
_where(),
_numPartitions(numPartitions),
_readyForIterate(true)
diff --git a/searchlib/src/vespa/searchlib/docstore/compacter.h b/searchlib/src/vespa/searchlib/docstore/compacter.h
index 7dff421c1ba..57f65b249f5 100644
--- a/searchlib/src/vespa/searchlib/docstore/compacter.h
+++ b/searchlib/src/vespa/searchlib/docstore/compacter.h
@@ -29,7 +29,7 @@ public:
~BucketIndexStore() override;
size_t toPartitionId(document::BucketId bucketId) const noexcept {
uint64_t sortableBucketId = bucketId.toKey();
- return (sortableBucketId >> _unSignificantBucketBits) % _numPartitions;
+ return (sortableBucketId >> _inSignificantBucketBits) % _numPartitions;
}
void store(const StoreByBucket::Index & index) override;
size_t getBucketCount() const noexcept;
@@ -48,7 +48,7 @@ private:
size_t _partitionId;
IndexVector::const_iterator _current;
};
- size_t _unSignificantBucketBits;
+ size_t _inSignificantBucketBits;
IndexVector _where;
uint32_t _numPartitions;
bool _readyForIterate;