about summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2023-10-04 20:07:32 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2023-10-04 20:07:32 +0000
commit 643d4162ee887dfe642d7d2e632ed36c2d36f3d3 (patch)
tree 140eca04a5c8c3d8d7d4776277792f48fcbd50cf /searchlib
parent 584223f47c5a8bb16a4c070a1fb3e3d69cb752d6 (diff)
- Instead of keeping a map of bucketId => lids, just append everything to a vector and sort when complete.
- This significantly improves memory usage during compaction: instead of many heap allocations, you now get fewer mmapped allocations that are dropped when done.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/docstore/store_by_bucket/store_by_bucket_test.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/compacter.cpp 3
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/storebybucket.cpp 36
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/storebybucket.h 16
4 files changed, 36 insertions, 21 deletions
diff --git a/searchlib/src/tests/docstore/store_by_bucket/store_by_bucket_test.cpp b/searchlib/src/tests/docstore/store_by_bucket/store_by_bucket_test.cpp
index 13aa1880e8c..c7e0c59da12 100644
--- a/searchlib/src/tests/docstore/store_by_bucket/store_by_bucket_test.cpp
+++ b/searchlib/src/tests/docstore/store_by_bucket/store_by_bucket_test.cpp
@@ -7,7 +7,6 @@
#include <vespa/searchlib/docstore/storebybucket.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/vespalib/stllike/hash_set.h>
-#include <vespa/vespalib/util/size_literals.h>
#include <vespa/vespalib/util/threadstackexecutor.h>
#include <vespa/log/log.h>
@@ -79,6 +78,7 @@ TEST("require that StoreByBucket gives bucket by bucket and ordered within")
for (size_t i(1000); i > 500; i--) {
add(sbb, i);
}
+ sbb.close();
EXPECT_EQUAL(32u, sbb.getBucketCount());
EXPECT_EQUAL(1000u, sbb.getLidCount());
VerifyBucketOrder vbo;
diff --git a/searchlib/src/vespa/searchlib/docstore/compacter.cpp b/searchlib/src/vespa/searchlib/docstore/compacter.cpp
index c886e52659f..1f817be1f25 100644
--- a/searchlib/src/vespa/searchlib/docstore/compacter.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/compacter.cpp
@@ -79,7 +79,8 @@ BucketCompacter::close()
size_t lidCount1(0);
size_t bucketCount(0);
size_t chunkCount(0);
- for (const StoreByBucket & store : _tmpStore) {
+ for (StoreByBucket & store : _tmpStore) {
+ store.close();
lidCount1 += store.getLidCount();
bucketCount += store.getBucketCount();
chunkCount += store.getChunkCount();
diff --git a/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp b/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp
index 6d3c39a51dc..14beccaac9a 100644
--- a/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp
@@ -42,9 +42,8 @@ StoreByBucket::add(BucketId bucketId, uint32_t chunkId, uint32_t lid, const void
});
_executor.execute(CpuUsage::wrap(std::move(task), CpuUsage::Category::COMPACT));
}
- Index idx(bucketId, _current->getId(), chunkId, lid);
_current->append(lid, buffer, sz);
- _where[bucketId.toKey()].push_back(idx);
+ _where.emplace_back(bucketId, _current->getId(), chunkId, lid);
}
Chunk::UP
@@ -88,14 +87,34 @@ StoreByBucket::waitAllProcessed() {
}
void
-StoreByBucket::drain(IWrite & drainer)
-{
+StoreByBucket::close() {
incChunksPosted();
auto task = makeLambdaTask([this, chunk=std::move(_current)]() mutable {
closeChunk(std::move(chunk));
});
_executor.execute(CpuUsage::wrap(std::move(task), CpuUsage::Category::COMPACT));
waitAllProcessed();
+ std::sort(_where.begin(), _where.end());
+}
+
+size_t
+StoreByBucket::getBucketCount() const {
+ if (_where.empty()) return 0;
+
+ size_t count = 0;
+ BucketId prev = _where.front()._bucketId;
+ for (const auto & lid : _where) {
+ if (lid._bucketId != prev) {
+ count++;
+ prev = lid._bucketId;
+ }
+ }
+ return count + 1;
+}
+
+void
+StoreByBucket::drain(IWrite & drainer)
+{
std::vector<Chunk::UP> chunks;
chunks.resize(_chunks.size());
for (const auto & it : _chunks) {
@@ -103,12 +122,9 @@ StoreByBucket::drain(IWrite & drainer)
chunks[it.first] = std::make_unique<Chunk>(it.first, buf.data(), buf.size());
}
_chunks.clear();
- for (auto & it : _where) {
- std::sort(it.second.begin(), it.second.end());
- for (Index idx : it.second) {
- vespalib::ConstBufferRef data(chunks[idx._id]->getLid(idx._lid));
- drainer.write(idx._bucketId, idx._chunkId, idx._lid, data.c_str(), data.size());
- }
+ for (auto & idx : _where) {
+ vespalib::ConstBufferRef data(chunks[idx._id]->getLid(idx._lid));
+ drainer.write(idx._bucketId, idx._chunkId, idx._lid, data.c_str(), data.size());
}
}
diff --git a/searchlib/src/vespa/searchlib/docstore/storebybucket.h b/searchlib/src/vespa/searchlib/docstore/storebybucket.h
index dfe6199aa2e..b0930d4be39 100644
--- a/searchlib/src/vespa/searchlib/docstore/storebybucket.h
+++ b/searchlib/src/vespa/searchlib/docstore/storebybucket.h
@@ -7,7 +7,6 @@
#include <vespa/vespalib/data/memorydatastore.h>
#include <vespa/vespalib/util/executor.h>
#include <vespa/vespalib/stllike/hash_map.h>
-#include <map>
#include <condition_variable>
namespace search::docstore {
@@ -34,19 +33,18 @@ public:
class IWrite {
public:
using BucketId=document::BucketId;
- virtual ~IWrite() { }
+ virtual ~IWrite() = default;
virtual void write(BucketId bucketId, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) = 0;
};
void add(document::BucketId bucketId, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz);
+ void close();
+ /// close() must have been called prior to calling getBucketCount() or drain()
void drain(IWrite & drain);
+ size_t getBucketCount() const;
+
size_t getChunkCount() const;
- size_t getBucketCount() const { return _where.size(); }
size_t getLidCount() const {
- size_t lidCount(0);
- for (const auto & it : _where) {
- lidCount += it.second.size();
- }
- return lidCount;
+ return _where.size();
}
private:
void incChunksPosted();
@@ -69,7 +67,7 @@ private:
using IndexVector = std::vector<Index, vespalib::allocator_large<Index>>;
uint64_t _chunkSerial;
Chunk::UP _current;
- std::map<uint64_t, IndexVector> _where;
+ IndexVector _where;
MemoryDataStore & _backingMemory;
Executor & _executor;
std::unique_ptr<std::mutex> _lock;