diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-06-16 14:10:52 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-16 14:10:52 +0200 |
commit | d782ba728837c36c20f2b8b6e1757b7305e7b57b (patch) | |
tree | b7cb500d2d18be9cc8b16e157426242c813caa51 /searchlib/src | |
parent | c37b765a10c583ed14ff8cde0e695e4eb1f03478 (diff) | |
parent | 1e229d508131b5099576e857132ce79a55342b4e (diff) |
Merge pull request #27431 from vespa-engine/balder/move-more
- Move when possible.
Diffstat (limited to 'searchlib/src')
5 files changed, 114 insertions, 104 deletions
diff --git a/searchlib/src/tests/docstore/document_store/visitcache_test.cpp b/searchlib/src/tests/docstore/document_store/visitcache_test.cpp index 14e3c19fe33..3f80bb6004f 100644 --- a/searchlib/src/tests/docstore/document_store/visitcache_test.cpp +++ b/searchlib/src/tests/docstore/document_store/visitcache_test.cpp @@ -62,7 +62,7 @@ TEST("require that BlobSet can be built") { a.append(9, B("bbbbb",5)); verifyAB(a); CompressionConfig cfg(CompressionConfig::LZ4); - CompressedBlobSet ca(cfg, a); + CompressedBlobSet ca(cfg, std::move(a)); BlobSet b = ca.getBlobSet(); verifyAB(b); } diff --git a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp index 1080d44f2fb..1ca020bca86 100644 --- a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp +++ b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp @@ -368,13 +368,13 @@ TEST("test visit cache does not cache empty ones and is able to access some back IDataStore & datastore = store.getStore(); VisitCache visitCache(datastore, 100000, CompressionConfig::Type::LZ4); - EXPECT_EQUAL(0u, visitCache.read({1}).size()); + EXPECT_EQUAL(12u, visitCache.read({1}).byteSize()); EXPECT_TRUE(visitCache.read({1}).empty()); datastore.write(1,1, A7, 7); - EXPECT_EQUAL(0u, visitCache.read({2}).size()); + EXPECT_EQUAL(12u, visitCache.read({2}).byteSize()); CompressedBlobSet cbs = visitCache.read({1}); EXPECT_FALSE(cbs.empty()); - EXPECT_EQUAL(19u, cbs.size()); + EXPECT_EQUAL(19u, cbs.byteSize()); BlobSet bs(cbs.getBlobSet()); EXPECT_EQUAL(7u, bs.get(1).size()); EXPECT_EQUAL(0, strncmp(A7, bs.get(1).c_str(), 7)); @@ -664,14 +664,14 @@ TEST("test that the integrated visit cache works.") { vcs.remove(17); TEST_DO(verifyCacheStats(ds.getCacheStats(), 101, 104, 97, BASE_SZ-671)); vcs.verifyVisit({7,9,17,19,67,88,89}, {7,9,19,67,88,89}, true); - TEST_DO(verifyCacheStats(ds.getCacheStats(), 101, 105, 98, BASE_SZ-89)); + TEST_DO(verifyCacheStats(ds.getCacheStats(), 101, 105, 98, BASE_SZ-70)); vcs.verifyVisit({41, 42}, true); - TEST_DO(verifyCacheStats(ds.getCacheStats(), 101, 106, 99, BASE_SZ+215)); + TEST_DO(verifyCacheStats(ds.getCacheStats(), 101, 106, 99, BASE_SZ+230)); vcs.verifyVisit({43, 44}, true); - TEST_DO(verifyCacheStats(ds.getCacheStats(), 101, 107, 100, BASE_SZ+520)); + TEST_DO(verifyCacheStats(ds.getCacheStats(), 101, 107, 100, BASE_SZ+540)); vcs.verifyVisit({41, 42, 43, 44}, true); - TEST_DO(verifyCacheStats(ds.getCacheStats(), 101, 108, 99, BASE_SZ+340)); + TEST_DO(verifyCacheStats(ds.getCacheStats(), 101, 108, 99, BASE_SZ+360)); } TEST("testWriteRead") { diff --git a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp index 49beed34f08..7d585007d76 100644 --- a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp +++ b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp @@ -437,7 +437,8 @@ DocumentStore::getFileChunkStats() const return _backingStore.getFileChunkStats(); } -CacheStats DocumentStore::getCacheStats() const { +CacheStats +DocumentStore::getCacheStats() const { CacheStats visitStats = _visitCache->getCacheStats(); CacheStats singleStats = _cache->get_stats(); singleStats.add_extra_misses(_uncached_lookups.load(std::memory_order_relaxed)); diff --git a/searchlib/src/vespa/searchlib/docstore/visitcache.cpp b/searchlib/src/vespa/searchlib/docstore/visitcache.cpp index c99bb50d4f8..cb56d5d20ff 100644 --- a/searchlib/src/vespa/searchlib/docstore/visitcache.cpp +++ b/searchlib/src/vespa/searchlib/docstore/visitcache.cpp @@ -2,12 +2,14 @@ #include "visitcache.h" #include "ibucketizer.h" -#include <vespa/vespalib/stllike/cache.hpp> -#include <vespa/vespalib/stllike/hash_map.hpp> +#include <vespa/vespalib/stllike/hash_set.h> +#include <vespa/vespalib/stllike/hash_map.h> +#include <vespa/vespalib/stllike/cache.h> #include <vespa/vespalib/data/databuffer.h> #include <vespa/vespalib/util/compressor.h> #include <vespa/vespalib/util/memory_allocator.h> -#include <algorithm> +#include <vespa/vespalib/stllike/cache.hpp> +#include <vespa/vespalib/stllike/hash_map.hpp> namespace search::docstore { @@ -17,14 +19,14 @@ using vespalib::DataBuffer; using vespalib::alloc::Alloc; using vespalib::alloc::MemoryAllocator; -KeySet::KeySet(uint32_t key) : - _keys() +KeySet::KeySet(uint32_t key) + : _keys() { _keys.push_back(key); } -KeySet::KeySet(const IDocumentStore::LidVector &keys) : - _keys(keys) +KeySet::KeySet(const IDocumentStore::LidVector &keys) + : _keys(keys) { std::sort(_keys.begin(), _keys.end()); } @@ -34,26 +36,26 @@ KeySet::contains(const KeySet &rhs) const { return std::includes(_keys.begin(), _keys.end(), rhs._keys.begin(), rhs._keys.end()); } -BlobSet::BlobSet() : - _positions(), - _buffer(Alloc::alloc(0, 16 * MemoryAllocator::HUGEPAGE_SIZE), 0) +BlobSet::BlobSet() + : _positions(), + _buffer(Alloc::alloc(0, 16 * MemoryAllocator::HUGEPAGE_SIZE), 0) { } BlobSet::~BlobSet() = default; namespace { -size_t getBufferSize(const BlobSet::Positions & p) { +size_t +getBufferSize(const BlobSet::Positions & p) { return p.empty() ? 0 : p.back().offset() + p.back().size(); } } -BlobSet::BlobSet(const Positions & positions, Alloc && buffer) : - _positions(positions), - _buffer(std::move(buffer), getBufferSize(_positions)) -{ -} +BlobSet::BlobSet(Positions positions, Alloc && buffer) noexcept + : _positions(std::move(positions)), + _buffer(std::move(buffer), getBufferSize(_positions)) +{ } void BlobSet::append(uint32_t lid, ConstBufferRef blob) { @@ -74,20 +76,18 @@ BlobSet::get(uint32_t lid) const return buf; } -CompressedBlobSet::CompressedBlobSet() : - _compression(CompressionConfig::Type::LZ4), - _positions(), - _buffer() -{ -} +CompressedBlobSet::CompressedBlobSet() noexcept + : _compression(CompressionConfig::Type::LZ4), + _positions(), + _buffer() +{ } CompressedBlobSet::~CompressedBlobSet() = default; - -CompressedBlobSet::CompressedBlobSet(CompressionConfig compression, const BlobSet & uncompressed) : - _compression(compression.type), - _positions(uncompressed.getPositions()), - _buffer() +CompressedBlobSet::CompressedBlobSet(CompressionConfig compression, BlobSet uncompressed) + : _compression(compression.type), + _positions(uncompressed.stealPositions()), + _buffer() { if ( ! _positions.empty() ) { DataBuffer compressed; @@ -113,7 +113,8 @@ CompressedBlobSet::getBlobSet() const return BlobSet(_positions, std::move(uncompressed).stealBuffer()); } -size_t CompressedBlobSet::size() const { +size_t +CompressedBlobSet::byteSize() const { return _positions.capacity() * sizeof(BlobSet::Positions::value_type) + _buffer->size(); } @@ -122,13 +123,10 @@ namespace { class VisitCollector : public IBufferVisitor { public: - VisitCollector() : - _blobSet() - { } + VisitCollector(BlobSet & blobSet) : _blobSet(blobSet) { } void visit(uint32_t lid, ConstBufferRef buf) override; - const BlobSet & getBlobSet() const { return _blobSet; } private: - BlobSet _blobSet; + BlobSet & _blobSet; }; void @@ -138,13 +136,52 @@ VisitCollector::visit(uint32_t lid, ConstBufferRef buf) { } } +struct ByteSize { + size_t operator() (const CompressedBlobSet & arg) const noexcept { return arg.byteSize(); } +}; + } +using CacheParams = vespalib::CacheParam< + vespalib::LruParam<KeySet, CompressedBlobSet>, + VisitCache::BackingStore, + vespalib::zero<KeySet>, + ByteSize +>; + +/** + * This extends the default thread safe cache implementation so that + * it will correctly invalidate the cached sets when objects are removed/updated. + * It will also detect the addition of new objects to any of the sets upon first + * usage of the set and then invalidate and perform fresh visit of the backing store. + */ +class VisitCache::Cache : public vespalib::cache<CacheParams> { +public: + Cache(BackingStore & b, size_t maxBytes); + ~Cache() override; + CompressedBlobSet readSet(const KeySet & keys); + void removeKey(uint32_t key); + vespalib::MemoryUsage getStaticMemoryUsage() const override; +private: + void locateAndInvalidateOtherSubsets(const UniqueLock & cacheGuard, const KeySet & keys); + using IdSet = vespalib::hash_set<uint64_t>; + using Parent = vespalib::cache<CacheParams>; + using LidUniqueKeySetId = vespalib::hash_map<uint32_t, uint64_t>; + using IdKeySetMap = vespalib::hash_map<uint64_t, KeySet>; + IdSet findSetsContaining(const UniqueLock &, const KeySet & keys) const; + void onInsert(const K & key) override; + void onRemove(const K & key) override; + LidUniqueKeySetId _lid2Id; + IdKeySetMap _id2KeySet; +}; + bool VisitCache::BackingStore::read(const KeySet &key, CompressedBlobSet &blobs) const { - VisitCollector collector; + BlobSet blobSet; + blobSet.reserve(key.getKeys().size()); + VisitCollector collector(blobSet); _backingStore.read(key.getKeys(), collector); - blobs = CompressedBlobSet(_compression.load(std::memory_order_relaxed), collector.getBlobSet()); + blobs = CompressedBlobSet(_compression.load(std::memory_order_relaxed), std::move(blobSet)); return ! blobs.empty(); } @@ -157,8 +194,9 @@ VisitCache::BackingStore::reconfigure(CompressionConfig compression) { VisitCache::VisitCache(IDataStore &store, size_t cacheSize, CompressionConfig compression) : _store(store, compression), _cache(std::make_unique<Cache>(_store, cacheSize)) -{ -} +{ } + +VisitCache::~VisitCache() = default; void VisitCache::reconfigure(size_t cacheSize, CompressionConfig compression) { @@ -166,6 +204,10 @@ VisitCache::reconfigure(size_t cacheSize, CompressionConfig compression) { _cache->setCapacityBytes(cacheSize); } +vespalib::MemoryUsage +VisitCache::getStaticMemoryUsage() const { + return _cache->getStaticMemoryUsage(); +} VisitCache::Cache::IdSet VisitCache::Cache::findSetsContaining(const UniqueLock &, const KeySet & keys) const { @@ -237,8 +279,7 @@ VisitCache::Cache::removeKey(uint32_t subKey) { auto cacheGuard = getGuard(); const auto foundLid = _lid2Id.find(subKey); if (foundLid != _lid2Id.end()) { - K keySet = _id2KeySet[foundLid->second]; - invalidate(cacheGuard, keySet); + invalidate(cacheGuard, _id2KeySet[foundLid->second]); } } diff --git a/searchlib/src/vespa/searchlib/docstore/visitcache.h b/searchlib/src/vespa/searchlib/docstore/visitcache.h index baa594b8d28..f7b419c0fc6 100644 --- a/searchlib/src/vespa/searchlib/docstore/visitcache.h +++ b/searchlib/src/vespa/searchlib/docstore/visitcache.h @@ -3,14 +3,10 @@ #pragma once #include "idocumentstore.h" -#include <vespa/vespalib/stllike/cache.h> -#include <vespa/vespalib/stllike/hash_set.h> -#include <vespa/vespalib/stllike/hash_map.h> #include <vespa/vespalib/util/alloc.h> #include <vespa/vespalib/util/memory.h> #include <vespa/vespalib/util/compressionconfig.h> #include <vespa/vespalib/objects/nbostream.h> -#include <vespa/document/util/bytebuffer.h> namespace search::docstore { @@ -19,7 +15,7 @@ namespace search::docstore { **/ class KeySet { public: - KeySet() : _keys() { } + KeySet() noexcept : _keys() { } KeySet(uint32_t key); explicit KeySet(const IDocumentStore::LidVector &keys); uint32_t hash() const noexcept { return _keys.empty() ? 0 : _keys[0]; } @@ -51,12 +47,14 @@ public: using Positions = std::vector<LidPosition>; BlobSet(); - BlobSet(const Positions & positions, vespalib::alloc::Alloc && buffer); - BlobSet(BlobSet &&) = default; - BlobSet &operator = (BlobSet &&) = default; + BlobSet(Positions positions, vespalib::alloc::Alloc && buffer) noexcept; + BlobSet(BlobSet &&) noexcept = default; + BlobSet &operator = (BlobSet &&) noexcept = default; ~BlobSet(); + void reserve(size_t elems) { _positions.reserve(elems);} void append(uint32_t lid, vespalib::ConstBufferRef blob); const Positions & getPositions() const { return _positions; } + Positions && stealPositions() { return std::move(_positions); } vespalib::ConstBufferRef get(uint32_t lid) const; vespalib::ConstBufferRef getBuffer() const { return vespalib::ConstBufferRef(_buffer.data(), _buffer.size()); } private: @@ -73,14 +71,14 @@ private: class CompressedBlobSet { public: using CompressionConfig = vespalib::compression::CompressionConfig; - CompressedBlobSet(); - CompressedBlobSet(CompressionConfig compression, const BlobSet & uncompressed); - CompressedBlobSet(CompressedBlobSet && rhs) = default; - CompressedBlobSet & operator=(CompressedBlobSet && rhs) = default; + CompressedBlobSet() noexcept; + CompressedBlobSet(CompressionConfig compression, BlobSet uncompressed); + CompressedBlobSet(CompressedBlobSet && rhs) noexcept = default; + CompressedBlobSet & operator=(CompressedBlobSet && rhs) noexcept = default; CompressedBlobSet(const CompressedBlobSet & rhs) = default; CompressedBlobSet & operator=(const CompressedBlobSet & rhs) = default; ~CompressedBlobSet(); - size_t size() const; + size_t byteSize() const; bool empty() const { return _positions.empty(); } BlobSet getBlobSet() const; private: @@ -98,26 +96,27 @@ class VisitCache { public: using CompressionConfig = vespalib::compression::CompressionConfig; VisitCache(IDataStore &store, size_t cacheSize, CompressionConfig compression); + ~VisitCache(); CompressedBlobSet read(const IDocumentStore::LidVector & keys) const; void remove(uint32_t key); void invalidate(uint32_t key) { remove(key); } vespalib::CacheStats getCacheStats() const; - vespalib::MemoryUsage getStaticMemoryUsage() const { return _cache->getStaticMemoryUsage(); } + vespalib::MemoryUsage getStaticMemoryUsage() const; void reconfigure(size_t cacheSize, CompressionConfig compression); -private: + /** - * This implments the interface the cache uses when it has a cache miss. - * It wraps an IDataStore. Given a set of lids it will visit all objects - * and compress them as a complete set to maximize compression rate. - * As this is a readonly cache the write/erase methods are noops. - */ + * This implments the interface the cache uses when it has a cache miss. + * It wraps an IDataStore. Given a set of lids it will visit all objects + * and compress them as a complete set to maximize compression rate. + * As this is a readonly cache the write/erase methods are noops. + */ class BackingStore { public: - BackingStore(IDataStore &store, CompressionConfig compression) : - _backingStore(store), - _compression(compression) + BackingStore(IDataStore &store, CompressionConfig compression) + : _backingStore(store), + _compression(compression) { } bool read(const KeySet &key, CompressedBlobSet &blobs) const; void write(const KeySet &, const CompressedBlobSet &) { } @@ -128,40 +127,9 @@ private: IDataStore &_backingStore; std::atomic<CompressionConfig> _compression; }; +private: - using CacheParams = vespalib::CacheParam< - vespalib::LruParam<KeySet, CompressedBlobSet>, - BackingStore, - vespalib::zero<KeySet>, - vespalib::size<CompressedBlobSet> - >; - - /** - * This extends the default thread safe cache implementation so that - * it will correctly invalidate the cached sets when objects are removed/updated. - * It will also detect the addition of new objects to any of the sets upon first - * usage of the set and then invalidate and perform fresh visit of the backing store. - */ - class Cache : public vespalib::cache<CacheParams> { - public: - Cache(BackingStore & b, size_t maxBytes); - ~Cache() override; - CompressedBlobSet readSet(const KeySet & keys); - void removeKey(uint32_t key); - vespalib::MemoryUsage getStaticMemoryUsage() const override; - private: - void locateAndInvalidateOtherSubsets(const UniqueLock & cacheGuard, const KeySet & keys); - using IdSet = vespalib::hash_set<uint64_t>; - using Parent = vespalib::cache<CacheParams>; - using LidUniqueKeySetId = vespalib::hash_map<uint32_t, uint64_t>; - using IdKeySetMap = vespalib::hash_map<uint64_t, KeySet>; - IdSet findSetsContaining(const UniqueLock &, const KeySet & keys) const; - void onInsert(const K & key) override; - void onRemove(const K & key) override; - LidUniqueKeySetId _lid2Id; - IdKeySetMap _id2KeySet; - }; - + class Cache; BackingStore _store; std::unique_ptr<Cache> _cache; }; |