diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2017-09-15 15:01:32 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-09-15 15:01:32 +0200 |
commit | dcdb0188e4daf6b60c43b31b391fc32bc880f879 (patch) | |
tree | 9349f64bef19af58aeca577335d6660c2a6fbf24 | |
parent | 2901e8453fe26a6e9fc2cdf892d01a533139e231 (diff) | |
parent | 66cd305337c44b84589fb91e95d3d9662f8e6c5e (diff) |
Merge pull request #3410 from vespa-engine/balder/decompress-in-multiple-threads
Use multiple threads in decompress due to more expensive zstd
10 files changed, 81 insertions, 37 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/server/executor_thread_service.h b/searchcore/src/vespa/searchcore/proton/server/executor_thread_service.h index 47e41fcd1db..c938288c714 100644 --- a/searchcore/src/vespa/searchcore/proton/server/executor_thread_service.h +++ b/searchcore/src/vespa/searchcore/proton/server/executor_thread_service.h @@ -33,6 +33,7 @@ public: return *this; } virtual bool isCurrentThread() const override; + size_t getNumThreads() const override { return _executor.getNumThreads(); } }; } // namespace proton diff --git a/searchcore/src/vespa/searchcore/proton/test/thread_service_observer.h b/searchcore/src/vespa/searchcore/proton/test/thread_service_observer.h index a2f0724d396..905ec5ef07b 100644 --- a/searchcore/src/vespa/searchcore/proton/test/thread_service_observer.h +++ b/searchcore/src/vespa/searchcore/proton/test/thread_service_observer.h @@ -3,8 +3,7 @@ #include <vespa/searchcorespi/index/i_thread_service.h> -namespace proton { -namespace test { +namespace proton::test { class ThreadServiceObserver : public searchcorespi::index::IThreadService { @@ -38,9 +37,8 @@ public: virtual bool isCurrentThread() const override { return _service.isCurrentThread(); } -}; - -} // namespace test -} // namespace proton + size_t getNumThreads() const override { return _service.getNumThreads(); } +}; +} diff --git a/searchlib/src/vespa/searchlib/docstore/filechunk.cpp b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp index 2a748a302c6..4fac42c1421 100644 --- a/searchlib/src/vespa/searchlib/docstore/filechunk.cpp +++ b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp @@ -4,14 +4,17 @@ #include "data_store_file_chunk_stats.h" #include "summaryexceptions.h" #include "randreaders.h" +#include <vespa/searchlib/util/filekit.h> +#include <vespa/searchlib/common/lambdatask.h> #include <vespa/vespalib/data/fileheader.h> #include <vespa/vespalib/data/databuffer.h> #include <vespa/vespalib/stllike/asciistream.h> +#include <vespa/vespalib/util/blockingthreadstackexecutor.h> +#include <vespa/vespalib/objects/nbostream.h> #include <vespa/vespalib/util/array.hpp> #include <vespa/vespalib/stllike/hash_map.hpp> -#include <vespa/searchlib/util/filekit.h> -#include <vespa/vespalib/objects/nbostream.h> #include <vespa/fastos/file.h> +#include <future> #include <vespa/log/log.h> LOG_SETUP(".search.filechunk"); @@ -296,35 +299,64 @@ FileChunk::getModificationTime() const return _modificationTime; } +namespace { + +using FutureChunk = std::future<Chunk::UP>; + +struct FixedParams { + const IGetLid & db; + IWriteData & dest; + const vespalib::GenerationHandler::Guard & lidReadGuard; + uint32_t fileId; + IFileChunkVisitorProgress *visitorProgress; +}; + +void +appendChunks(FixedParams * args, Chunk::UP chunk) +{ + const Chunk::LidList ll(chunk->getUniqueLids()); + for (const Chunk::Entry & e : ll) { + LidInfo lidInfo(args->fileId, chunk->getId(), e.netSize()); + if (args->db.getLid(args->lidReadGuard, e.getLid()) == lidInfo) { + vespalib::LockGuard guard(args->db.getLidGuard(e.getLid())); + if (args->db.getLid(args->lidReadGuard, e.getLid()) == lidInfo) { + // I am still in use so I need to taken care of. + vespalib::ConstBufferRef data(chunk->getLid(e.getLid())); + args->dest.write(guard, chunk->getId(), e.getLid(), data.c_str(), data.size()); + } + } + } + if (args->visitorProgress != NULL) { + args->visitorProgress->updateProgress(); + } +} + +} + void -FileChunk::appendTo(const IGetLid & db, IWriteData & dest, - uint32_t numChunks, - IFileChunkVisitorProgress *visitorProgress) +FileChunk::appendTo(vespalib::ThreadExecutor & executor, const IGetLid & db, IWriteData & dest, + uint32_t numChunks, IFileChunkVisitorProgress *visitorProgress) { assert(frozen() || visitorProgress); vespalib::GenerationHandler::Guard lidReadGuard(db.getLidReadGuard()); assert(numChunks <= getNumChunks()); + FixedParams fixedParams = {db, dest, lidReadGuard, getFileId().getId(), visitorProgress}; + vespalib::BlockingThreadStackExecutor singleExecutor(1, 64*1024, executor.getNumThreads()*2); for (size_t chunkId(0); chunkId < numChunks; chunkId++) { - const ChunkInfo & cInfo(_chunkInfo[chunkId]); - vespalib::DataBuffer whole(0ul, ALIGNMENT); - FileRandRead::FSP keepAlive(_file->read(cInfo.getOffset(), whole, cInfo.getSize())); - Chunk chunk(chunkId, whole.getData(), whole.getDataLen()); - const Chunk::LidList ll(chunk.getUniqueLids()); - for (const Chunk::Entry & e : ll) { - LidInfo lidInfo(getFileId().getId(), chunk.getId(), e.netSize()); - if (db.getLid(lidReadGuard, e.getLid()) == lidInfo) { - vespalib::LockGuard guard(db.getLidGuard(e.getLid())); - if (db.getLid(lidReadGuard, e.getLid()) == lidInfo) { - // I am still in use so I need to taken care of. - vespalib::ConstBufferRef data(chunk.getLid(e.getLid())); - dest.write(guard, chunk.getId(), e.getLid(), data.c_str(), data.size()); - } - } - } - if (visitorProgress != NULL) { - visitorProgress->updateProgress(); - } + std::promise<Chunk::UP> promisedChunk; + std::future<Chunk::UP> futureChunk = promisedChunk.get_future(); + executor.execute(makeLambdaTask([promise = std::move(promisedChunk), chunkId, this]() mutable { + const ChunkInfo & cInfo(_chunkInfo[chunkId]); + vespalib::DataBuffer whole(0ul, ALIGNMENT); + FileRandRead::FSP keepAlive(_file->read(cInfo.getOffset(), whole, cInfo.getSize())); + promise.set_value(std::make_unique<Chunk>(chunkId, whole.getData(), whole.getDataLen())); + })); + + singleExecutor.execute(makeLambdaTask([args = &fixedParams, chunk = std::move(futureChunk)]() mutable { + appendChunks(args, chunk.get()); + })); } + singleExecutor.sync(); dest.close(); } diff --git a/searchlib/src/vespa/searchlib/docstore/filechunk.h b/searchlib/src/vespa/searchlib/docstore/filechunk.h index 326a185f9de..87dc2e018eb 100644 --- a/searchlib/src/vespa/searchlib/docstore/filechunk.h +++ b/searchlib/src/vespa/searchlib/docstore/filechunk.h @@ -18,6 +18,7 @@ class FastOS_FileInterface; namespace vespalib { class DataBuffer; class GenericHeader; + class ThreadExecutor; } namespace search { @@ -161,7 +162,8 @@ public: virtual bool frozen() const { return true; } const vespalib::string & getName() const { return _name; } void compact(const IGetLid & iGetLid); - void appendTo(const IGetLid & db, IWriteData & dest, uint32_t numChunks, IFileChunkVisitorProgress *visitorProgress); + void appendTo(vespalib::ThreadExecutor & executor, const IGetLid & db, IWriteData & dest, + uint32_t numChunks, IFileChunkVisitorProgress *visitorProgress); /** * Must be called after chunk has been created to allow correct * underlying file object to be created. Must be called before diff --git a/searchlib/src/vespa/searchlib/docstore/lid_info.h b/searchlib/src/vespa/searchlib/docstore/lid_info.h index e86a86bce49..10ddd868c41 100644 --- a/searchlib/src/vespa/searchlib/docstore/lid_info.h +++ b/searchlib/src/vespa/searchlib/docstore/lid_info.h @@ -79,7 +79,7 @@ public: using Guard = vespalib::GenerationHandler::Guard; virtual ~IGetLid() { } - virtual LidInfo getLid(Guard & guard, uint32_t lid) const = 0; + virtual LidInfo getLid(const Guard & guard, uint32_t lid) const = 0; virtual vespalib::LockGuard getLidGuard(uint32_t lid) const = 0; virtual Guard getLidReadGuard() const = 0; }; diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp index 024e64c0bdd..4fa4142813c 100644 --- a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp +++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp @@ -461,7 +461,7 @@ void LogDataStore::compactFile(FileId fileId) compacter.reset(new docstore::Compacter(*this)); } - fc->appendTo(*this, *compacter, fc->getNumChunks(), nullptr); + fc->appendTo(_executor, *this, *compacter, fc->getNumChunks(), nullptr); if (destinationFileId.isActive()) { flushActiveAndWait(0); @@ -1067,7 +1067,7 @@ LogDataStore::accept(IDataStoreVisitor &visitor, WrapVisitorProgress wrapProgress(visitorProgress, totalChunks); for (FileId fcId : fileChunks) { FileChunk & fc = *_fileChunks[fcId.getId()]; - fc.appendTo(*this, wrap, fc.getNumChunks(), &wrapProgress); + fc.appendTo(_executor, *this, wrap, fc.getNumChunks(), &wrapProgress); if (prune) { internalFlushAll(); FileChunk::UP toDie; @@ -1078,7 +1078,7 @@ LogDataStore::accept(IDataStoreVisitor &visitor, toDie->erase(); } } - lfc.appendTo(*this, wrap, lastChunks, &wrapProgress); + lfc.appendTo(_executor, *this, wrap, lastChunks, &wrapProgress); if (prune) { internalFlushAll(); } diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.h b/searchlib/src/vespa/searchlib/docstore/logdatastore.h index 080e6f80503..eb46e5438a9 100644 --- a/searchlib/src/vespa/searchlib/docstore/logdatastore.h +++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.h @@ -176,7 +176,7 @@ public: } // Implements IGetLid API - LidInfo getLid(Guard & guard, uint32_t lid) const override { + LidInfo getLid(const Guard & guard, uint32_t lid) const override { (void) guard; if (lid < getDocIdLimit()) { return _lidInfo[lid]; diff --git a/vespalib/src/vespa/vespalib/util/threadexecutor.h b/vespalib/src/vespa/vespalib/util/threadexecutor.h index 3ec19ea9a71..c202a0a2373 100644 --- a/vespalib/src/vespa/vespalib/util/threadexecutor.h +++ b/vespalib/src/vespa/vespalib/util/threadexecutor.h @@ -15,6 +15,11 @@ class ThreadExecutor : public Executor, public Syncable { public: + /** + * Get number of threads in the executor pool. + * @return number of threads in the pool + */ + virtual size_t getNumThreads() const = 0; }; } // namespace vespalib diff --git a/vespalib/src/vespa/vespalib/util/threadstackexecutorbase.cpp b/vespalib/src/vespa/vespalib/util/threadstackexecutorbase.cpp index 76557762479..21d1de2a29b 100644 --- a/vespalib/src/vespa/vespalib/util/threadstackexecutorbase.cpp +++ b/vespalib/src/vespa/vespalib/util/threadstackexecutorbase.cpp @@ -155,6 +155,10 @@ ThreadStackExecutorBase::start(uint32_t threads) } } +size_t ThreadStackExecutorBase::getNumThreads() const { + return _pool->GetNumStartedThreads(); +} + void ThreadStackExecutorBase::internalSetTaskLimit(uint32_t taskLimit) { diff --git a/vespalib/src/vespa/vespalib/util/threadstackexecutorbase.h b/vespalib/src/vespa/vespalib/util/threadstackexecutorbase.h index 4ea27a2bcde..ee142659027 100644 --- a/vespalib/src/vespa/vespalib/util/threadstackexecutorbase.h +++ b/vespalib/src/vespa/vespalib/util/threadstackexecutorbase.h @@ -98,7 +98,7 @@ private: void unblock(); }; - std::unique_ptr<FastOS_ThreadPool> _pool; + std::unique_ptr<FastOS_ThreadPool> _pool; Monitor _monitor; Stats _stats; Gate _executorCompletion; @@ -223,6 +223,8 @@ public: **/ void wait_for_task_count(uint32_t task_count); + size_t getNumThreads() const override; + /** * Shut down this executor. This will make this executor reject * all new tasks. |