diff options
20 files changed, 432 insertions, 417 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp index 4748d98cc8b..212f843a6f4 100644 --- a/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp +++ b/searchcore/src/vespa/searchcore/proton/docsummary/summarymanager.cpp @@ -1,5 +1,5 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/fastos/fastos.h> + #include "documentstoreadapter.h" #include "summarycompacttarget.h" #include "summaryflushtarget.h" @@ -8,6 +8,8 @@ #include <vespa/searchsummary/docsummary/docsumconfig.h> #include <vespa/config/print/ostreamconfigwriter.h> #include <vespa/juniper/rpinterface.h> +#include <vespa/vespalib/util/exceptions.h> + #include <vespa/log/log.h> LOG_SETUP(".proton.docsummary.summarymanager"); diff --git a/searchlib/src/apps/docstore/create-idx-from-dat.cpp b/searchlib/src/apps/docstore/create-idx-from-dat.cpp index 8b02a9bad30..da6b887c27c 100644 --- a/searchlib/src/apps/docstore/create-idx-from-dat.cpp +++ b/searchlib/src/apps/docstore/create-idx-from-dat.cpp @@ -1,10 +1,11 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/fastos/fastos.h> #include <vespa/searchlib/docstore/logdatastore.h> +#include <vespa/searchlib/docstore/randreaders.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> -#include <vespa/searchlib/transactionlog/nosyncproxy.h> -#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/fastos/app.h> +#include <vespa/vespalib/util/exception.h> using namespace search; diff --git a/searchlib/src/apps/docstore/documentstoreinspect.cpp b/searchlib/src/apps/docstore/documentstoreinspect.cpp index 587565672c0..d16cac2b0e9 100644 --- a/searchlib/src/apps/docstore/documentstoreinspect.cpp +++ b/searchlib/src/apps/docstore/documentstoreinspect.cpp @@ -1,9 +1,10 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/fastos/fastos.h> #include <vespa/searchlib/docstore/logdatastore.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> #include <vespa/searchlib/transactionlog/nosyncproxy.h> +#include <vespa/fastos/app.h> +#include <vespa/vespalib/objects/nbostream.h> using namespace search; diff --git a/searchlib/src/apps/docstore/verifylogdatastore.cpp b/searchlib/src/apps/docstore/verifylogdatastore.cpp index 1fdef641eac..f69e05fe521 100644 --- a/searchlib/src/apps/docstore/verifylogdatastore.cpp +++ b/searchlib/src/apps/docstore/verifylogdatastore.cpp @@ -1,9 +1,11 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/fastos/fastos.h> + #include <vespa/searchlib/docstore/logdatastore.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> #include <vespa/searchlib/transactionlog/nosyncproxy.h> +#include <vespa/fastos/app.h> +#include <vespa/vespalib/util/exception.h> using namespace search; diff --git a/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt b/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt index 01f658e51c1..e318c545a59 100644 --- a/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt @@ -14,7 +14,9 @@ vespa_add_library(searchlib_docstore OBJECT idocumentstore.cpp logdatastore.cpp logdocumentstore.cpp + randreaders.cpp storebybucket.cpp + summaryexceptions.cpp visitcache.cpp writeablefilechunk.cpp DEPENDS diff --git a/searchlib/src/vespa/searchlib/docstore/chunk.cpp b/searchlib/src/vespa/searchlib/docstore/chunk.cpp index 26a963b9e02..9821845545c 100644 --- a/searchlib/src/vespa/searchlib/docstore/chunk.cpp +++ b/searchlib/src/vespa/searchlib/docstore/chunk.cpp @@ -43,6 +43,11 @@ Chunk::hasRoom(size_t len) const && ((_lids.size() + 1) <= _lids.capacity())); } +size_t +Chunk::getMaxPackSize(const document::CompressionConfig & compression) const { + return _format->getMaxPackSize(compression); +} + void Chunk::pack(uint64_t lastSerial, vespalib::DataBuffer & compressed, const document::CompressionConfig & compression) { @@ -77,6 +82,8 @@ Chunk::Chunk(uint32_t id, const void * buffer, size_t len, bool skipcrc) : os >> _lastSerial; } +Chunk::~Chunk() { } + vespalib::ConstBufferRef Chunk::getLid(uint32_t lid) const { @@ -97,6 +104,21 @@ Chunk::getLid(uint32_t lid) const return buf; } +size_t +Chunk::size() const { + return getData().size(); +} + +const vespalib::nbostream & +Chunk::getData() const { + return _format->getBuffer(); +} + +vespalib::nbostream & +Chunk::getData() { + return _format->getBuffer(); +} + Chunk::LidList Chunk::getUniqueLids() const { diff --git a/searchlib/src/vespa/searchlib/docstore/chunk.h b/searchlib/src/vespa/searchlib/docstore/chunk.h index 66bbd2de950..d81055e099e 100644 --- a/searchlib/src/vespa/searchlib/docstore/chunk.h +++ b/searchlib/src/vespa/searchlib/docstore/chunk.h @@ -2,12 +2,21 @@ #pragma once -#include <vespa/searchlib/docstore/chunkformat.h> #include <vespa/searchlib/util/memoryusage.h> -#include <vespa/vespalib/util/memory.h> +#include <vespa/vespalib/util/buffer.h> +#include <vespa/document/util/compressionconfig.h> +#include <memory> +#include <vector> + +namespace vespalib { + class nbostream; + class DataBuffer; +} namespace search { +class ChunkFormat; + class ChunkMeta { public: ChunkMeta() : @@ -78,30 +87,31 @@ public: typedef std::vector<Entry> LidList; Chunk(uint32_t id, const Config & config); Chunk(uint32_t id, const void * buffer, size_t len, bool skipcrc=false); + ~Chunk(); LidMeta append(uint32_t lid, const void * buffer, size_t len); ssize_t read(uint32_t lid, vespalib::DataBuffer & buffer) const; size_t count() const { return _lids.size(); } bool empty() const { return count() == 0; } - size_t size() const { return getData().size(); } + size_t size() const; const LidList & getLids() const { return _lids; } LidList getUniqueLids() const; - size_t getMaxPackSize(const document::CompressionConfig & compression) const { return _format->getMaxPackSize(compression); } + size_t getMaxPackSize(const document::CompressionConfig & compression) const; void pack(uint64_t lastSerial, vespalib::DataBuffer & buffer, const document::CompressionConfig & compression); uint64_t getLastSerial() const { return _lastSerial; } uint32_t getId() const { return _id; } bool validSerial() const { return getLastSerial() != static_cast<uint64_t>(-1l); } vespalib::ConstBufferRef getLid(uint32_t lid) const; - const vespalib::nbostream & getData() const { return _format->getBuffer(); } + const vespalib::nbostream & getData() const; bool hasRoom(size_t len) const; MemoryUsage getMemoryUsage() const; private: - vespalib::nbostream & getData() { return _format->getBuffer(); } + vespalib::nbostream & getData(); - uint32_t _id; - uint32_t _nextOffset; - uint64_t _lastSerial; - ChunkFormat::UP _format; - LidList _lids; + uint32_t _id; + uint32_t _nextOffset; + uint64_t _lastSerial; + std::unique_ptr<ChunkFormat> _format; + LidList _lids; }; typedef std::vector<ChunkMeta> ChunkMetaV; diff --git a/searchlib/src/vespa/searchlib/docstore/chunkformats.h b/searchlib/src/vespa/searchlib/docstore/chunkformats.h index 15a45ec7e60..29a396389ac 100644 --- a/searchlib/src/vespa/searchlib/docstore/chunkformats.h +++ b/searchlib/src/vespa/searchlib/docstore/chunkformats.h @@ -2,7 +2,7 @@ #pragma once -#include <vespa/searchlib/docstore/chunkformat.h> +#include "chunkformat.h" namespace search { diff --git a/searchlib/src/vespa/searchlib/docstore/filechunk.cpp b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp index d186cbc0b83..1670a129e04 100644 --- a/searchlib/src/vespa/searchlib/docstore/filechunk.cpp +++ b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp @@ -2,22 +2,21 @@ #include "filechunk.h" #include "data_store_file_chunk_stats.h" -#include <vespa/searchlib/util/filekit.h> +#include "summaryexceptions.h" +#include "randreaders.h" #include <vespa/vespalib/data/fileheader.h> #include <vespa/vespalib/stllike/asciistream.h> #include <vespa/vespalib/util/array.hpp> #include <vespa/vespalib/stllike/hash_map.hpp> +#include <vespa/searchlib/util/filekit.h> +#include <vespa/vespalib/objects/nbostream.h> #include <vespa/log/log.h> LOG_SETUP(".search.filechunk"); using vespalib::GenericHeader; -using vespalib::FileHeader; -using vespalib::IoException; -using vespalib::getLastErrorString; using vespalib::getErrorString; - namespace search { namespace { @@ -29,15 +28,6 @@ constexpr size_t ENTRY_BIAS_SIZE=8; using vespalib::make_string; -SummaryException::SummaryException(const vespalib::stringref &msg, - FastOS_FileInterface &file, - const vespalib::stringref &location) - : IoException(make_string("%s : Failing file = '%s'. Reason given by OS = '%s'", - msg.c_str(), file.GetFileName(), file.getLastErrorString().c_str()), - getErrorType(file.GetLastError()), location) -{ -} - FileChunk::ChunkInfo::ChunkInfo(uint64_t offset, uint32_t size, uint64_t lastSerial) : _lastSerial(lastSerial), _offset(offset), @@ -64,145 +54,6 @@ LidInfo::LidInfo(uint32_t fileId, uint32_t chunkId, uint32_t sz) } } -DirectIORandRead::DirectIORandRead(const vespalib::string & fileName) - : _file(fileName.c_str()), - _alignment(1), - _granularity(1), - _maxChunkSize(0x100000) -{ - _file.EnableDirectIO(); - if (_file.OpenReadOnly()) { - if (!_file.GetDirectIORestrictions(_alignment, _granularity, _maxChunkSize)) { - LOG(debug, "Direct IO setup failed for file %s due to %s", - _file.GetFileName(), _file.getLastErrorString().c_str()); - } - } else { - throw SummaryException("Failed opening data file", _file, VESPA_STRLOC); - } -} - -FileRandRead::FSP -DirectIORandRead::read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) -{ - size_t padBefore(0); - size_t padAfter(0); - bool directio = _file.DirectIOPadding(offset, sz, padBefore, padAfter); - buffer.clear(); - buffer.ensureFree(padBefore + sz + padAfter + _alignment - 1); - if (directio) { - size_t unAligned = (-reinterpret_cast<size_t>(buffer.getFree()) & (_alignment - 1)); - buffer.moveFreeToData(unAligned); - buffer.moveDataToDead(unAligned); - } - // XXX needs to use pread or file-position-mutex - _file.ReadBuf(buffer.getFree(), padBefore + sz + padAfter, offset - padBefore); - buffer.moveFreeToData(padBefore + sz); - buffer.moveDataToDead(padBefore); - return FSP(); -} - - -int64_t -DirectIORandRead::getSize(void) -{ - return _file.GetSize(); -} - - -MMapRandRead::MMapRandRead(const vespalib::string & fileName, int mmapFlags, int fadviseOptions) - : _file(fileName.c_str()) -{ - _file.enableMemoryMap(mmapFlags); - _file.setFAdviseOptions(fadviseOptions); - if ( ! _file.OpenReadOnly()) { - throw SummaryException("Failed opening data file", _file, VESPA_STRLOC); - } -} - - -NormalRandRead::NormalRandRead(const vespalib::string & fileName) - : _file(fileName.c_str()) -{ - if ( ! _file.OpenReadOnly()) { - throw SummaryException("Failed opening data file", _file, VESPA_STRLOC); - } -} - -FileRandRead::FSP -MMapRandRead::read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) -{ - const char *ptr = static_cast<const char *>(_file.MemoryMapPtr(offset)); - vespalib::DataBuffer(ptr, sz).swap(buffer); - return FSP(); -} - -int64_t -MMapRandRead::getSize(void) -{ - return _file.GetSize(); -} - -MMapRandReadDynamic::MMapRandReadDynamic(const vespalib::string &fileName, int mmapFlags, int fadviseOptions) - : _fileName(fileName), - _mmapFlags(mmapFlags), - _fadviseOptions(fadviseOptions) -{ - reopen(); -} - -void -MMapRandReadDynamic::reopen() -{ - std::unique_ptr<FastOS_File> file(new FastOS_File(_fileName.c_str())); - file->enableMemoryMap(_mmapFlags); - file->setFAdviseOptions(_fadviseOptions); - if (file->OpenReadOnly()) { - _holder.set(file.release()); - _holder.latch(); - } else { - throw SummaryException("Failed opening data file", *file, VESPA_STRLOC); - } -} - -FileRandRead::FSP -MMapRandReadDynamic::read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) -{ - FSP file(_holder.get()); - const char * data(static_cast<const char *>(file->MemoryMapPtr(offset))); - if ((data == NULL) || (file->MemoryMapPtr(offset+sz-1) == NULL)) { - // Must check that both start and end of file is mapped in. - // Previous reopen could happend during a partial write of this buffer. - // This should fix bug 4630695. - reopen(); - file = _holder.get(); - data = static_cast<const char *>(file->MemoryMapPtr(offset)); - } - vespalib::DataBuffer(data, sz).swap(buffer); - return file; -} - -int64_t -MMapRandReadDynamic::getSize(void) -{ - return _holder.get()->GetSize(); -} - -FileRandRead::FSP -NormalRandRead::read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) -{ - buffer.clear(); - buffer.ensureFree(sz); - _file.ReadBuf(buffer.getFree(), sz, offset); - buffer.moveFreeToData(sz); - return FSP(); -} - -int64_t -NormalRandRead::getSize(void) -{ - return _file.GetSize(); -} - vespalib::string FileChunk::NameId::createName(const vespalib::string &baseName) const { vespalib::asciistream os; @@ -262,13 +113,10 @@ FileChunk::FileChunk(FileId fileId, NameId nameId, const vespalib::string & base dataFile.Close(); throw SummaryException("Failed opening idx file", idxFile, VESPA_STRLOC); } - } else { } } -FileChunk::~FileChunk() -{ -} +FileChunk::~FileChunk() { } void FileChunk::addNumBuckets(size_t numBucketsInChunk) diff --git a/searchlib/src/vespa/searchlib/docstore/filechunk.h b/searchlib/src/vespa/searchlib/docstore/filechunk.h index 38540c8fc88..be5de8ca6cc 100644 --- a/searchlib/src/vespa/searchlib/docstore/filechunk.h +++ b/searchlib/src/vespa/searchlib/docstore/filechunk.h @@ -2,19 +2,21 @@ #pragma once -#include <vespa/searchlib/docstore/chunk.h> -#include <vespa/searchlib/docstore/ibucketizer.h> +#include "chunk.h" +#include "ibucketizer.h" +#include "randread.h" #include <vespa/searchlib/util/memoryusage.h> #include <vespa/vespalib/util/ptrholder.h> #include <vespa/vespalib/util/sync.h> #include <vespa/vespalib/stllike/hash_map.h> #include <vespa/searchlib/common/tunefileinfo.h> #include <vespa/vespalib/util/generationhandler.h> -#include <vespa/vespalib/util/exceptions.h> -#include <vespa/fastos/file.h> -namespace search -{ +class FastOS_FileInterface; + +namespace vespalib { class DataBuffer; } + +namespace search { class IDataStoreVisitorProgress; class DataStoreFileChunkStats; @@ -103,63 +105,6 @@ public: virtual void updateProgress() = 0; }; -class FileRandRead -{ -public: - typedef std::shared_ptr<FastOS_File> FSP; - virtual ~FileRandRead() { } - virtual FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) = 0; - virtual int64_t getSize(void) = 0; -}; - -class DirectIORandRead : public FileRandRead -{ -public: - DirectIORandRead(const vespalib::string & fileName); - FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) override; - int64_t getSize(void) override; -private: - FastOS_File _file; - size_t _alignment; - size_t _granularity; - size_t _maxChunkSize; -}; - -class MMapRandRead : public FileRandRead -{ -public: - MMapRandRead(const vespalib::string & fileName, int mmapFlags, int fadviseOptions); - FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) override; - int64_t getSize(void) override; - const void * getMapping() { return _file.MemoryMapPtr(0); } -private: - FastOS_File _file; -}; - -class MMapRandReadDynamic : public FileRandRead -{ -public: - MMapRandReadDynamic(const vespalib::string & fileName, int mmapFlags, int fadviseOptions); - FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) override; - int64_t getSize(void) override; -private: - void reopen(); - vespalib::string _fileName; - vespalib::PtrHolder<FastOS_File> _holder; - int _mmapFlags; - int _fadviseOptions; -}; - -class NormalRandRead : public FileRandRead -{ -public: - NormalRandRead(const vespalib::string & fileName); - FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) override; - int64_t getSize(void) override; -private: - FastOS_File _file; -}; - class BucketDensityComputer { public: @@ -218,7 +163,7 @@ public: private: int32_t _id; }; - typedef vespalib::hash_map<uint32_t, vespalib::DataBuffer::UP> LidBufferMap; + typedef vespalib::hash_map<uint32_t, std::unique_ptr<vespalib::DataBuffer>> LidBufferMap; typedef std::unique_ptr<FileChunk> UP; typedef uint32_t SubChunkId; FileChunk(FileId fileId, NameId nameId, const vespalib::string & baseName, const TuneFileSummary & tune, const IBucketizer * bucketizer, bool skipCrcOnRead); @@ -361,12 +306,4 @@ protected: fastos::TimeStamp _modificationTime; }; -class SummaryException : public vespalib::IoException -{ -public: - SummaryException(const vespalib::stringref &msg, - FastOS_FileInterface & file, - const vespalib::stringref &location); -}; - } // namespace search diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp index fb7056967ad..e34c767e454 100644 --- a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp +++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp @@ -8,6 +8,7 @@ #include <vespa/vespalib/data/fileheader.h> #include <vespa/vespalib/stllike/hash_map.hpp> #include <vespa/searchlib/common/rcuvector.hpp> +#include <vespa/vespalib/util/exceptions.h> #include <thread> #include <vespa/log/log.h> diff --git a/searchlib/src/vespa/searchlib/docstore/randread.h b/searchlib/src/vespa/searchlib/docstore/randread.h new file mode 100644 index 00000000000..e623cda32f1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/randread.h @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <cstddef> +#include <cstdint> +#include <memory> + +class FastOS_FileInterface; + +namespace vespalib { class DataBuffer; } + +namespace search { + +class FileRandRead +{ +public: + typedef std::shared_ptr<FastOS_FileInterface> FSP; + virtual ~FileRandRead() { } + virtual FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) = 0; + virtual int64_t getSize(void) = 0; +}; + +} diff --git a/searchlib/src/vespa/searchlib/docstore/randreaders.cpp b/searchlib/src/vespa/searchlib/docstore/randreaders.cpp new file mode 100644 index 00000000000..f0d7a6b1dff --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/randreaders.cpp @@ -0,0 +1,151 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "randreaders.h" +#include "summaryexceptions.h" +#include <vespa/vespalib/data/databuffer.h> + +#include <vespa/log/log.h> +LOG_SETUP(".search.docstore.randreaders"); + +namespace search { + +DirectIORandRead::DirectIORandRead(const vespalib::string & fileName) + : _file(fileName.c_str()), + _alignment(1), + _granularity(1), + _maxChunkSize(0x100000) +{ + _file.EnableDirectIO(); + if (_file.OpenReadOnly()) { + if (!_file.GetDirectIORestrictions(_alignment, _granularity, _maxChunkSize)) { + LOG(debug, "Direct IO setup failed for file %s due to %s", + _file.GetFileName(), _file.getLastErrorString().c_str()); + } + } else { + throw SummaryException("Failed opening data file", _file, VESPA_STRLOC); + } +} + +FileRandRead::FSP +DirectIORandRead::read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) +{ + size_t padBefore(0); + size_t padAfter(0); + bool directio = _file.DirectIOPadding(offset, sz, padBefore, padAfter); + buffer.clear(); + buffer.ensureFree(padBefore + sz + padAfter + _alignment - 1); + if (directio) { + size_t unAligned = (-reinterpret_cast<size_t>(buffer.getFree()) & (_alignment - 1)); + buffer.moveFreeToData(unAligned); + buffer.moveDataToDead(unAligned); + } + // XXX needs to use pread or file-position-mutex + _file.ReadBuf(buffer.getFree(), padBefore + sz + padAfter, offset - padBefore); + buffer.moveFreeToData(padBefore + sz); + buffer.moveDataToDead(padBefore); + return FSP(); +} + + +int64_t +DirectIORandRead::getSize() +{ + return _file.GetSize(); +} + + +MMapRandRead::MMapRandRead(const vespalib::string & fileName, int mmapFlags, int fadviseOptions) + : _file(fileName.c_str()) +{ + _file.enableMemoryMap(mmapFlags); + _file.setFAdviseOptions(fadviseOptions); + if ( ! _file.OpenReadOnly()) { + throw SummaryException("Failed opening data file", _file, VESPA_STRLOC); + } +} + + +NormalRandRead::NormalRandRead(const vespalib::string & fileName) + : _file(fileName.c_str()) +{ + if ( ! _file.OpenReadOnly()) { + throw SummaryException("Failed opening data file", _file, VESPA_STRLOC); + } +} + +FileRandRead::FSP +MMapRandRead::read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) +{ + const char *ptr = static_cast<const char *>(_file.MemoryMapPtr(offset)); + vespalib::DataBuffer(ptr, sz).swap(buffer); + return FSP(); +} + +int64_t +MMapRandRead::getSize() +{ + return _file.GetSize(); +} + +MMapRandReadDynamic::MMapRandReadDynamic(const vespalib::string &fileName, int mmapFlags, int fadviseOptions) + : _fileName(fileName), + _mmapFlags(mmapFlags), + _fadviseOptions(fadviseOptions) +{ + reopen(); +} + +void +MMapRandReadDynamic::reopen() +{ + std::unique_ptr<FastOS_File> file(new FastOS_File(_fileName.c_str())); + file->enableMemoryMap(_mmapFlags); + file->setFAdviseOptions(_fadviseOptions); + if (file->OpenReadOnly()) { + _holder.set(file.release()); + _holder.latch(); + } else { + throw SummaryException("Failed opening data file", *file, VESPA_STRLOC); + } +} + +FileRandRead::FSP +MMapRandReadDynamic::read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) +{ + FSP file(_holder.get()); + const char * data(static_cast<const char *>(file->MemoryMapPtr(offset))); + if ((data == NULL) || (file->MemoryMapPtr(offset+sz-1) == NULL)) { + // Must check that both start and end of file is mapped in. + // Previous reopen could happend during a partial write of this buffer. + // This should fix bug 4630695. + reopen(); + file = _holder.get(); + data = static_cast<const char *>(file->MemoryMapPtr(offset)); + } + vespalib::DataBuffer(data, sz).swap(buffer); + return file; +} + +int64_t +MMapRandReadDynamic::getSize() +{ + return _holder.get()->GetSize(); +} + +FileRandRead::FSP +NormalRandRead::read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) +{ + buffer.clear(); + buffer.ensureFree(sz); + _file.ReadBuf(buffer.getFree(), sz, offset); + buffer.moveFreeToData(sz); + return FSP(); +} + +int64_t +NormalRandRead::getSize() +{ + return _file.GetSize(); +} + +} diff --git a/searchlib/src/vespa/searchlib/docstore/randreaders.h b/searchlib/src/vespa/searchlib/docstore/randreaders.h new file mode 100644 index 00000000000..fdd8d8c381c --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/randreaders.h @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "randread.h" +#include <vespa/vespalib/util/ptrholder.h> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/fastos/file.h> + +namespace search { + +class DirectIORandRead : public FileRandRead +{ +public: + DirectIORandRead(const vespalib::string & fileName); + FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) override; + int64_t getSize() override; +private: + FastOS_File _file; + size_t _alignment; + size_t _granularity; + size_t _maxChunkSize; +}; + +class MMapRandRead : public FileRandRead +{ +public: + MMapRandRead(const vespalib::string & fileName, int mmapFlags, int fadviseOptions); + FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) override; + int64_t getSize() override; + const void * getMapping() { return _file.MemoryMapPtr(0); } +private: + FastOS_File _file; +}; + +class MMapRandReadDynamic : public FileRandRead +{ +public: + MMapRandReadDynamic(const vespalib::string & fileName, int mmapFlags, int fadviseOptions); + FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) override; + int64_t getSize() override; +private: + void reopen(); + vespalib::string _fileName; + vespalib::PtrHolder<FastOS_File> _holder; + int _mmapFlags; + int _fadviseOptions; +}; + +class NormalRandRead : public FileRandRead +{ +public: + NormalRandRead(const vespalib::string & fileName); + FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) override; + int64_t getSize() override; +private: + FastOS_File _file; +}; + +} diff --git a/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp b/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp index 86bd5aa60cf..ddb9bdbba16 100644 --- a/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp +++ b/searchlib/src/vespa/searchlib/docstore/storebybucket.cpp @@ -3,6 +3,7 @@ #include "storebybucket.h" #include <vespa/vespalib/util/closuretask.h> #include <vespa/vespalib/stllike/hash_map.hpp> +#include <vespa/vespalib/data/databuffer.h> namespace search { namespace docstore { diff --git a/searchlib/src/vespa/searchlib/docstore/summaryexceptions.cpp b/searchlib/src/vespa/searchlib/docstore/summaryexceptions.cpp new file mode 100644 index 00000000000..aedd157a4f7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/summaryexceptions.cpp @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "summaryexceptions.h" +#include <vespa/fastos/file.h> + +using vespalib::IoException; +using vespalib::make_string; + +namespace search { + +SummaryException::SummaryException(const vespalib::stringref &msg, + FastOS_FileInterface &file, + const vespalib::stringref &location) + : IoException(make_string("%s : Failing file = '%s'. Reason given by OS = '%s'", + msg.c_str(), file.GetFileName(), file.getLastErrorString().c_str()), + getErrorType(file.GetLastError()), location) +{ } + +} diff --git a/searchlib/src/vespa/searchlib/docstore/summaryexceptions.h b/searchlib/src/vespa/searchlib/docstore/summaryexceptions.h new file mode 100644 index 00000000000..52ebf4da00f --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/summaryexceptions.h @@ -0,0 +1,17 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/util/exceptions.h> + +class FastOS_FileInterface; + +namespace search { + +class SummaryException : public vespalib::IoException +{ +public: + SummaryException(const vespalib::stringref &msg, FastOS_FileInterface & file, const vespalib::stringref &location); +}; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp index cc27733d235..a774df021ce 100644 --- a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp +++ b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp @@ -2,13 +2,15 @@ #include "writeablefilechunk.h" #include "data_store_file_chunk_stats.h" +#include "summaryexceptions.h" #include <vespa/vespalib/util/closuretask.h> #include <vespa/vespalib/util/array.hpp> #include <vespa/vespalib/data/fileheader.h> #include <vespa/searchlib/common/fileheadercontext.h> #include <vespa/vespalib/stllike/hash_map.hpp> -#include <vespa/log/log.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/log/log.h> LOG_SETUP(".search.writeablefilechunk"); using vespalib::makeTask; @@ -24,16 +26,57 @@ using search::common::FileHeaderContext; namespace search { -namespace -{ +namespace { const uint64_t Alignment = 4096; const uint64_t headerAlign = 4096; } +/* + * Information about serialized chunk written to .dat file but not yet + * synced. + */ +class PendingChunk +{ + vespalib::nbostream _idx; // Serialized chunk for .idx file + uint64_t _lastSerial; + uint64_t _dataOffset; + uint32_t _dataLen; +public: + typedef std::shared_ptr<PendingChunk> SP; + PendingChunk(uint64_t lastSerial, uint64_t dataOffset, uint32_t dataLen); + ~PendingChunk(void); + vespalib::nbostream & getSerializedIdx(void) { return _idx; } + const vespalib::nbostream & getSerializedIdx(void) const { return _idx; } + uint64_t getDataOffset(void) const { return _dataOffset; } + uint32_t getDataLen(void) const { return _dataLen; } + uint32_t getIdxLen(void) const { return _idx.size(); } + uint64_t getLastSerial(void) const { return _lastSerial; } +}; + +class ProcessedChunk +{ +public: + typedef std::unique_ptr<ProcessedChunk> UP; + ProcessedChunk(uint32_t chunkId, uint32_t alignment) + : _chunkId(chunkId), + _payLoad(0), + _buf(0ul, alignment) + { } + void setPayLoad() { _payLoad = _buf.getDataLen(); } + uint32_t getPayLoad() const { return _payLoad; } + uint32_t getChunkId() const { return _chunkId; } + const vespalib::DataBuffer & getBuf() const { return _buf; } + vespalib::DataBuffer & getBuf() { return _buf; } +private: + uint32_t _chunkId; + uint32_t _payLoad; + vespalib::DataBuffer _buf; +}; + WriteableFileChunk:: -WriteableFileChunk(vespalib::ThreadStackExecutorBase &executor, +WriteableFileChunk(vespalib::ThreadExecutor &executor, FileId fileId, NameId nameId, const vespalib::string &baseName, SerialNum initialSerialNum, @@ -862,26 +905,17 @@ WriteableFileChunk::getStats() const { DataStoreFileChunkStats stats = FileChunk::getStats(); uint64_t serialNum = getSerialNum(); - return DataStoreFileChunkStats(stats.diskUsage(), stats.diskBloat(), - stats.maxBucketSpread(), - serialNum, - stats.lastFlushedSerialNum(), - stats.nameId()); + return DataStoreFileChunkStats(stats.diskUsage(), stats.diskBloat(), stats.maxBucketSpread(), + serialNum, stats.lastFlushedSerialNum(), stats.nameId()); }; -WriteableFileChunk::PendingChunk::PendingChunk(uint64_t lastSerial, - uint64_t dataOffset, - uint32_t dataLen) +PendingChunk::PendingChunk(uint64_t lastSerial, uint64_t dataOffset, uint32_t dataLen) : _idx(), _lastSerial(lastSerial), _dataOffset(dataOffset), _dataLen(dataLen) -{ -} - -WriteableFileChunk::PendingChunk::~PendingChunk(void) -{ -} +{ } +PendingChunk::~PendingChunk() { } } // namespace search diff --git a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h index c56418a2155..62a280361d4 100644 --- a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h +++ b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h @@ -3,20 +3,18 @@ #pragma once #include <vespa/searchlib/docstore/filechunk.h> -#include <vespa/vespalib/util/threadstackexecutor.h> +#include <vespa/vespalib/util/threadexecutor.h> #include <vespa/searchlib/transactionlog/syncproxy.h> +#include <vespa/fastos/file.h> #include <map> #include <deque> -namespace search -{ - -namespace common -{ +namespace search { -class FileHeaderContext; +class PendingChunk; +class ProcessedChunk; -} +namespace common { class FileHeaderContext; } class WriteableFileChunk : public FileChunk { @@ -48,7 +46,7 @@ public: public: typedef std::unique_ptr<WriteableFileChunk> UP; - WriteableFileChunk(vespalib::ThreadStackExecutorBase & executor, + WriteableFileChunk(vespalib::ThreadExecutor & executor, FileId fileId, NameId nameId, const vespalib::string & baseName, uint64_t initialSerialNum, @@ -80,57 +78,17 @@ public: static uint64_t writeIdxHeader(const common::FileHeaderContext &fileHeaderContext, FastOS_FileInterface & file); private: - class ProcessedChunk - { - public: - typedef std::unique_ptr<ProcessedChunk> UP; - ProcessedChunk(uint32_t chunkId, uint32_t alignment) - : _chunkId(chunkId), - _payLoad(0), - _buf(0ul, alignment) - { } - void setPayLoad() { _payLoad = _buf.getDataLen(); } - uint32_t getPayLoad() const { return _payLoad; } - uint32_t getChunkId() const { return _chunkId; } - const vespalib::DataBuffer & getBuf() const { return _buf; } - vespalib::DataBuffer & getBuf() { return _buf; } - private: - uint32_t _chunkId; - uint32_t _payLoad; - vespalib::DataBuffer _buf; - }; - typedef std::map<uint32_t, ProcessedChunk::UP> ProcessedChunkMap; - - typedef std::vector<ProcessedChunk::UP> ProcessedChunkQ; + using ProcessedChunkUP = std::unique_ptr<ProcessedChunk>; + typedef std::map<uint32_t, ProcessedChunkUP > ProcessedChunkMap; - /* - * Information about serialized chunk written to .dat file but not yet - * synced. - */ - class PendingChunk - { - vespalib::nbostream _idx; // Serialized chunk for .idx file - uint64_t _lastSerial; - uint64_t _dataOffset; - uint32_t _dataLen; - public: - typedef std::shared_ptr<PendingChunk> SP; - PendingChunk(uint64_t lastSerial, uint64_t dataOffset, uint32_t dataLen); - ~PendingChunk(void); - vespalib::nbostream & getSerializedIdx(void) { return _idx; } - const vespalib::nbostream & getSerializedIdx(void) const { return _idx; } - uint64_t getDataOffset(void) const { return _dataOffset; } - uint32_t getDataLen(void) const { return _dataLen; } - uint32_t getIdxLen(void) const { return _idx.size(); } - uint64_t getLastSerial(void) const { return _lastSerial; } - }; + typedef std::vector<ProcessedChunkUP> ProcessedChunkQ; bool frozen() const override { return _frozen; } void waitForChunkFlushedToDisk(uint32_t chunkId) const; void waitForAllChunksFlushedToDisk() const; void fileWriter(const uint32_t firstChunkId); void internalFlush(uint32_t, uint64_t serialNum); - void enque(ProcessedChunk::UP); + void enque(ProcessedChunkUP); int32_t flushLastIfNonEmpty(bool force); void restart(const vespalib::MonitorGuard & guard, uint32_t nextChunkId); ProcessedChunkQ drainQ(); @@ -161,9 +119,9 @@ private: vespalib::Lock _flushLock; FastOS_File _dataFile; FastOS_File _idxFile; - typedef std::map<uint32_t, Chunk::UP> ChunkMap; + using ChunkMap = std::map<uint32_t, Chunk::UP>; ChunkMap _chunkMap; - typedef std::deque<PendingChunk::SP> PendingChunks; + using PendingChunks = std::deque<std::shared_ptr<PendingChunk>>; PendingChunks _pendingChunks; uint64_t _pendingIdx; uint64_t _pendingDat; @@ -177,7 +135,7 @@ private: bool _writeTaskIsRunning; vespalib::Monitor _writeMonitor; ProcessedChunkQ _writeQ; - vespalib::ThreadStackExecutorBase & _executor; + vespalib::ThreadExecutor & _executor; ProcessedChunkMap _orderedChunks; BucketDensityComputer _bucketMap; }; diff --git a/searchlib/src/vespa/searchlib/util/memoryusage.h b/searchlib/src/vespa/searchlib/util/memoryusage.h index 2d7a389c53e..1cbb1fb0fd1 100644 --- a/searchlib/src/vespa/searchlib/util/memoryusage.h +++ b/searchlib/src/vespa/searchlib/util/memoryusage.h @@ -2,6 +2,8 @@ #pragma once +#include <cstddef> + namespace search { class MemoryUsage { @@ -17,103 +19,29 @@ public: _usedBytes(0), _deadBytes(0), _allocatedBytesOnHold(0) - { - } + { } - MemoryUsage(size_t allocated, - size_t used, - size_t dead, - size_t onHold) + MemoryUsage(size_t allocated, size_t used, size_t dead, size_t onHold) : _allocatedBytes(allocated), _usedBytes(used), _deadBytes(dead), _allocatedBytesOnHold(onHold) - { - } - - size_t - allocatedBytes(void) const - { - return _allocatedBytes; - } - - size_t - usedBytes(void) const - { - return _usedBytes; - } - - size_t - deadBytes(void) const - { - return _deadBytes; - } - - size_t - allocatedBytesOnHold(void) const - { - return _allocatedBytesOnHold; - } - - void - incAllocatedBytes(size_t inc) - { - _allocatedBytes += inc; - } - - void - decAllocatedBytes(size_t dec) - { - _allocatedBytes -= dec; - } - - void - incUsedBytes(size_t inc) - { - _usedBytes += inc; - } - - void - incDeadBytes(size_t inc) - { - _deadBytes += inc; - } - - void - incAllocatedBytesOnHold(size_t inc) - { - _allocatedBytesOnHold += inc; - } - - void - decAllocatedBytesOnHold(size_t inc) - { - _allocatedBytesOnHold -= inc; - } - - void - setAllocatedBytes(size_t alloc) - { - _allocatedBytes = alloc; - } - - void - setUsedBytes(size_t used) - { - _usedBytes = used; - } - - void - setDeadBytes(size_t dead) - { - _deadBytes = dead; - } - - void - setAllocatedBytesOnHold(size_t onHold) - { - _allocatedBytesOnHold = onHold; - } + { } + + size_t allocatedBytes(void) const { return _allocatedBytes; } + size_t usedBytes(void) const { return _usedBytes; } + size_t deadBytes(void) const { return _deadBytes; } + size_t allocatedBytesOnHold(void) const { return _allocatedBytesOnHold; } + void incAllocatedBytes(size_t inc) { _allocatedBytes += inc; } + void decAllocatedBytes(size_t dec) { _allocatedBytes -= dec; } + void incUsedBytes(size_t inc) { _usedBytes += inc; } + void incDeadBytes(size_t inc) { _deadBytes += inc; } + void incAllocatedBytesOnHold(size_t inc) { _allocatedBytesOnHold += inc; } + void decAllocatedBytesOnHold(size_t inc) { _allocatedBytesOnHold -= inc; } + void setAllocatedBytes(size_t alloc) { _allocatedBytes = alloc; } + void setUsedBytes(size_t used) { _usedBytes = used; } + void setDeadBytes(size_t dead) { _deadBytes = dead; } + void setAllocatedBytesOnHold(size_t onHold) { _allocatedBytesOnHold = onHold; } void mergeGenerationHeldBytes(size_t inc) { _allocatedBytes += inc; @@ -121,9 +49,7 @@ public: _allocatedBytesOnHold += inc; } - void - merge(const MemoryUsage & rhs) - { + void merge(const MemoryUsage & rhs) { _allocatedBytes += rhs._allocatedBytes; _usedBytes += rhs._usedBytes; _deadBytes += rhs._deadBytes; @@ -132,4 +58,3 @@ public: }; } // namespace search - |