diff options
author | Geir Storli <geirst@verizonmedia.com> | 2021-09-14 11:52:31 +0000 |
---|---|---|
committer | Geir Storli <geirst@verizonmedia.com> | 2021-09-14 14:23:30 +0000 |
commit | dcc1c4f1c1fb93ccfb8a1f30cdd9eeb62799a298 (patch) | |
tree | c95a10584d224c492bb3039374ab2fad9c9adbf2 /searchlib | |
parent | 8ecfc3ac12bc50aedf09a44c4001d9512fdeca94 (diff) |
Refactor to represent files with a generic header in a new class.
Also move DIRECT I/O alignment setting to a common place.
Diffstat (limited to 'searchlib')
22 files changed, 216 insertions, 157 deletions
diff --git a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp index dadf3f21297..c67242ee35a 100644 --- a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp +++ b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp @@ -10,16 +10,17 @@ #include <vespa/searchlib/attribute/attributememorysavetarget.h> #include <vespa/searchlib/attribute/attributesaver.h> #include <vespa/searchlib/attribute/i_enum_store_dictionary.h> -#include <vespa/searchlib/queryeval/executeinfo.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> #include <vespa/searchlib/parsequery/parse.h> +#include <vespa/searchlib/queryeval/executeinfo.h> +#include <vespa/searchlib/util/bufferwriter.h> +#include <vespa/searchlib/util/file_settings.h> #include <vespa/searchlib/util/randomgenerator.h> +#include <vespa/vespalib/data/databuffer.h> #include <vespa/vespalib/testkit/testapp.h> -#include <vespa/searchlib/util/bufferwriter.h> #include <vespa/vespalib/util/compress.h> #include <vespa/vespalib/util/size_literals.h> -#include <vespa/vespalib/data/databuffer.h> #include <limits> #include <iostream> @@ -57,7 +58,7 @@ public: } virtual Buffer allocBuf(size_t size) override { - return std::make_unique<BufferBuf>(size, 4_Ki); + return std::make_unique<BufferBuf>(size, search::FileSettings::DIRECTIO_ALIGNMENT); } virtual void writeBuf(Buffer buf_in) override { diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp index 13300fb47f2..85d41bce45a 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp @@ -1,14 +1,15 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "attributefilewriter.h" -#include "attributefilebufferwriter.h" #include "attribute_header.h" -#include <vespa/vespalib/data/fileheader.h> +#include "attributefilebufferwriter.h" +#include "attributefilewriter.h" +#include <vespa/fastos/file.h> #include <vespa/searchlib/common/fileheadercontext.h> #include <vespa/searchlib/common/tunefileinfo.h> +#include <vespa/searchlib/util/file_settings.h> #include <vespa/vespalib/data/databuffer.h> +#include <vespa/vespalib/data/fileheader.h> #include <vespa/vespalib/util/size_literals.h> -#include <vespa/fastos/file.h> #include <vespa/log/log.h> LOG_SETUP(".searchlib.attribute.attributefilewriter"); @@ -20,15 +21,12 @@ namespace search { namespace { -const uint32_t headerAlign = 4_Ki; -const uint32_t MIN_ALIGNMENT = 4_Ki; - void writeDirectIOAligned(FastOS_FileInterface &file, const void *buf, size_t length) { const char * data(static_cast<const char *>(buf)); size_t remaining(length); - for (size_t maxChunk(2_Mi); maxChunk >= MIN_ALIGNMENT; maxChunk >>= 1) { + for (size_t maxChunk(2_Mi); maxChunk >= FileSettings::DIRECTIO_ALIGNMENT; maxChunk >>= 1) { for ( ; remaining > maxChunk; remaining -= maxChunk, data += maxChunk) { file.WriteBuf(data, maxChunk); } @@ -41,7 +39,7 @@ writeDirectIOAligned(FastOS_FileInterface &file, const void *buf, size_t length) void updateHeader(const vespalib::string &name, uint64_t fileBitSize) { - vespalib::FileHeader h(headerAlign); + vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT); FastOS_File f; f.OpenReadWrite(name.c_str()); h.readFile(f); @@ -122,11 +120,11 @@ AttributeFileWriter::open(const vespalib::string &fileName) void AttributeFileWriter::writeHeader() { - vespalib::FileHeader header(headerAlign); + vespalib::FileHeader header(FileSettings::DIRECTIO_ALIGNMENT); _fileHeaderContext.addTags(header, _file->GetFileName()); addTags(header); size_t headerLen = header.writeFile(*_file); - assert((headerLen % MIN_ALIGNMENT) == 0); + assert((headerLen % FileSettings::DIRECTIO_ALIGNMENT) == 0); _fileBitSize = headerLen * 8; } @@ -141,7 +139,7 @@ AttributeFileWriter::addTags(vespalib::GenericHeader &header) AttributeFileWriter::Buffer AttributeFileWriter::allocBuf(size_t size) { - return std::make_unique<BufferBuf>(size, MIN_ALIGNMENT); + return std::make_unique<BufferBuf>(size, FileSettings::DIRECTIO_ALIGNMENT); } void diff --git a/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp index 7662df9443e..5522229b8c9 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp @@ -1,18 +1,13 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "attributememoryfilewriter.h" #include "attributememoryfilebufferwriter.h" +#include "attributememoryfilewriter.h" +#include <vespa/searchlib/util/file_settings.h> #include <vespa/vespalib/data/databuffer.h> #include <vespa/vespalib/util/size_literals.h> namespace search { -namespace { - -const uint32_t MIN_ALIGNMENT = 4_Ki; - -} - AttributeMemoryFileWriter::AttributeMemoryFileWriter() : IAttributeFileWriter(), _bufs() @@ -26,7 +21,7 @@ AttributeMemoryFileWriter::~AttributeMemoryFileWriter() = default; AttributeMemoryFileWriter::Buffer AttributeMemoryFileWriter::allocBuf(size_t size) { - return std::make_unique<BufferBuf>(size, MIN_ALIGNMENT); + return std::make_unique<BufferBuf>(size, FileSettings::DIRECTIO_ALIGNMENT); } diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp index 5de342bf380..10a4d8addc2 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp @@ -20,6 +20,7 @@ #include <vespa/searchlib/index/dummyfileheadercontext.h> #include <vespa/searchlib/query/query_term_decoder.h> #include <vespa/searchlib/queryeval/emptysearch.h> +#include <vespa/searchlib/util/file_settings.h> #include <vespa/searchlib/util/logutil.h> #include <vespa/vespalib/util/exceptions.h> #include <vespa/vespalib/util/size_literals.h> @@ -47,8 +48,6 @@ const vespalib::string dataTypeTag = "datatype"; const vespalib::string collectionTypeTag = "collectiontype"; const vespalib::string docIdLimitTag = "docIdLimit"; -constexpr size_t DIRECTIO_ALIGNMENT(4_Ki); - } namespace search { @@ -351,7 +350,7 @@ AttributeVector::isEnumeratedSaveFormat() const { vespalib::string datName(vespalib::make_string("%s.dat", getBaseFileName().c_str())); Fast_BufferedFile datFile; - vespalib::FileHeader datHeader(DIRECTIO_ALIGNMENT); + vespalib::FileHeader datHeader(FileSettings::DIRECTIO_ALIGNMENT); if ( ! datFile.OpenReadOnly(datName.c_str()) ) { LOG(error, "could not open %s: %s", datFile.GetFileName(), getLastErrorString().c_str()); throw IllegalStateException(make_string("Failed opening attribute data file '%s' for reading", @@ -648,7 +647,7 @@ IExtendAttribute *AttributeVector::getExtendInterface() { return nullptr; } uint64_t AttributeVector::getEstimatedSaveByteSize() const { - uint64_t headerSize = 4_Ki; + uint64_t headerSize = FileSettings::DIRECTIO_ALIGNMENT; uint64_t totalValueCount = _status.getNumValues(); uint64_t uniqueValueCount = _status.getNumUniqueValues(); uint64_t docIdLimit = getCommittedDocIdLimit(); diff --git a/searchlib/src/vespa/searchlib/attribute/primitivereader.h b/searchlib/src/vespa/searchlib/attribute/primitivereader.h index cf8821526d0..0499138bc49 100644 --- a/searchlib/src/vespa/searchlib/attribute/primitivereader.h +++ b/searchlib/src/vespa/searchlib/attribute/primitivereader.h @@ -13,7 +13,7 @@ namespace search { public: PrimitiveReader(AttributeVector &attr) : ReaderBase(attr), - _datReader(*_datFile) + _datReader(_datFile.file()) { } virtual ~PrimitiveReader() { } diff --git a/searchlib/src/vespa/searchlib/attribute/readerbase.cpp b/searchlib/src/vespa/searchlib/attribute/readerbase.cpp index b55ba221a42..33cec01fb0a 100644 --- a/searchlib/src/vespa/searchlib/attribute/readerbase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/readerbase.cpp @@ -19,8 +19,6 @@ const vespalib::string versionTag = "version"; const vespalib::string docIdLimitTag = "docIdLimit"; const vespalib::string createSerialNumTag = "createSerialNum"; -constexpr size_t DIRECTIO_ALIGNMENT(4_Ki); - uint64_t extractCreateSerialNum(const vespalib::GenericHeader &header) { @@ -35,53 +33,38 @@ ReaderBase::ReaderBase(AttributeVector &attr) attribute::LoadUtils::openWeight(attr) : std::unique_ptr<Fast_BufferedFile>()), _idxFile(attr.hasMultiValue() ? attribute::LoadUtils::openIDX(attr) : std::unique_ptr<Fast_BufferedFile>()), - _weightReader(*_weightFile), - _idxReader(*_idxFile), - _enumReader(*_datFile), + _weightReader(_weightFile.file()), + _idxReader(_idxFile.file()), + _enumReader(_datFile.file()), _currIdx(0), - _datHeaderLen(0u), - _idxHeaderLen(0u), - _weightHeaderLen(0u), _createSerialNum(0u), _fixedWidth(attr.getFixedWidth()), _enumerated(false), _hasLoadData(false), _version(0), - _docIdLimit(0), - _datHeader(DIRECTIO_ALIGNMENT), - _datFileSize(0), - _idxFileSize(0) + _docIdLimit(0) { - _datHeaderLen = _datHeader.readFile(*_datFile); - _datFile->SetPosition(_datHeaderLen); - if (!attr.headerTypeOK(_datHeader) || - !extractFileSize(_datHeader, *_datFile, _datFileSize)) { - _datFile->Close(); + if (!attr.headerTypeOK(_datFile.header())) { + _datFile.close(); } - _createSerialNum = extractCreateSerialNum(_datHeader); - if (_datHeader.hasTag(versionTag)) { - _version = _datHeader.getTag(versionTag).asInteger(); + _createSerialNum = extractCreateSerialNum(_datFile.header()); + if (_datFile.header().hasTag(versionTag)) { + _version = _datFile.header().getTag(versionTag).asInteger(); } - _docIdLimit = _datHeader.getTag(docIdLimitTag).asInteger(); + _docIdLimit = _datFile.header().getTag(docIdLimitTag).asInteger(); if (hasIdx()) { - vespalib::FileHeader idxHeader(DIRECTIO_ALIGNMENT); - _idxHeaderLen = idxHeader.readFile(*_idxFile); - _idxFile->SetPosition(_idxHeaderLen); - if (!attr.headerTypeOK(idxHeader) || - !extractFileSize(idxHeader, *_idxFile, _idxFileSize)) { - _idxFile->Close(); + if (!attr.headerTypeOK(_idxFile.header())) { + _idxFile.close(); } else { _currIdx = _idxReader.readHostOrder(); } } if (hasWeight()) { - vespalib::FileHeader weightHeader(DIRECTIO_ALIGNMENT); - _weightHeaderLen = weightHeader.readFile(*_weightFile); - _weightFile->SetPosition(_weightHeaderLen); - if (!attr.headerTypeOK(weightHeader)) - _weightFile->Close(); + if (!attr.headerTypeOK(_weightFile.header())) { + _weightFile.close(); + } } - if (hasData() && AttributeVector::isEnumerated(_datHeader)) { + if (hasData() && AttributeVector::isEnumerated(_datFile.header())) { _enumerated = true; } _hasLoadData = hasData() && @@ -93,40 +76,30 @@ ReaderBase::~ReaderBase() = default; bool ReaderBase::hasWeight() const { - return _weightFile.get() && _weightFile->IsOpened(); + return _weightFile.valid(); } bool ReaderBase::hasIdx() const { - return _idxFile.get() && _idxFile->IsOpened(); + return _idxFile.valid(); } bool ReaderBase::hasData() const { - return _datFile.get() && _datFile->IsOpened(); -} - -bool -ReaderBase:: -extractFileSize(const vespalib::GenericHeader &header, - FastOS_FileInterface &file, uint64_t &fileSize) -{ - fileSize = file.GetSize(); - return FileSizeCalculator::extractFileSize(header, header.getSize(), - file.GetFileName(), fileSize); + return _datFile.valid(); } void ReaderBase::rewind() { - _datFile->SetPosition(_datHeaderLen); + _datFile.rewind(); _currIdx = 0; if (hasIdx()) { - _idxFile->SetPosition(_idxHeaderLen); + _idxFile.rewind(); _currIdx = _idxReader.readHostOrder(); } if (hasWeight()) { - _weightFile->SetPosition(_weightHeaderLen); + _weightFile.rewind(); } } @@ -137,13 +110,13 @@ ReaderBase::getNumValues() return getEnumCount(); } else { if (_fixedWidth > 0) { - size_t dataSize(_datFileSize - _datHeaderLen); + size_t dataSize = _datFile.data_size(); assert((dataSize % _fixedWidth) == 0); return dataSize / _fixedWidth; } else { // TODO. This limits the number of multivalues to 2^32-1 // This is assert during write, so this should never be a problem here. - _idxFile->SetPosition(_idxFileSize - 4); + _idxFile.file().SetPosition(_idxFile.file_size() - 4); size_t numValues = _idxReader.readHostOrder(); rewind(); return numValues; diff --git a/searchlib/src/vespa/searchlib/attribute/readerbase.h b/searchlib/src/vespa/searchlib/attribute/readerbase.h index a7685e4532a..c439fb81738 100644 --- a/searchlib/src/vespa/searchlib/attribute/readerbase.h +++ b/searchlib/src/vespa/searchlib/attribute/readerbase.h @@ -2,6 +2,7 @@ #pragma once +#include <vespa/searchlib/util/file_with_header.h> #include <vespa/searchlib/util/fileutil.h> #include <cassert> @@ -21,18 +22,15 @@ public: bool hasData() const; uint32_t getNumIdx() const { - return (_idxFileSize - _idxHeaderLen) /sizeof(uint32_t); + return (_idxFile.data_size()) /sizeof(uint32_t); } size_t getEnumCount() const { - size_t dataSize(_datFileSize - _datHeaderLen); + size_t dataSize = _datFile.data_size(); assert((dataSize % sizeof(uint32_t)) == 0); return dataSize / sizeof(uint32_t); } - static bool - extractFileSize(const vespalib::GenericHeader &header, FastOS_FileInterface &file, uint64_t &fileSize); - size_t getNumValues(); int32_t getNextWeight() { return _weightReader.readHostOrder(); } uint32_t getNextEnum() { return _enumReader.readHostOrder(); } @@ -43,32 +41,26 @@ public: uint32_t getVersion() const { return _version; } uint32_t getDocIdLimit() const { return _docIdLimit; } const vespalib::GenericHeader &getDatHeader() const { - return _datHeader; + return _datFile.header(); } protected: - std::unique_ptr<FastOS_FileInterface> _datFile; + FileWithHeader _datFile; private: - std::unique_ptr<FastOS_FileInterface> _weightFile; - std::unique_ptr<FastOS_FileInterface> _idxFile; + FileWithHeader _weightFile; + FileWithHeader _idxFile; FileReader<int32_t> _weightReader; FileReader<uint32_t> _idxReader; FileReader<uint32_t> _enumReader; uint32_t _currIdx; - uint32_t _datHeaderLen; - uint32_t _idxHeaderLen; - uint32_t _weightHeaderLen; uint64_t _createSerialNum; size_t _fixedWidth; bool _enumerated; bool _hasLoadData; uint32_t _version; uint32_t _docIdLimit; - vespalib::FileHeader _datHeader; - uint64_t _datFileSize; - uint64_t _idxFileSize; protected: size_t getDataCountHelper(size_t elemSize) const { - size_t dataSize(_datFileSize - _datHeaderLen); + size_t dataSize = _datFile.data_size(); return dataSize / elemSize; } }; diff --git a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp index 42bc8438d8c..42418c736bf 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp @@ -1,13 +1,14 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "singleboolattribute.h" #include "attributevector.hpp" -#include "primitivereader.h" #include "iattributesavetarget.h" #include "ipostinglistsearchcontext.h" +#include "primitivereader.h" +#include "singleboolattribute.h" +#include <vespa/searchlib/common/bitvectoriterator.h> #include <vespa/searchlib/query/query_term_simple.h> #include <vespa/searchlib/queryeval/emptysearch.h> -#include <vespa/searchlib/common/bitvectoriterator.h> +#include <vespa/searchlib/util/file_settings.h> #include <vespa/vespalib/data/databuffer.h> #include <vespa/vespalib/util/size_literals.h> @@ -245,7 +246,7 @@ SingleBoolAttribute::onShrinkLidSpace() uint64_t SingleBoolAttribute::getEstimatedSaveByteSize() const { - constexpr uint64_t headerSize = 4_Ki + sizeof(uint32_t); + constexpr uint64_t headerSize = FileSettings::DIRECTIO_ALIGNMENT + sizeof(uint32_t); return headerSize + _bv.sizeBytes(); } diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp index 92e25097f40..359da48fb59 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp @@ -1,7 +1,8 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "singlenumericattributesaver.h" #include "iattributesavetarget.h" +#include "singlenumericattributesaver.h" +#include <vespa/searchlib/util/file_settings.h> #include <vespa/vespalib/data/databuffer.h> #include <vespa/vespalib/util/size_literals.h> @@ -9,20 +10,13 @@ using vespalib::GenerationHandler; namespace search { -namespace { - -const uint32_t MIN_ALIGNMENT = 4_Ki; - -} - - SingleValueNumericAttributeSaver:: SingleValueNumericAttributeSaver(const attribute::AttributeHeader &header, const void *data, size_t size) : AttributeSaver(vespalib::GenerationHandler::Guard(), header), _buf() { - _buf = std::make_unique<BufferBuf>(size, MIN_ALIGNMENT); + _buf = std::make_unique<BufferBuf>(size, FileSettings::DIRECTIO_ALIGNMENT); assert(_buf->getFreeLen() >= size); if (size > 0) { memcpy(_buf->getFree(), data, size); @@ -31,11 +25,8 @@ SingleValueNumericAttributeSaver(const attribute::AttributeHeader &header, assert(_buf->getDataLen() == size); } - SingleValueNumericAttributeSaver::~SingleValueNumericAttributeSaver() = default; - - bool SingleValueNumericAttributeSaver::onSave(IAttributeSaveTarget &saveTarget) { @@ -43,5 +34,4 @@ SingleValueNumericAttributeSaver::onSave(IAttributeSaveTarget &saveTarget) return true; } - } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp index cdcc70cc01d..72aadbc6d93 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp @@ -1,12 +1,13 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "singlesmallnumericattribute.h" -#include "attributevector.hpp" -#include "primitivereader.h" #include "attributeiterators.hpp" +#include "attributevector.hpp" #include "iattributesavetarget.h" +#include "primitivereader.h" +#include "singlesmallnumericattribute.h" #include <vespa/searchlib/query/query_term_simple.h> #include <vespa/searchlib/queryeval/emptysearch.h> +#include <vespa/searchlib/util/file_settings.h> #include <vespa/vespalib/data/databuffer.h> #include <vespa/vespalib/util/size_literals.h> @@ -200,7 +201,7 @@ SingleValueSmallNumericAttribute::onShrinkLidSpace() uint64_t SingleValueSmallNumericAttribute::getEstimatedSaveByteSize() const { - uint64_t headerSize = 4_Ki; + uint64_t headerSize = FileSettings::DIRECTIO_ALIGNMENT; const size_t numDocs(getCommittedDocIdLimit()); const size_t numDataWords((numDocs + _valueShiftMask) >> _wordShift); const size_t sz((numDataWords + 1) * sizeof(Word)); diff --git a/searchlib/src/vespa/searchlib/attribute/sourceselector.cpp b/searchlib/src/vespa/searchlib/attribute/sourceselector.cpp index 8c1cbc3d672..7f5ae051978 100644 --- a/searchlib/src/vespa/searchlib/attribute/sourceselector.cpp +++ b/searchlib/src/vespa/searchlib/attribute/sourceselector.cpp @@ -3,6 +3,7 @@ #include "sourceselector.h" #include <vespa/fastlib/io/bufferedfile.h> #include <vespa/searchlib/common/fileheadercontext.h> +#include <vespa/searchlib/util/file_settings.h> #include <vespa/vespalib/util/size_literals.h> using search::queryeval::Source; @@ -83,7 +84,7 @@ SourceSelector::LoadInfo::load() // XXX no checking for success file.ReadOpen(fileName.c_str()); - FileHeader fileHeader(4_Ki); + FileHeader fileHeader(FileSettings::DIRECTIO_ALIGNMENT); fileHeader.readFile(file); if (fileHeader.hasTag(defaultSourceTag)) { _header._defaultSource = fileHeader.getTag(defaultSourceTag).asInteger(); diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp index 97edece249e..6f551222286 100644 --- a/searchlib/src/vespa/searchlib/common/bitvector.cpp +++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp @@ -4,6 +4,7 @@ #include "allocatedbitvector.h" #include "growablebitvector.h" #include "partialbitvector.h" +#include <vespa/searchlib/util/file_settings.h> #include <vespa/vespalib/hwaccelrated/iaccelrated.h> #include <vespa/vespalib/util/exceptions.h> #include <vespa/vespalib/util/size_literals.h> @@ -34,7 +35,6 @@ void verifyInclusiveStart(const search::BitVector & a, const search::BitVector & } constexpr size_t MMAP_LIMIT = 256_Mi; -constexpr size_t DIRECTIO_ALIGNMENT = 4_Ki; } @@ -341,7 +341,8 @@ BitVector::create(Index numberOfElements, FastOS_FileInterface &file, size_t vectorsize = getFileBytes(numberOfElements); file.DirectIOPadding(offset, vectorsize, padbefore, padafter); assert((padbefore & (getAlignment() - 1)) == 0); - AllocatedBitVector::Alloc alloc = Alloc::alloc(padbefore + vectorsize + padafter, MMAP_LIMIT, DIRECTIO_ALIGNMENT); + AllocatedBitVector::Alloc alloc = Alloc::alloc(padbefore + vectorsize + padafter, + MMAP_LIMIT, FileSettings::DIRECTIO_ALIGNMENT); void * alignedBuffer = alloc.get(); file.ReadBuf(alignedBuffer, alloc.size(), offset - padbefore); bv = std::make_unique<AllocatedBitVector>(numberOfElements, std::move(alloc), padbefore); diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp index 828e38b5267..2e4218ed36c 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp @@ -1,9 +1,10 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "bitvectorfile.h" -#include <vespa/searchlib/index/bitvectorkeys.h> #include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/common/fileheadercontext.h> +#include <vespa/searchlib/index/bitvectorkeys.h> +#include <vespa/searchlib/util/file_settings.h> #include <vespa/vespalib/data/fileheader.h> #include <vespa/vespalib/util/size_literals.h> #include <cassert> @@ -25,8 +26,6 @@ readHeader(vespalib::FileHeader &h, file.Close(); } -const size_t FILE_HEADERSIZE_ALIGNMENT = 4_Ki; - } BitVectorFileWrite::BitVectorFileWrite(BitVectorKeyScope scope) @@ -93,7 +92,7 @@ BitVectorFileWrite::open(const vespalib::string &name, void BitVectorFileWrite::makeDatHeader(const FileHeaderContext &fileHeaderContext) { - vespalib::FileHeader h(FILE_HEADERSIZE_ALIGNMENT); // 64 byte alignment on bitvector.dat header + vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT); typedef vespalib::GenericHeader::Tag Tag; fileHeaderContext.addTags(h, _datFile->GetFileName()); h.putTag(Tag("docIdLimit", _docIdLimit)); @@ -110,7 +109,7 @@ BitVectorFileWrite::makeDatHeader(const FileHeaderContext &fileHeaderContext) void BitVectorFileWrite::updateDatHeader(uint64_t fileBitSize) { - vespalib::FileHeader h(FILE_HEADERSIZE_ALIGNMENT); + vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT); typedef vespalib::GenericHeader::Tag Tag; readHeader(h, _datFile->GetFileName()); FileHeaderContext::setFreezeTime(h); diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp index 95d2b286d7f..176840c4903 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp @@ -1,9 +1,10 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "bitvectoridxfile.h" -#include <vespa/searchlib/index/bitvectorkeys.h> #include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/common/fileheadercontext.h> +#include <vespa/searchlib/index/bitvectorkeys.h> +#include <vespa/searchlib/util/file_settings.h> #include <vespa/vespalib/data/fileheader.h> #include <vespa/vespalib/util/size_literals.h> #include <cassert> @@ -24,8 +25,6 @@ readHeader(vespalib::FileHeader &h, const vespalib::string &name) file.Close(); } -const size_t FILE_HEADERSIZE_ALIGNMENT = 4_Ki; - } BitVectorIdxFileWrite::BitVectorIdxFileWrite(BitVectorKeyScope scope) @@ -90,7 +89,7 @@ BitVectorIdxFileWrite::open(const vespalib::string &name, void BitVectorIdxFileWrite::makeIdxHeader(const FileHeaderContext &fileHeaderContext) { - vespalib::FileHeader h(FILE_HEADERSIZE_ALIGNMENT); + vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT); typedef vespalib::GenericHeader::Tag Tag; fileHeaderContext.addTags(h, _idxFile->GetFileName()); h.putTag(Tag("docIdLimit", _docIdLimit)); @@ -108,7 +107,7 @@ BitVectorIdxFileWrite::makeIdxHeader(const FileHeaderContext &fileHeaderContext) void BitVectorIdxFileWrite::updateIdxHeader(uint64_t fileBitSize) { - vespalib::FileHeader h(FILE_HEADERSIZE_ALIGNMENT); + vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT); typedef vespalib::GenericHeader::Tag Tag; readHeader(h, _idxFile->GetFileName()); FileHeaderContext::setFreezeTime(h); diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp index 64fb6142db7..70a67705687 100644 --- a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp @@ -2,7 +2,7 @@ #include "pagedict4file.h" #include <vespa/searchlib/common/fileheadercontext.h> -#include <vespa/vespalib/stllike/asciistream.h> +#include <vespa/searchlib/util/file_settings.h> #include <vespa/vespalib/data/fileheader.h> #include <vespa/vespalib/io/fileutil.h> #include <vespa/vespalib/util/size_literals.h> @@ -36,12 +36,6 @@ using vespalib::getLastErrorString; namespace search::diskindex { -namespace { - -const uint32_t headerAlign = 4_Ki; - -} - PageDict4FileSeqRead::PageDict4FileSeqRead() : _pReader(nullptr), _ssReader(nullptr), @@ -467,7 +461,7 @@ PageDict4FileSeqWrite::makePHeader(const FileHeaderContext &fileHeaderContext) // subheader only written to SS file. typedef vespalib::GenericHeader::Tag Tag; - vespalib::FileHeader header(headerAlign); + vespalib::FileHeader header(FileSettings::DIRECTIO_ALIGNMENT); fileHeaderContext.addTags(header, _pfile.GetFileName()); header.putTag(Tag("frozen", 0)); @@ -499,7 +493,7 @@ PageDict4FileSeqWrite::makeSPHeader(const FileHeaderContext &fileHeaderContext) // subheader only written to SS file. typedef vespalib::GenericHeader::Tag Tag; - vespalib::FileHeader header(headerAlign); + vespalib::FileHeader header(FileSettings::DIRECTIO_ALIGNMENT); fileHeaderContext.addTags(header, _spfile.GetFileName()); header.putTag(Tag("frozen", 0)); @@ -529,7 +523,7 @@ PageDict4FileSeqWrite::makeSSHeader(const FileHeaderContext &fileHeaderContext) ComprFileWriteContext &wc = _ssWriteContext; typedef vespalib::GenericHeader::Tag Tag; - vespalib::FileHeader header(headerAlign); + vespalib::FileHeader header(FileSettings::DIRECTIO_ALIGNMENT); fileHeaderContext.addTags(header, _ssfile.GetFileName()); header.putTag(Tag("frozen", 0)); @@ -557,7 +551,7 @@ PageDict4FileSeqWrite::makeSSHeader(const FileHeaderContext &fileHeaderContext) void PageDict4FileSeqWrite::updatePHeader(uint64_t fileBitSize) { - vespalib::FileHeader h(headerAlign); + vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT); FastOS_File f; f.OpenReadWrite(_pfile.GetFileName()); h.readFile(f); @@ -574,7 +568,7 @@ PageDict4FileSeqWrite::updatePHeader(uint64_t fileBitSize) void PageDict4FileSeqWrite::updateSPHeader(uint64_t fileBitSize) { - vespalib::FileHeader h(headerAlign); + vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT); FastOS_File f; f.OpenReadWrite(_spfile.GetFileName()); h.readFile(f); @@ -591,7 +585,7 @@ PageDict4FileSeqWrite::updateSPHeader(uint64_t fileBitSize) void PageDict4FileSeqWrite::updateSSHeader(uint64_t fileBitSize) { - vespalib::FileHeader h(headerAlign); + vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT); FastOS_File f; f.OpenReadWrite(_ssfile.GetFileName()); h.readFile(f); diff --git a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp index f498b93ca14..5740092269d 100644 --- a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp +++ b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp @@ -1,16 +1,17 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "writeablefilechunk.h" #include "data_store_file_chunk_stats.h" #include "summaryexceptions.h" -#include <vespa/vespalib/util/lambdatask.h> -#include <vespa/vespalib/util/array.hpp> -#include <vespa/vespalib/util/size_literals.h> -#include <vespa/vespalib/data/fileheader.h> -#include <vespa/vespalib/data/databuffer.h> +#include "writeablefilechunk.h" #include <vespa/searchlib/common/fileheadercontext.h> -#include <vespa/vespalib/stllike/hash_map.hpp> +#include <vespa/searchlib/util/file_settings.h> +#include <vespa/vespalib/data/databuffer.h> +#include <vespa/vespalib/data/fileheader.h> #include <vespa/vespalib/objects/nbostream.h> +#include <vespa/vespalib/stllike/hash_map.hpp> +#include <vespa/vespalib/util/array.hpp> +#include <vespa/vespalib/util/lambdatask.h> +#include <vespa/vespalib/util/size_literals.h> #include <vespa/log/log.h> LOG_SETUP(".search.writeablefilechunk"); @@ -27,8 +28,7 @@ namespace search { namespace { -const uint64_t Alignment = 4_Ki; -const uint64_t headerAlign = 4_Ki; +const size_t Alignment = FileSettings::DIRECTIO_ALIGNMENT; } @@ -785,7 +785,7 @@ void WriteableFileChunk::writeDataHeader(const FileHeaderContext &fileHeaderContext) { typedef FileHeader::Tag Tag; - FileHeader h(headerAlign); + FileHeader h(FileSettings::DIRECTIO_ALIGNMENT); assert(_dataFile.IsOpened()); assert(_dataFile.IsWriteMode()); assert(_dataFile.GetPosition() == 0); diff --git a/searchlib/src/vespa/searchlib/tensor/blob_sequence_reader.h b/searchlib/src/vespa/searchlib/tensor/blob_sequence_reader.h index 7c34b60e93d..f96b4e46e98 100644 --- a/searchlib/src/vespa/searchlib/tensor/blob_sequence_reader.h +++ b/searchlib/src/vespa/searchlib/tensor/blob_sequence_reader.h @@ -17,10 +17,10 @@ private: public: BlobSequenceReader(AttributeVector &attr) : ReaderBase(attr), - _sizeReader(*_datFile) + _sizeReader(_datFile.file()) { } uint32_t getNextSize() { return _sizeReader.readHostOrder(); } - void readBlob(void *buf, size_t len) { _datFile->ReadBuf(buf, len); } + void readBlob(void *buf, size_t len) { _datFile.file().ReadBuf(buf, len); } }; } // namespace diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp index fd86fbf1c73..00aede95ca4 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp @@ -43,7 +43,7 @@ public: BlobSequenceReader(AttributeVector &attr); ~BlobSequenceReader(); bool is_present(); - void readTensor(void *buf, size_t len) { _datFile->ReadBuf(buf, len); } + void readTensor(void *buf, size_t len) { _datFile.file().ReadBuf(buf, len); } }; BlobSequenceReader::BlobSequenceReader(AttributeVector &attr) @@ -55,7 +55,7 @@ BlobSequenceReader::~BlobSequenceReader() = default; bool BlobSequenceReader::is_present() { unsigned char detect; - _datFile->ReadBuf(&detect, sizeof(detect)); + _datFile.file().ReadBuf(&detect, sizeof(detect)); if (detect == tensorIsNotPresent) { return false; } diff --git a/searchlib/src/vespa/searchlib/util/CMakeLists.txt b/searchlib/src/vespa/searchlib/util/CMakeLists.txt index 320a6480202..1b311e8d639 100644 --- a/searchlib/src/vespa/searchlib/util/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/util/CMakeLists.txt @@ -6,6 +6,7 @@ vespa_add_library(searchlib_util OBJECT comprfile.cpp dirtraverse.cpp drainingbufferwriter.cpp + file_with_header.cpp filealign.cpp fileheadertk.cpp filekit.cpp diff --git a/searchlib/src/vespa/searchlib/util/file_settings.h b/searchlib/src/vespa/searchlib/util/file_settings.h new file mode 100644 index 00000000000..1bdd1a56cda --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/file_settings.h @@ -0,0 +1,17 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/util/size_literals.h> + +namespace search { + +/** + * Common settings that is used for file I/O. + */ +struct FileSettings { + // The alignment (in bytes) used for DIRECT I/O write and read. + static constexpr size_t DIRECTIO_ALIGNMENT = 4_Ki; +}; + +} diff --git a/searchlib/src/vespa/searchlib/util/file_with_header.cpp b/searchlib/src/vespa/searchlib/util/file_with_header.cpp new file mode 100644 index 00000000000..b004f2b29d5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/file_with_header.cpp @@ -0,0 +1,59 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "file_settings.h" +#include "file_with_header.h" +#include "filesizecalculator.h" +#include <vespa/fastos/file.h> +#include <vespa/vespalib/util/size_literals.h> + +namespace search { + +namespace { + +bool +extract_file_size(const vespalib::GenericHeader& header, + FastOS_FileInterface& file, uint64_t& file_size) +{ + file_size = file.GetSize(); + return FileSizeCalculator::extractFileSize(header, header.getSize(),file.GetFileName(), file_size); +} + +} + +FileWithHeader::FileWithHeader(std::unique_ptr<FastOS_FileInterface> file_in) + : _file(std::move(file_in)), + _header(FileSettings::DIRECTIO_ALIGNMENT), + _header_len(0), + _file_size(0) +{ + if (valid()) { + _header_len = _header.readFile(*_file); + _file->SetPosition(_header_len); + if (!extract_file_size(_header, *_file, _file_size)) { + _file->Close(); + } + } +} + +FileWithHeader::~FileWithHeader() = default; + +bool +FileWithHeader::valid() const +{ + return _file && _file->IsOpened(); +} + +void +FileWithHeader::rewind() +{ + _file->SetPosition(_header_len); +} + +void +FileWithHeader::close() +{ + _file->Close(); +} + + +} diff --git a/searchlib/src/vespa/searchlib/util/file_with_header.h b/searchlib/src/vespa/searchlib/util/file_with_header.h new file mode 100644 index 00000000000..4432b76be67 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/file_with_header.h @@ -0,0 +1,38 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/data/fileheader.h> +#include <memory> + +class FastOS_FileInterface; + +namespace search { + +/** + * Class that encapsulates a file containing a generic file header, followed by binary data. + * + * After construction the file is positioned at the start of the binary data. + * It's assumed that the file was written using FileSettings::DIRECTIO_ALIGNMENT. + */ +class FileWithHeader { +private: + std::unique_ptr<FastOS_FileInterface> _file; + vespalib::FileHeader _header; + uint64_t _header_len; + uint64_t _file_size; + +public: + FileWithHeader(std::unique_ptr<FastOS_FileInterface> file_in); + ~FileWithHeader(); + FastOS_FileInterface& file() const { return *_file; } + const vespalib::GenericHeader& header() const { return _header; } + uint64_t file_size() const { return _file_size; } + uint64_t data_size() const { return _file_size - _header_len; } + + bool valid() const; + void rewind(); + void close(); +}; + +} |