summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author: Geir Storli <geirst@verizonmedia.com> 2021-09-14 11:52:31 +0000
committer: Geir Storli <geirst@verizonmedia.com> 2021-09-14 14:23:30 +0000
commit: dcc1c4f1c1fb93ccfb8a1f30cdd9eeb62799a298 (patch)
tree: c95a10584d224c492bb3039374ab2fad9c9adbf2 /searchlib
parent: 8ecfc3ac12bc50aedf09a44c4001d9512fdeca94 (diff)
Refactor to represent files with a generic header in a new class.
Also move DIRECT I/O alignment setting to a common place.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp 9
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp 22
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp 11
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attributevector.cpp 7
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/primitivereader.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/readerbase.cpp 75
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/readerbase.h 24
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp 9
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp 16
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp 9
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/sourceselector.cpp 3
-rw-r--r-- searchlib/src/vespa/searchlib/common/bitvector.cpp 5
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp 9
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp 9
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp 20
-rw-r--r-- searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp 20
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/blob_sequence_reader.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/util/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/util/file_settings.h 17
-rw-r--r-- searchlib/src/vespa/searchlib/util/file_with_header.cpp 59
-rw-r--r-- searchlib/src/vespa/searchlib/util/file_with_header.h 38
22 files changed, 216 insertions, 157 deletions
diff --git a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp
index dadf3f21297..c67242ee35a 100644
--- a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp
+++ b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp
@@ -10,16 +10,17 @@
#include <vespa/searchlib/attribute/attributememorysavetarget.h>
#include <vespa/searchlib/attribute/attributesaver.h>
#include <vespa/searchlib/attribute/i_enum_store_dictionary.h>
-#include <vespa/searchlib/queryeval/executeinfo.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
#include <vespa/searchlib/parsequery/parse.h>
+#include <vespa/searchlib/queryeval/executeinfo.h>
+#include <vespa/searchlib/util/bufferwriter.h>
+#include <vespa/searchlib/util/file_settings.h>
#include <vespa/searchlib/util/randomgenerator.h>
+#include <vespa/vespalib/data/databuffer.h>
#include <vespa/vespalib/testkit/testapp.h>
-#include <vespa/searchlib/util/bufferwriter.h>
#include <vespa/vespalib/util/compress.h>
#include <vespa/vespalib/util/size_literals.h>
-#include <vespa/vespalib/data/databuffer.h>
#include <limits>
#include <iostream>
@@ -57,7 +58,7 @@ public:
}
virtual Buffer allocBuf(size_t size) override {
- return std::make_unique<BufferBuf>(size, 4_Ki);
+ return std::make_unique<BufferBuf>(size, search::FileSettings::DIRECTIO_ALIGNMENT);
}
virtual void writeBuf(Buffer buf_in) override {
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp
index 13300fb47f2..85d41bce45a 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp
@@ -1,14 +1,15 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "attributefilewriter.h"
-#include "attributefilebufferwriter.h"
#include "attribute_header.h"
-#include <vespa/vespalib/data/fileheader.h>
+#include "attributefilebufferwriter.h"
+#include "attributefilewriter.h"
+#include <vespa/fastos/file.h>
#include <vespa/searchlib/common/fileheadercontext.h>
#include <vespa/searchlib/common/tunefileinfo.h>
+#include <vespa/searchlib/util/file_settings.h>
#include <vespa/vespalib/data/databuffer.h>
+#include <vespa/vespalib/data/fileheader.h>
#include <vespa/vespalib/util/size_literals.h>
-#include <vespa/fastos/file.h>
#include <vespa/log/log.h>
LOG_SETUP(".searchlib.attribute.attributefilewriter");
@@ -20,15 +21,12 @@ namespace search {
namespace {
-const uint32_t headerAlign = 4_Ki;
-const uint32_t MIN_ALIGNMENT = 4_Ki;
-
void
writeDirectIOAligned(FastOS_FileInterface &file, const void *buf, size_t length)
{
const char * data(static_cast<const char *>(buf));
size_t remaining(length);
- for (size_t maxChunk(2_Mi); maxChunk >= MIN_ALIGNMENT; maxChunk >>= 1) {
+ for (size_t maxChunk(2_Mi); maxChunk >= FileSettings::DIRECTIO_ALIGNMENT; maxChunk >>= 1) {
for ( ; remaining > maxChunk; remaining -= maxChunk, data += maxChunk) {
file.WriteBuf(data, maxChunk);
}
@@ -41,7 +39,7 @@ writeDirectIOAligned(FastOS_FileInterface &file, const void *buf, size_t length)
void
updateHeader(const vespalib::string &name, uint64_t fileBitSize)
{
- vespalib::FileHeader h(headerAlign);
+ vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT);
FastOS_File f;
f.OpenReadWrite(name.c_str());
h.readFile(f);
@@ -122,11 +120,11 @@ AttributeFileWriter::open(const vespalib::string &fileName)
void
AttributeFileWriter::writeHeader()
{
- vespalib::FileHeader header(headerAlign);
+ vespalib::FileHeader header(FileSettings::DIRECTIO_ALIGNMENT);
_fileHeaderContext.addTags(header, _file->GetFileName());
addTags(header);
size_t headerLen = header.writeFile(*_file);
- assert((headerLen % MIN_ALIGNMENT) == 0);
+ assert((headerLen % FileSettings::DIRECTIO_ALIGNMENT) == 0);
_fileBitSize = headerLen * 8;
}
@@ -141,7 +139,7 @@ AttributeFileWriter::addTags(vespalib::GenericHeader &header)
AttributeFileWriter::Buffer
AttributeFileWriter::allocBuf(size_t size)
{
- return std::make_unique<BufferBuf>(size, MIN_ALIGNMENT);
+ return std::make_unique<BufferBuf>(size, FileSettings::DIRECTIO_ALIGNMENT);
}
void
diff --git a/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp
index 7662df9443e..5522229b8c9 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp
@@ -1,18 +1,13 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "attributememoryfilewriter.h"
#include "attributememoryfilebufferwriter.h"
+#include "attributememoryfilewriter.h"
+#include <vespa/searchlib/util/file_settings.h>
#include <vespa/vespalib/data/databuffer.h>
#include <vespa/vespalib/util/size_literals.h>
namespace search {
-namespace {
-
-const uint32_t MIN_ALIGNMENT = 4_Ki;
-
-}
-
AttributeMemoryFileWriter::AttributeMemoryFileWriter()
: IAttributeFileWriter(),
_bufs()
@@ -26,7 +21,7 @@ AttributeMemoryFileWriter::~AttributeMemoryFileWriter() = default;
AttributeMemoryFileWriter::Buffer
AttributeMemoryFileWriter::allocBuf(size_t size)
{
- return std::make_unique<BufferBuf>(size, MIN_ALIGNMENT);
+ return std::make_unique<BufferBuf>(size, FileSettings::DIRECTIO_ALIGNMENT);
}
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
index 5de342bf380..10a4d8addc2 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
@@ -20,6 +20,7 @@
#include <vespa/searchlib/index/dummyfileheadercontext.h>
#include <vespa/searchlib/query/query_term_decoder.h>
#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <vespa/searchlib/util/file_settings.h>
#include <vespa/searchlib/util/logutil.h>
#include <vespa/vespalib/util/exceptions.h>
#include <vespa/vespalib/util/size_literals.h>
@@ -47,8 +48,6 @@ const vespalib::string dataTypeTag = "datatype";
const vespalib::string collectionTypeTag = "collectiontype";
const vespalib::string docIdLimitTag = "docIdLimit";
-constexpr size_t DIRECTIO_ALIGNMENT(4_Ki);
-
}
namespace search {
@@ -351,7 +350,7 @@ AttributeVector::isEnumeratedSaveFormat() const
{
vespalib::string datName(vespalib::make_string("%s.dat", getBaseFileName().c_str()));
Fast_BufferedFile datFile;
- vespalib::FileHeader datHeader(DIRECTIO_ALIGNMENT);
+ vespalib::FileHeader datHeader(FileSettings::DIRECTIO_ALIGNMENT);
if ( ! datFile.OpenReadOnly(datName.c_str()) ) {
LOG(error, "could not open %s: %s", datFile.GetFileName(), getLastErrorString().c_str());
throw IllegalStateException(make_string("Failed opening attribute data file '%s' for reading",
@@ -648,7 +647,7 @@ IExtendAttribute *AttributeVector::getExtendInterface() { return nullptr; }
uint64_t
AttributeVector::getEstimatedSaveByteSize() const
{
- uint64_t headerSize = 4_Ki;
+ uint64_t headerSize = FileSettings::DIRECTIO_ALIGNMENT;
uint64_t totalValueCount = _status.getNumValues();
uint64_t uniqueValueCount = _status.getNumUniqueValues();
uint64_t docIdLimit = getCommittedDocIdLimit();
diff --git a/searchlib/src/vespa/searchlib/attribute/primitivereader.h b/searchlib/src/vespa/searchlib/attribute/primitivereader.h
index cf8821526d0..0499138bc49 100644
--- a/searchlib/src/vespa/searchlib/attribute/primitivereader.h
+++ b/searchlib/src/vespa/searchlib/attribute/primitivereader.h
@@ -13,7 +13,7 @@ namespace search {
public:
PrimitiveReader(AttributeVector &attr)
: ReaderBase(attr),
- _datReader(*_datFile)
+ _datReader(_datFile.file())
{ }
virtual ~PrimitiveReader() { }
diff --git a/searchlib/src/vespa/searchlib/attribute/readerbase.cpp b/searchlib/src/vespa/searchlib/attribute/readerbase.cpp
index b55ba221a42..33cec01fb0a 100644
--- a/searchlib/src/vespa/searchlib/attribute/readerbase.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/readerbase.cpp
@@ -19,8 +19,6 @@ const vespalib::string versionTag = "version";
const vespalib::string docIdLimitTag = "docIdLimit";
const vespalib::string createSerialNumTag = "createSerialNum";
-constexpr size_t DIRECTIO_ALIGNMENT(4_Ki);
-
uint64_t
extractCreateSerialNum(const vespalib::GenericHeader &header)
{
@@ -35,53 +33,38 @@ ReaderBase::ReaderBase(AttributeVector &attr)
attribute::LoadUtils::openWeight(attr) : std::unique_ptr<Fast_BufferedFile>()),
_idxFile(attr.hasMultiValue() ?
attribute::LoadUtils::openIDX(attr) : std::unique_ptr<Fast_BufferedFile>()),
- _weightReader(*_weightFile),
- _idxReader(*_idxFile),
- _enumReader(*_datFile),
+ _weightReader(_weightFile.file()),
+ _idxReader(_idxFile.file()),
+ _enumReader(_datFile.file()),
_currIdx(0),
- _datHeaderLen(0u),
- _idxHeaderLen(0u),
- _weightHeaderLen(0u),
_createSerialNum(0u),
_fixedWidth(attr.getFixedWidth()),
_enumerated(false),
_hasLoadData(false),
_version(0),
- _docIdLimit(0),
- _datHeader(DIRECTIO_ALIGNMENT),
- _datFileSize(0),
- _idxFileSize(0)
+ _docIdLimit(0)
{
- _datHeaderLen = _datHeader.readFile(*_datFile);
- _datFile->SetPosition(_datHeaderLen);
- if (!attr.headerTypeOK(_datHeader) ||
- !extractFileSize(_datHeader, *_datFile, _datFileSize)) {
- _datFile->Close();
+ if (!attr.headerTypeOK(_datFile.header())) {
+ _datFile.close();
}
- _createSerialNum = extractCreateSerialNum(_datHeader);
- if (_datHeader.hasTag(versionTag)) {
- _version = _datHeader.getTag(versionTag).asInteger();
+ _createSerialNum = extractCreateSerialNum(_datFile.header());
+ if (_datFile.header().hasTag(versionTag)) {
+ _version = _datFile.header().getTag(versionTag).asInteger();
}
- _docIdLimit = _datHeader.getTag(docIdLimitTag).asInteger();
+ _docIdLimit = _datFile.header().getTag(docIdLimitTag).asInteger();
if (hasIdx()) {
- vespalib::FileHeader idxHeader(DIRECTIO_ALIGNMENT);
- _idxHeaderLen = idxHeader.readFile(*_idxFile);
- _idxFile->SetPosition(_idxHeaderLen);
- if (!attr.headerTypeOK(idxHeader) ||
- !extractFileSize(idxHeader, *_idxFile, _idxFileSize)) {
- _idxFile->Close();
+ if (!attr.headerTypeOK(_idxFile.header())) {
+ _idxFile.close();
} else {
_currIdx = _idxReader.readHostOrder();
}
}
if (hasWeight()) {
- vespalib::FileHeader weightHeader(DIRECTIO_ALIGNMENT);
- _weightHeaderLen = weightHeader.readFile(*_weightFile);
- _weightFile->SetPosition(_weightHeaderLen);
- if (!attr.headerTypeOK(weightHeader))
- _weightFile->Close();
+ if (!attr.headerTypeOK(_weightFile.header())) {
+ _weightFile.close();
+ }
}
- if (hasData() && AttributeVector::isEnumerated(_datHeader)) {
+ if (hasData() && AttributeVector::isEnumerated(_datFile.header())) {
_enumerated = true;
}
_hasLoadData = hasData() &&
@@ -93,40 +76,30 @@ ReaderBase::~ReaderBase() = default;
bool
ReaderBase::hasWeight() const {
- return _weightFile.get() && _weightFile->IsOpened();
+ return _weightFile.valid();
}
bool
ReaderBase::hasIdx() const {
- return _idxFile.get() && _idxFile->IsOpened();
+ return _idxFile.valid();
}
bool
ReaderBase::hasData() const {
- return _datFile.get() && _datFile->IsOpened();
-}
-
-bool
-ReaderBase::
-extractFileSize(const vespalib::GenericHeader &header,
- FastOS_FileInterface &file, uint64_t &fileSize)
-{
- fileSize = file.GetSize();
- return FileSizeCalculator::extractFileSize(header, header.getSize(),
- file.GetFileName(), fileSize);
+ return _datFile.valid();
}
void
ReaderBase::rewind()
{
- _datFile->SetPosition(_datHeaderLen);
+ _datFile.rewind();
_currIdx = 0;
if (hasIdx()) {
- _idxFile->SetPosition(_idxHeaderLen);
+ _idxFile.rewind();
_currIdx = _idxReader.readHostOrder();
}
if (hasWeight()) {
- _weightFile->SetPosition(_weightHeaderLen);
+ _weightFile.rewind();
}
}
@@ -137,13 +110,13 @@ ReaderBase::getNumValues()
return getEnumCount();
} else {
if (_fixedWidth > 0) {
- size_t dataSize(_datFileSize - _datHeaderLen);
+ size_t dataSize = _datFile.data_size();
assert((dataSize % _fixedWidth) == 0);
return dataSize / _fixedWidth;
} else {
// TODO. This limits the number of multivalues to 2^32-1
// This is assert during write, so this should never be a problem here.
- _idxFile->SetPosition(_idxFileSize - 4);
+ _idxFile.file().SetPosition(_idxFile.file_size() - 4);
size_t numValues = _idxReader.readHostOrder();
rewind();
return numValues;
diff --git a/searchlib/src/vespa/searchlib/attribute/readerbase.h b/searchlib/src/vespa/searchlib/attribute/readerbase.h
index a7685e4532a..c439fb81738 100644
--- a/searchlib/src/vespa/searchlib/attribute/readerbase.h
+++ b/searchlib/src/vespa/searchlib/attribute/readerbase.h
@@ -2,6 +2,7 @@
#pragma once
+#include <vespa/searchlib/util/file_with_header.h>
#include <vespa/searchlib/util/fileutil.h>
#include <cassert>
@@ -21,18 +22,15 @@ public:
bool hasData() const;
uint32_t getNumIdx() const {
- return (_idxFileSize - _idxHeaderLen) /sizeof(uint32_t);
+ return (_idxFile.data_size()) /sizeof(uint32_t);
}
size_t getEnumCount() const {
- size_t dataSize(_datFileSize - _datHeaderLen);
+ size_t dataSize = _datFile.data_size();
assert((dataSize % sizeof(uint32_t)) == 0);
return dataSize / sizeof(uint32_t);
}
- static bool
- extractFileSize(const vespalib::GenericHeader &header, FastOS_FileInterface &file, uint64_t &fileSize);
-
size_t getNumValues();
int32_t getNextWeight() { return _weightReader.readHostOrder(); }
uint32_t getNextEnum() { return _enumReader.readHostOrder(); }
@@ -43,32 +41,26 @@ public:
uint32_t getVersion() const { return _version; }
uint32_t getDocIdLimit() const { return _docIdLimit; }
const vespalib::GenericHeader &getDatHeader() const {
- return _datHeader;
+ return _datFile.header();
}
protected:
- std::unique_ptr<FastOS_FileInterface> _datFile;
+ FileWithHeader _datFile;
private:
- std::unique_ptr<FastOS_FileInterface> _weightFile;
- std::unique_ptr<FastOS_FileInterface> _idxFile;
+ FileWithHeader _weightFile;
+ FileWithHeader _idxFile;
FileReader<int32_t> _weightReader;
FileReader<uint32_t> _idxReader;
FileReader<uint32_t> _enumReader;
uint32_t _currIdx;
- uint32_t _datHeaderLen;
- uint32_t _idxHeaderLen;
- uint32_t _weightHeaderLen;
uint64_t _createSerialNum;
size_t _fixedWidth;
bool _enumerated;
bool _hasLoadData;
uint32_t _version;
uint32_t _docIdLimit;
- vespalib::FileHeader _datHeader;
- uint64_t _datFileSize;
- uint64_t _idxFileSize;
protected:
size_t getDataCountHelper(size_t elemSize) const {
- size_t dataSize(_datFileSize - _datHeaderLen);
+ size_t dataSize = _datFile.data_size();
return dataSize / elemSize;
}
};
diff --git a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp
index 42bc8438d8c..42418c736bf 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp
@@ -1,13 +1,14 @@
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "singleboolattribute.h"
#include "attributevector.hpp"
-#include "primitivereader.h"
#include "iattributesavetarget.h"
#include "ipostinglistsearchcontext.h"
+#include "primitivereader.h"
+#include "singleboolattribute.h"
+#include <vespa/searchlib/common/bitvectoriterator.h>
#include <vespa/searchlib/query/query_term_simple.h>
#include <vespa/searchlib/queryeval/emptysearch.h>
-#include <vespa/searchlib/common/bitvectoriterator.h>
+#include <vespa/searchlib/util/file_settings.h>
#include <vespa/vespalib/data/databuffer.h>
#include <vespa/vespalib/util/size_literals.h>
@@ -245,7 +246,7 @@ SingleBoolAttribute::onShrinkLidSpace()
uint64_t
SingleBoolAttribute::getEstimatedSaveByteSize() const
{
- constexpr uint64_t headerSize = 4_Ki + sizeof(uint32_t);
+ constexpr uint64_t headerSize = FileSettings::DIRECTIO_ALIGNMENT + sizeof(uint32_t);
return headerSize + _bv.sizeBytes();
}
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp
index 92e25097f40..359da48fb59 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp
@@ -1,7 +1,8 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "singlenumericattributesaver.h"
#include "iattributesavetarget.h"
+#include "singlenumericattributesaver.h"
+#include <vespa/searchlib/util/file_settings.h>
#include <vespa/vespalib/data/databuffer.h>
#include <vespa/vespalib/util/size_literals.h>
@@ -9,20 +10,13 @@ using vespalib::GenerationHandler;
namespace search {
-namespace {
-
-const uint32_t MIN_ALIGNMENT = 4_Ki;
-
-}
-
-
SingleValueNumericAttributeSaver::
SingleValueNumericAttributeSaver(const attribute::AttributeHeader &header,
const void *data, size_t size)
: AttributeSaver(vespalib::GenerationHandler::Guard(), header),
_buf()
{
- _buf = std::make_unique<BufferBuf>(size, MIN_ALIGNMENT);
+ _buf = std::make_unique<BufferBuf>(size, FileSettings::DIRECTIO_ALIGNMENT);
assert(_buf->getFreeLen() >= size);
if (size > 0) {
memcpy(_buf->getFree(), data, size);
@@ -31,11 +25,8 @@ SingleValueNumericAttributeSaver(const attribute::AttributeHeader &header,
assert(_buf->getDataLen() == size);
}
-
SingleValueNumericAttributeSaver::~SingleValueNumericAttributeSaver() = default;
-
-
bool
SingleValueNumericAttributeSaver::onSave(IAttributeSaveTarget &saveTarget)
{
@@ -43,5 +34,4 @@ SingleValueNumericAttributeSaver::onSave(IAttributeSaveTarget &saveTarget)
return true;
}
-
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp
index cdcc70cc01d..72aadbc6d93 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp
@@ -1,12 +1,13 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "singlesmallnumericattribute.h"
-#include "attributevector.hpp"
-#include "primitivereader.h"
#include "attributeiterators.hpp"
+#include "attributevector.hpp"
#include "iattributesavetarget.h"
+#include "primitivereader.h"
+#include "singlesmallnumericattribute.h"
#include <vespa/searchlib/query/query_term_simple.h>
#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <vespa/searchlib/util/file_settings.h>
#include <vespa/vespalib/data/databuffer.h>
#include <vespa/vespalib/util/size_literals.h>
@@ -200,7 +201,7 @@ SingleValueSmallNumericAttribute::onShrinkLidSpace()
uint64_t
SingleValueSmallNumericAttribute::getEstimatedSaveByteSize() const
{
- uint64_t headerSize = 4_Ki;
+ uint64_t headerSize = FileSettings::DIRECTIO_ALIGNMENT;
const size_t numDocs(getCommittedDocIdLimit());
const size_t numDataWords((numDocs + _valueShiftMask) >> _wordShift);
const size_t sz((numDataWords + 1) * sizeof(Word));
diff --git a/searchlib/src/vespa/searchlib/attribute/sourceselector.cpp b/searchlib/src/vespa/searchlib/attribute/sourceselector.cpp
index 8c1cbc3d672..7f5ae051978 100644
--- a/searchlib/src/vespa/searchlib/attribute/sourceselector.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/sourceselector.cpp
@@ -3,6 +3,7 @@
#include "sourceselector.h"
#include <vespa/fastlib/io/bufferedfile.h>
#include <vespa/searchlib/common/fileheadercontext.h>
+#include <vespa/searchlib/util/file_settings.h>
#include <vespa/vespalib/util/size_literals.h>
using search::queryeval::Source;
@@ -83,7 +84,7 @@ SourceSelector::LoadInfo::load()
// XXX no checking for success
file.ReadOpen(fileName.c_str());
- FileHeader fileHeader(4_Ki);
+ FileHeader fileHeader(FileSettings::DIRECTIO_ALIGNMENT);
fileHeader.readFile(file);
if (fileHeader.hasTag(defaultSourceTag)) {
_header._defaultSource = fileHeader.getTag(defaultSourceTag).asInteger();
diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp
index 97edece249e..6f551222286 100644
--- a/searchlib/src/vespa/searchlib/common/bitvector.cpp
+++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp
@@ -4,6 +4,7 @@
#include "allocatedbitvector.h"
#include "growablebitvector.h"
#include "partialbitvector.h"
+#include <vespa/searchlib/util/file_settings.h>
#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
#include <vespa/vespalib/util/exceptions.h>
#include <vespa/vespalib/util/size_literals.h>
@@ -34,7 +35,6 @@ void verifyInclusiveStart(const search::BitVector & a, const search::BitVector &
}
constexpr size_t MMAP_LIMIT = 256_Mi;
-constexpr size_t DIRECTIO_ALIGNMENT = 4_Ki;
}
@@ -341,7 +341,8 @@ BitVector::create(Index numberOfElements, FastOS_FileInterface &file,
size_t vectorsize = getFileBytes(numberOfElements);
file.DirectIOPadding(offset, vectorsize, padbefore, padafter);
assert((padbefore & (getAlignment() - 1)) == 0);
- AllocatedBitVector::Alloc alloc = Alloc::alloc(padbefore + vectorsize + padafter, MMAP_LIMIT, DIRECTIO_ALIGNMENT);
+ AllocatedBitVector::Alloc alloc = Alloc::alloc(padbefore + vectorsize + padafter,
+ MMAP_LIMIT, FileSettings::DIRECTIO_ALIGNMENT);
void * alignedBuffer = alloc.get();
file.ReadBuf(alignedBuffer, alloc.size(), offset - padbefore);
bv = std::make_unique<AllocatedBitVector>(numberOfElements, std::move(alloc), padbefore);
diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp
index 828e38b5267..2e4218ed36c 100644
--- a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp
@@ -1,9 +1,10 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "bitvectorfile.h"
-#include <vespa/searchlib/index/bitvectorkeys.h>
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/common/fileheadercontext.h>
+#include <vespa/searchlib/index/bitvectorkeys.h>
+#include <vespa/searchlib/util/file_settings.h>
#include <vespa/vespalib/data/fileheader.h>
#include <vespa/vespalib/util/size_literals.h>
#include <cassert>
@@ -25,8 +26,6 @@ readHeader(vespalib::FileHeader &h,
file.Close();
}
-const size_t FILE_HEADERSIZE_ALIGNMENT = 4_Ki;
-
}
BitVectorFileWrite::BitVectorFileWrite(BitVectorKeyScope scope)
@@ -93,7 +92,7 @@ BitVectorFileWrite::open(const vespalib::string &name,
void
BitVectorFileWrite::makeDatHeader(const FileHeaderContext &fileHeaderContext)
{
- vespalib::FileHeader h(FILE_HEADERSIZE_ALIGNMENT); // 64 byte alignment on bitvector.dat header
+ vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT);
typedef vespalib::GenericHeader::Tag Tag;
fileHeaderContext.addTags(h, _datFile->GetFileName());
h.putTag(Tag("docIdLimit", _docIdLimit));
@@ -110,7 +109,7 @@ BitVectorFileWrite::makeDatHeader(const FileHeaderContext &fileHeaderContext)
void
BitVectorFileWrite::updateDatHeader(uint64_t fileBitSize)
{
- vespalib::FileHeader h(FILE_HEADERSIZE_ALIGNMENT);
+ vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT);
typedef vespalib::GenericHeader::Tag Tag;
readHeader(h, _datFile->GetFileName());
FileHeaderContext::setFreezeTime(h);
diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp
index 95d2b286d7f..176840c4903 100644
--- a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp
@@ -1,9 +1,10 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "bitvectoridxfile.h"
-#include <vespa/searchlib/index/bitvectorkeys.h>
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/common/fileheadercontext.h>
+#include <vespa/searchlib/index/bitvectorkeys.h>
+#include <vespa/searchlib/util/file_settings.h>
#include <vespa/vespalib/data/fileheader.h>
#include <vespa/vespalib/util/size_literals.h>
#include <cassert>
@@ -24,8 +25,6 @@ readHeader(vespalib::FileHeader &h, const vespalib::string &name)
file.Close();
}
-const size_t FILE_HEADERSIZE_ALIGNMENT = 4_Ki;
-
}
BitVectorIdxFileWrite::BitVectorIdxFileWrite(BitVectorKeyScope scope)
@@ -90,7 +89,7 @@ BitVectorIdxFileWrite::open(const vespalib::string &name,
void
BitVectorIdxFileWrite::makeIdxHeader(const FileHeaderContext &fileHeaderContext)
{
- vespalib::FileHeader h(FILE_HEADERSIZE_ALIGNMENT);
+ vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT);
typedef vespalib::GenericHeader::Tag Tag;
fileHeaderContext.addTags(h, _idxFile->GetFileName());
h.putTag(Tag("docIdLimit", _docIdLimit));
@@ -108,7 +107,7 @@ BitVectorIdxFileWrite::makeIdxHeader(const FileHeaderContext &fileHeaderContext)
void
BitVectorIdxFileWrite::updateIdxHeader(uint64_t fileBitSize)
{
- vespalib::FileHeader h(FILE_HEADERSIZE_ALIGNMENT);
+ vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT);
typedef vespalib::GenericHeader::Tag Tag;
readHeader(h, _idxFile->GetFileName());
FileHeaderContext::setFreezeTime(h);
diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp
index 64fb6142db7..70a67705687 100644
--- a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp
@@ -2,7 +2,7 @@
#include "pagedict4file.h"
#include <vespa/searchlib/common/fileheadercontext.h>
-#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/searchlib/util/file_settings.h>
#include <vespa/vespalib/data/fileheader.h>
#include <vespa/vespalib/io/fileutil.h>
#include <vespa/vespalib/util/size_literals.h>
@@ -36,12 +36,6 @@ using vespalib::getLastErrorString;
namespace search::diskindex {
-namespace {
-
-const uint32_t headerAlign = 4_Ki;
-
-}
-
PageDict4FileSeqRead::PageDict4FileSeqRead()
: _pReader(nullptr),
_ssReader(nullptr),
@@ -467,7 +461,7 @@ PageDict4FileSeqWrite::makePHeader(const FileHeaderContext &fileHeaderContext)
// subheader only written to SS file.
typedef vespalib::GenericHeader::Tag Tag;
- vespalib::FileHeader header(headerAlign);
+ vespalib::FileHeader header(FileSettings::DIRECTIO_ALIGNMENT);
fileHeaderContext.addTags(header, _pfile.GetFileName());
header.putTag(Tag("frozen", 0));
@@ -499,7 +493,7 @@ PageDict4FileSeqWrite::makeSPHeader(const FileHeaderContext &fileHeaderContext)
// subheader only written to SS file.
typedef vespalib::GenericHeader::Tag Tag;
- vespalib::FileHeader header(headerAlign);
+ vespalib::FileHeader header(FileSettings::DIRECTIO_ALIGNMENT);
fileHeaderContext.addTags(header, _spfile.GetFileName());
header.putTag(Tag("frozen", 0));
@@ -529,7 +523,7 @@ PageDict4FileSeqWrite::makeSSHeader(const FileHeaderContext &fileHeaderContext)
ComprFileWriteContext &wc = _ssWriteContext;
typedef vespalib::GenericHeader::Tag Tag;
- vespalib::FileHeader header(headerAlign);
+ vespalib::FileHeader header(FileSettings::DIRECTIO_ALIGNMENT);
fileHeaderContext.addTags(header, _ssfile.GetFileName());
header.putTag(Tag("frozen", 0));
@@ -557,7 +551,7 @@ PageDict4FileSeqWrite::makeSSHeader(const FileHeaderContext &fileHeaderContext)
void
PageDict4FileSeqWrite::updatePHeader(uint64_t fileBitSize)
{
- vespalib::FileHeader h(headerAlign);
+ vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT);
FastOS_File f;
f.OpenReadWrite(_pfile.GetFileName());
h.readFile(f);
@@ -574,7 +568,7 @@ PageDict4FileSeqWrite::updatePHeader(uint64_t fileBitSize)
void
PageDict4FileSeqWrite::updateSPHeader(uint64_t fileBitSize)
{
- vespalib::FileHeader h(headerAlign);
+ vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT);
FastOS_File f;
f.OpenReadWrite(_spfile.GetFileName());
h.readFile(f);
@@ -591,7 +585,7 @@ PageDict4FileSeqWrite::updateSPHeader(uint64_t fileBitSize)
void
PageDict4FileSeqWrite::updateSSHeader(uint64_t fileBitSize)
{
- vespalib::FileHeader h(headerAlign);
+ vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT);
FastOS_File f;
f.OpenReadWrite(_ssfile.GetFileName());
h.readFile(f);
diff --git a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp
index f498b93ca14..5740092269d 100644
--- a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp
+++ b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp
@@ -1,16 +1,17 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "writeablefilechunk.h"
#include "data_store_file_chunk_stats.h"
#include "summaryexceptions.h"
-#include <vespa/vespalib/util/lambdatask.h>
-#include <vespa/vespalib/util/array.hpp>
-#include <vespa/vespalib/util/size_literals.h>
-#include <vespa/vespalib/data/fileheader.h>
-#include <vespa/vespalib/data/databuffer.h>
+#include "writeablefilechunk.h"
#include <vespa/searchlib/common/fileheadercontext.h>
-#include <vespa/vespalib/stllike/hash_map.hpp>
+#include <vespa/searchlib/util/file_settings.h>
+#include <vespa/vespalib/data/databuffer.h>
+#include <vespa/vespalib/data/fileheader.h>
#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/stllike/hash_map.hpp>
+#include <vespa/vespalib/util/array.hpp>
+#include <vespa/vespalib/util/lambdatask.h>
+#include <vespa/vespalib/util/size_literals.h>
#include <vespa/log/log.h>
LOG_SETUP(".search.writeablefilechunk");
@@ -27,8 +28,7 @@ namespace search {
namespace {
-const uint64_t Alignment = 4_Ki;
-const uint64_t headerAlign = 4_Ki;
+const size_t Alignment = FileSettings::DIRECTIO_ALIGNMENT;
}
@@ -785,7 +785,7 @@ void
WriteableFileChunk::writeDataHeader(const FileHeaderContext &fileHeaderContext)
{
typedef FileHeader::Tag Tag;
- FileHeader h(headerAlign);
+ FileHeader h(FileSettings::DIRECTIO_ALIGNMENT);
assert(_dataFile.IsOpened());
assert(_dataFile.IsWriteMode());
assert(_dataFile.GetPosition() == 0);
diff --git a/searchlib/src/vespa/searchlib/tensor/blob_sequence_reader.h b/searchlib/src/vespa/searchlib/tensor/blob_sequence_reader.h
index 7c34b60e93d..f96b4e46e98 100644
--- a/searchlib/src/vespa/searchlib/tensor/blob_sequence_reader.h
+++ b/searchlib/src/vespa/searchlib/tensor/blob_sequence_reader.h
@@ -17,10 +17,10 @@ private:
public:
BlobSequenceReader(AttributeVector &attr)
: ReaderBase(attr),
- _sizeReader(*_datFile)
+ _sizeReader(_datFile.file())
{ }
uint32_t getNextSize() { return _sizeReader.readHostOrder(); }
- void readBlob(void *buf, size_t len) { _datFile->ReadBuf(buf, len); }
+ void readBlob(void *buf, size_t len) { _datFile.file().ReadBuf(buf, len); }
};
} // namespace
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
index fd86fbf1c73..00aede95ca4 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
@@ -43,7 +43,7 @@ public:
BlobSequenceReader(AttributeVector &attr);
~BlobSequenceReader();
bool is_present();
- void readTensor(void *buf, size_t len) { _datFile->ReadBuf(buf, len); }
+ void readTensor(void *buf, size_t len) { _datFile.file().ReadBuf(buf, len); }
};
BlobSequenceReader::BlobSequenceReader(AttributeVector &attr)
@@ -55,7 +55,7 @@ BlobSequenceReader::~BlobSequenceReader() = default;
bool
BlobSequenceReader::is_present() {
unsigned char detect;
- _datFile->ReadBuf(&detect, sizeof(detect));
+ _datFile.file().ReadBuf(&detect, sizeof(detect));
if (detect == tensorIsNotPresent) {
return false;
}
diff --git a/searchlib/src/vespa/searchlib/util/CMakeLists.txt b/searchlib/src/vespa/searchlib/util/CMakeLists.txt
index 320a6480202..1b311e8d639 100644
--- a/searchlib/src/vespa/searchlib/util/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/util/CMakeLists.txt
@@ -6,6 +6,7 @@ vespa_add_library(searchlib_util OBJECT
comprfile.cpp
dirtraverse.cpp
drainingbufferwriter.cpp
+ file_with_header.cpp
filealign.cpp
fileheadertk.cpp
filekit.cpp
diff --git a/searchlib/src/vespa/searchlib/util/file_settings.h b/searchlib/src/vespa/searchlib/util/file_settings.h
new file mode 100644
index 00000000000..1bdd1a56cda
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/file_settings.h
@@ -0,0 +1,17 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/util/size_literals.h>
+
+namespace search {
+
+/**
+ * Settings shared by the code that performs file I/O.
+ */
+struct FileSettings {
+    // Alignment, in bytes, applied to DIRECT I/O reads and writes.
+    static constexpr size_t DIRECTIO_ALIGNMENT = 4_Ki;
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/util/file_with_header.cpp b/searchlib/src/vespa/searchlib/util/file_with_header.cpp
new file mode 100644
index 00000000000..b004f2b29d5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/file_with_header.cpp
@@ -0,0 +1,80 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "file_settings.h"
+#include "file_with_header.h"
+#include "filesizecalculator.h"
+#include <vespa/fastos/file.h>
+#include <vespa/vespalib/util/size_literals.h>
+
+namespace search {
+
+namespace {
+
+/**
+ * Determines the size of the given file.
+ *
+ * file_size is first set to the size reported by the file itself and is then
+ * handed to FileSizeCalculator::extractFileSize() together with the header.
+ *
+ * @return the result of FileSizeCalculator::extractFileSize().
+ */
+bool
+extract_file_size(const vespalib::GenericHeader& header,
+                  FastOS_FileInterface& file, uint64_t& file_size)
+{
+    file_size = file.GetSize();
+    return FileSizeCalculator::extractFileSize(header, header.getSize(), file.GetFileName(), file_size);
+}
+
+}
+
+/**
+ * Takes ownership of file_in. When it refers to an open file, the header is
+ * read, the file is positioned right after it and the file size is extracted.
+ * The file is closed if the size extraction reports failure.
+ */
+FileWithHeader::FileWithHeader(std::unique_ptr<FastOS_FileInterface> file_in)
+    : _file(std::move(file_in)),
+      _header(FileSettings::DIRECTIO_ALIGNMENT),
+      _header_len(0),
+      _file_size(0)
+{
+    if (valid()) {
+        _header_len = _header.readFile(*_file);
+        _file->SetPosition(_header_len);
+        if (!extract_file_size(_header, *_file, _file_size)) {
+            _file->Close();
+        }
+    }
+}
+
+// Defined here, where FastOS_FileInterface is a complete type (the header only forward declares it).
+FileWithHeader::~FileWithHeader() = default;
+
+/** Returns true when a file object is held and that file is open. */
+bool
+FileWithHeader::valid() const
+{
+    return _file && _file->IsOpened();
+}
+
+/** Positions the file at the first byte after the header. No-op when no file object is held. */
+void
+FileWithHeader::rewind()
+{
+    if (_file) {
+        _file->SetPosition(_header_len);
+    }
+}
+
+/** Closes the file. No-op when no file object is held. */
+void
+FileWithHeader::close()
+{
+    if (_file) {
+        _file->Close();
+    }
+}
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/file_with_header.h b/searchlib/src/vespa/searchlib/util/file_with_header.h
new file mode 100644
index 00000000000..4432b76be67
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/file_with_header.h
@@ -0,0 +1,48 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/data/fileheader.h>
+#include <memory>
+
+class FastOS_FileInterface;
+
+namespace search {
+
+/**
+ * Owns a file that starts with a generic file header and continues with binary data.
+ *
+ * If the supplied file is open, construction reads the header and leaves the
+ * file positioned at the first byte after it.
+ * It's assumed that the file was written using FileSettings::DIRECTIO_ALIGNMENT.
+ */
+class FileWithHeader {
+public:
+    // Takes ownership of file_in.
+    FileWithHeader(std::unique_ptr<FastOS_FileInterface> file_in);
+    ~FileWithHeader();
+
+    // True when a file object is held and that file is open.
+    bool valid() const;
+    // Sets the file position to the first byte after the header.
+    void rewind();
+    // Closes the underlying file.
+    void close();
+
+    // The underlying file. Dereferences the held pointer without checking it.
+    FastOS_FileInterface& file() const { return *_file; }
+    // The header object that construction reads into.
+    const vespalib::GenericHeader& header() const { return _header; }
+    // File size as determined during construction (0 if the file was not open).
+    uint64_t file_size() const { return _file_size; }
+    // Size of what follows the header: file size minus header length.
+    uint64_t data_size() const { return _file_size - _header_len; }
+
+private:
+    std::unique_ptr<FastOS_FileInterface> _file;
+    vespalib::FileHeader                  _header;
+    uint64_t                              _header_len;
+    uint64_t                              _file_size;
+};
+
+}