diff options
8 files changed, 148 insertions, 288 deletions
diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp index d71ddc2c2d6..64a54187254 100644 --- a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp @@ -39,7 +39,9 @@ DiskIndex::Key::Key() = default; DiskIndex::Key::Key(const IndexList & indexes, vespalib::stringref word) : _word(word), _indexes(indexes) -{ } +{ +} + DiskIndex::Key::~Key() = default; DiskIndex::DiskIndex(const vespalib::string &indexDir, size_t cacheSize) @@ -73,7 +75,6 @@ DiskIndex::loadSchema() return true; } - bool DiskIndex::openDictionaries(const TuneFileSearch &tuneFileSearch) { @@ -91,7 +92,6 @@ DiskIndex::openDictionaries(const TuneFileSearch &tuneFileSearch) return true; } - bool DiskIndex::openField(const vespalib::string &fieldDir, const TuneFileSearch &tuneFileSearch) @@ -147,7 +147,6 @@ DiskIndex::openField(const vespalib::string &fieldDir, return true; } - bool DiskIndex::setup(const TuneFileSearch &tuneFileSearch) { @@ -165,7 +164,6 @@ DiskIndex::setup(const TuneFileSearch &tuneFileSearch) return true; } - bool DiskIndex::setup(const TuneFileSearch &tuneFileSearch, const DiskIndex &old) @@ -315,7 +313,6 @@ DiskIndex::readPostingList(const LookupResult &lookupRes) const return handle; } - BitVector::UP DiskIndex::readBitVector(const LookupResult &lookupRes) const { @@ -327,7 +324,6 @@ DiskIndex::readBitVector(const LookupResult &lookupRes) const return dict->lookup(lookupRes.wordNum); } - void DiskIndex::calculateSize() { @@ -335,19 +331,18 @@ DiskIndex::calculateSize() _size = dirt.GetTreeSize(); } - namespace { DiskIndex::LookupResult _G_nothing; -class LookupCache -{ +class LookupCache { public: LookupCache(DiskIndex & diskIndex, const std::vector<uint32_t> & fieldIds) : _diskIndex(diskIndex), _fieldIds(fieldIds), _cache() - { } + { + } const DiskIndex::LookupResult & lookup(const vespalib::string & word, uint32_t fieldId) { Cache::const_iterator it = 
_cache.find(word); @@ -363,14 +358,14 @@ public: return _G_nothing; } private: + typedef vespalib::hash_map<vespalib::string, DiskIndex::LookupResultVector> Cache; DiskIndex & _diskIndex; const std::vector<uint32_t> & _fieldIds; Cache _cache; }; -class CreateBlueprintVisitor : public CreateBlueprintVisitorHelper -{ +class CreateBlueprintVisitor : public CreateBlueprintVisitorHelper { private: LookupCache &_cache; DiskIndex &_diskIndex; @@ -391,8 +386,7 @@ public: } template <class TermNode> - void visitTerm(TermNode &n) - { + void visitTerm(TermNode &n) { const vespalib::string termStr = termAsString(n); const DiskIndex::LookupResult & lookupRes = _cache.lookup(termStr, _fieldId); if (lookupRes.valid()) { @@ -418,7 +412,6 @@ public: void visit(PredicateQuery &) override { } }; - Blueprint::UP createBlueprintHelper(LookupCache & cache, DiskIndex & diskIndex, const IRequestContext & requestContext, const FieldSpec &field, uint32_t fieldId, const Node &term) @@ -442,7 +435,6 @@ DiskIndex::createBlueprint(const IRequestContext & requestContext, const FieldSp return createBlueprintHelper(cache, *this, requestContext, field, fieldIds[0], term); } - Blueprint::UP DiskIndex::createBlueprint(const IRequestContext & requestContext, const FieldSpecList &fields, const Node &term) { diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.h b/searchlib/src/vespa/searchlib/diskindex/diskindex.h index 4bef53a3030..d83b2f56d7c 100644 --- a/searchlib/src/vespa/searchlib/diskindex/diskindex.h +++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.h @@ -12,14 +12,13 @@ namespace search::diskindex { /** - * This class represents a disk index with a common dictionary, and - * posting list files and bit vector files for each field. - * Parts of the disk dictionary and all bit vector - * dictionaries are loaded into memory during setup. All other files - * are just opened, ready for later access. 
- **/ -class DiskIndex : public queryeval::Searchable -{ + * This class represents a disk index that contains a set of field indexes that are independent of each other. + * + * Each field index has a dictionary, posting list files and bit vector files. + * Parts of the disk dictionary and all bit vector dictionaries are loaded into memory during setup. + * All other files are just opened, ready for later access. + */ +class DiskIndex : public queryeval::Searchable { public: /** * The result after performing a disk dictionary lookup. @@ -60,11 +59,12 @@ public: vespalib::string _word; IndexList _indexes; }; + private: - typedef index::PostingListFileRandRead DiskPostingFile; - typedef Zc4PosOccRandRead DiskPostingFileReal; - typedef ZcPosOccRandRead DiskPostingFileDynamicKReal; - typedef vespalib::cache<vespalib::CacheParam<vespalib::LruParam<Key, LookupResultVector>, DiskIndex>> Cache; + using DiskPostingFile = index::PostingListFileRandRead; + using DiskPostingFileReal = Zc4PosOccRandRead; + using DiskPostingFileDynamicKReal = ZcPosOccRandRead; + using Cache = vespalib::cache<vespalib::CacheParam<vespalib::LruParam<Key, LookupResultVector>, DiskIndex>>; vespalib::string _indexDir; size_t _cacheSize; @@ -83,11 +83,11 @@ private: public: /** - * Create a view of the disk index located in the given directory - * described by the given schema. + * Create a view of the disk index located in the given directory. * * @param indexDir the directory where the disk index is located. - **/ + * @param cacheSize optional size (in bytes) of the disk dictionary lookup cache. + */ DiskIndex(const vespalib::string &indexDir, size_t cacheSize=0); ~DiskIndex(); @@ -95,29 +95,27 @@ public: * Setup this instance by opening and loading relevant index files. * * @return true if this instance was successfully setup. 
- **/ + */ bool setup(const TuneFileSearch &tuneFileSearch); bool setup(const TuneFileSearch &tuneFileSearch, const DiskIndex &old); /** - * Perform a dictionary lookup for the given word in the given - * field. + * Perform a dictionary lookup for the given word in the given field. * - * @param indexId the id of the field to - * perform lookup for. + * @param indexId the id of the field to perform lookup for. * @param word the word to lookup. * @return the lookup result or nullptr if the word is not found. - **/ + */ LookupResult::UP lookup(uint32_t indexId, vespalib::stringref word); - LookupResultVector lookup(const std::vector<uint32_t> & indexes, vespalib::stringref word); + LookupResultVector lookup(const std::vector<uint32_t> & indexes, vespalib::stringref word); /** * Read the posting list corresponding to the given lookup result. * * @param lookupRes the result of the previous dictionary lookup. * @return a handle for the posting list in memory. - **/ + */ index::PostingListHandle::UP readPostingList(const LookupResult &lookupRes) const; /** @@ -126,22 +124,19 @@ public: * @param lookupRes the result of the previous dictionary lookup. * @return the bit vector or nullptr if no bit vector exists for the * word in the lookup result. 
- **/ + */ BitVector::UP readBitVector(const LookupResult &lookupRes) const; - queryeval::Blueprint::UP - createBlueprint(const queryeval::IRequestContext & requestContext, - const queryeval::FieldSpec &field, - const query::Node &term) override; + queryeval::Blueprint::UP createBlueprint(const queryeval::IRequestContext & requestContext, + const queryeval::FieldSpec &field, + const query::Node &term) override; - queryeval::Blueprint::UP - createBlueprint(const queryeval::IRequestContext & requestContext, - const queryeval::FieldSpecList &fields, - const query::Node &term) override; + queryeval::Blueprint::UP createBlueprint(const queryeval::IRequestContext & requestContext, + const queryeval::FieldSpecList &fields, + const query::Node &term) override; /** * Get the size on disk of this index. - * @return the size of the index. */ uint64_t getSize() const { return _size; } diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp index 6454c0851a7..8c2b33a933e 100644 --- a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp @@ -98,7 +98,6 @@ FieldWriter::open(const vespalib::string &prefix, return true; } - void FieldWriter::flush() { @@ -120,7 +119,6 @@ FieldWriter::flush() } } - void FieldWriter::newWord(uint64_t wordNum, vespalib::stringref word) { @@ -134,14 +132,12 @@ FieldWriter::newWord(uint64_t wordNum, vespalib::stringref word) _prevDocId = 0; } - void FieldWriter::newWord(vespalib::stringref word) { newWord(_wordNum + 1, word); } - bool FieldWriter::close() { @@ -183,7 +179,6 @@ FieldWriter::getFeatureParams(PostingListParams &params) _posoccfile->getFeatureParams(params); } - static const char *termOccNames[] = { "boolocc.bdat", @@ -199,7 +194,6 @@ static const char *termOccNames[] = nullptr, }; - void FieldWriter::remove(const vespalib::string &prefix) { diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h 
b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h index 9a6edf90243..1e9afb717e8 100644 --- a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h +++ b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h @@ -10,15 +10,13 @@ namespace search::diskindex { -/* - * FieldWriter is used to write a dictionary and posting list file - * together. +/** + * FieldWriter is used to write a dictionary and posting list file together. * * It is used by the fusion code to write the merged output for a field, * and by the memory index dump code to write a field to disk. */ -class FieldWriter -{ +class FieldWriter { private: uint64_t _wordNum; uint32_t _prevDocId; @@ -28,14 +26,15 @@ public: using DictionaryFileSeqWrite = index::DictionaryFileSeqWrite; - typedef index::PostingListFileSeqWrite PostingListFileSeqWrite; - typedef index::DocIdAndFeatures DocIdAndFeatures; - typedef index::Schema Schema; - typedef index::PostingListCounts PostingListCounts; - typedef index::PostingListParams PostingListParams; + using PostingListFileSeqWrite = index::PostingListFileSeqWrite; + using DocIdAndFeatures = index::DocIdAndFeatures; + using Schema = index::Schema; + using PostingListCounts = index::PostingListCounts; + using PostingListParams = index::PostingListParams; std::unique_ptr<DictionaryFileSeqWrite> _dictFile; std::unique_ptr<PostingListFileSeqWrite> _posoccfile; + private: BitVectorCandidate _bvc; BitVectorFileWrite _bmapfile; diff --git a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp index a3c37cb91f6..964f37eb5cf 100644 --- a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp @@ -33,8 +33,7 @@ noWordPos() } -class FileHandle -{ +class FileHandle { public: FieldWriter *_fieldWriter; DocIdAndFeatures _docIdAndFeatures; @@ -43,22 +42,18 @@ public: ~FileHandle(); - void - open(vespalib::stringref dir, - const SchemaUtil::IndexIterator 
&index, - uint32_t docIdLimit, uint64_t numWordIds, - const TuneFileSeqWrite &tuneFileWrite, - const FileHeaderContext &fileHeaderContext); + void open(vespalib::stringref dir, + const SchemaUtil::IndexIterator &index, + uint32_t docIdLimit, uint64_t numWordIds, + const TuneFileSeqWrite &tuneFileWrite, + const FileHeaderContext &fileHeaderContext); - void - close(); + void close(); }; - } -class IndexBuilder::FieldHandle -{ +class IndexBuilder::FieldHandle { public: FieldHandle(const Schema &schema, uint32_t fieldId, @@ -66,20 +61,15 @@ public: ~FieldHandle(); - static uint32_t - noDocRef() - { + static uint32_t noDocRef() { return std::numeric_limits<uint32_t>::max(); } - static uint32_t - noElRef() - { + static uint32_t noElRef() { return std::numeric_limits<uint32_t>::max(); } - class FHWordDocFieldFeatures - { + class FHWordDocFieldFeatures { public: uint32_t _docId; uint32_t _numElements; @@ -90,28 +80,12 @@ public: { } - uint32_t - getDocId() const - { - return _docId; - } - - uint32_t - getNumElements() const - { - return _numElements; - } - - void - incNumElements() - { - ++_numElements; - } + uint32_t getDocId() const { return _docId; } + uint32_t getNumElements() const { return _numElements; } + void incNumElements() { ++_numElements; } }; - class FHWordDocElementFeatures - : public WordDocElementFeatures - { + class FHWordDocElementFeatures : public WordDocElementFeatures { public: uint32_t _docRef; @@ -127,24 +101,21 @@ public: } }; - class FHWordDocElementWordPosFeatures - : public WordDocElementWordPosFeatures - { + class FHWordDocElementWordPosFeatures : public WordDocElementWordPosFeatures { public: uint32_t _elementRef; - FHWordDocElementWordPosFeatures( - const WordDocElementWordPosFeatures &features, - uint32_t elementRef) + FHWordDocElementWordPosFeatures(const WordDocElementWordPosFeatures &features, + uint32_t elementRef) : WordDocElementWordPosFeatures(features), _elementRef(elementRef) { } }; - typedef vespalib::Array<FHWordDocFieldFeatures> 
FHWordDocFieldFeaturesVector; - typedef vespalib::Array<FHWordDocElementFeatures> FHWordDocElementFeaturesVector; - typedef vespalib::Array<FHWordDocElementWordPosFeatures> FHWordDocElementWordPosFeaturesVector; + using FHWordDocFieldFeaturesVector = vespalib::Array<FHWordDocFieldFeatures>; + using FHWordDocElementFeaturesVector = vespalib::Array<FHWordDocElementFeatures>; + using FHWordDocElementWordPosFeaturesVector = vespalib::Array<FHWordDocElementWordPosFeatures>; FHWordDocFieldFeaturesVector _wdff; FHWordDocElementFeaturesVector _wdfef; @@ -162,72 +133,35 @@ public: FileHandle _files; - void - startWord(vespalib::stringref word); - - void - endWord(); - - void - startDocument(uint32_t docId); - - void - endDocument(); - - void - startElement(uint32_t elementId, - int32_t weight, - uint32_t elementLen); - - void - endElement(); - - void - addOcc(const WordDocElementWordPosFeatures &features); - - void - setValid() - { - _valid = true; - } - - bool - getValid() const - { - return _valid; - } - - const Schema::IndexField & - getSchemaField(); - - const vespalib::string & - getName(); - - vespalib::string - getDir(); - - void - open(uint32_t docIdLimit, uint64_t numWordIds, - const TuneFileSeqWrite &tuneFileWrite, - const FileHeaderContext &fileHeaderContext); - - void - close(); - - uint32_t - getIndexId() const - { - return _fieldId; - } + void startWord(vespalib::stringref word); + void endWord(); + void startDocument(uint32_t docId); + void endDocument(); + void startElement(uint32_t elementId, + int32_t weight, + uint32_t elementLen); + void endElement(); + void addOcc(const WordDocElementWordPosFeatures &features); + + const Schema::IndexField &getSchemaField(); + const vespalib::string &getName(); + vespalib::string getDir(); + void open(uint32_t docIdLimit, uint64_t numWordIds, + const TuneFileSeqWrite &tuneFileWrite, + const FileHeaderContext &fileHeaderContext); + void close(); + + void setValid() { _valid = true; } + bool getValid() const { return 
_valid; } + uint32_t getIndexId() const { return _fieldId; } }; namespace { -class SingleIterator -{ +class SingleIterator { public: - typedef IndexBuilder::FieldHandle FH; + using FH = IndexBuilder::FieldHandle; FH::FHWordDocFieldFeaturesVector::const_iterator _dFeatures; FH::FHWordDocFieldFeaturesVector::const_iterator _dFeaturesE; FH::FHWordDocElementFeaturesVector::const_iterator _elFeatures; @@ -237,18 +171,13 @@ public: SingleIterator(FH &fieldHandle, uint32_t localFieldId); - void - appendFeatures(DocIdAndFeatures &features); + void appendFeatures(DocIdAndFeatures &features); - bool - isValid() const - { + bool isValid() const { return _dFeatures != _dFeaturesE; } - bool - operator<(const SingleIterator &rhs) const - { + bool operator<(const SingleIterator &rhs) const { if (_docId != rhs._docId) { return _docId < rhs._docId; } @@ -256,23 +185,19 @@ public: } }; - } - FileHandle::FileHandle() : _fieldWriter(nullptr), _docIdAndFeatures() { } - FileHandle::~FileHandle() { delete _fieldWriter; } - void FileHandle::open(vespalib::stringref dir, const SchemaUtil::IndexIterator &index, @@ -293,7 +218,6 @@ FileHandle::open(vespalib::stringref dir, } } - void FileHandle::close() { @@ -312,7 +236,6 @@ FileHandle::close() (void) ret; } - IndexBuilder::FieldHandle::FieldHandle(const Schema &schema, uint32_t fieldId, IndexBuilder *ib) @@ -331,10 +254,8 @@ IndexBuilder::FieldHandle::FieldHandle(const Schema &schema, { } - IndexBuilder::FieldHandle::~FieldHandle() = default; - void IndexBuilder::FieldHandle::startWord(vespalib::stringref word) { @@ -342,7 +263,6 @@ IndexBuilder::FieldHandle::startWord(vespalib::stringref word) _files._fieldWriter->newWord(word); } - void IndexBuilder::FieldHandle::endWord() { @@ -362,7 +282,6 @@ IndexBuilder::FieldHandle::endWord() _elRef = noElRef(); } - void IndexBuilder::FieldHandle::startDocument(uint32_t docId) { @@ -373,7 +292,6 @@ IndexBuilder::FieldHandle::startDocument(uint32_t docId) _lowestOKElementId = 0u; } - void 
IndexBuilder::FieldHandle::endDocument() { @@ -385,12 +303,10 @@ IndexBuilder::FieldHandle::endDocument() _docRef = noDocRef(); } - void -IndexBuilder::FieldHandle:: -startElement(uint32_t elementId, - int32_t weight, - uint32_t elementLen) +IndexBuilder::FieldHandle::startElement(uint32_t elementId, + int32_t weight, + uint32_t elementLen) { assert(_docRef != noDocRef()); assert(_elRef == noElRef()); @@ -407,7 +323,6 @@ startElement(uint32_t elementId, _lowestOKWordPos = 0u; } - void IndexBuilder::FieldHandle::endElement() { @@ -418,10 +333,8 @@ IndexBuilder::FieldHandle::endElement() _lowestOKElementId = ef.getElementId() + 1; } - void -IndexBuilder::FieldHandle:: -addOcc(const WordDocElementWordPosFeatures &features) +IndexBuilder::FieldHandle::addOcc(const WordDocElementWordPosFeatures &features) { assert(_elRef != noElRef()); FHWordDocElementFeatures &ef = _wdfef[_elRef]; @@ -435,29 +348,24 @@ addOcc(const WordDocElementWordPosFeatures &features) ef.incNumOccs(); } - const Schema::IndexField & IndexBuilder::FieldHandle::getSchemaField() { return _schema->getIndexField(_fieldId); } - const vespalib::string & IndexBuilder::FieldHandle::getName() { return getSchemaField().getName(); - } - vespalib::string IndexBuilder::FieldHandle::getDir() { return _ib->appendToPrefix(getName()); } - void IndexBuilder::FieldHandle::open(uint32_t docIdLimit, uint64_t numWordIds, const TuneFileSeqWrite &tuneFileWrite, @@ -468,14 +376,12 @@ IndexBuilder::FieldHandle::open(uint32_t docIdLimit, uint64_t numWordIds, docIdLimit, numWordIds, tuneFileWrite, fileHeaderContext); } - void IndexBuilder::FieldHandle::close() { _files.close(); } - SingleIterator::SingleIterator(FH &fieldHandle, uint32_t localFieldId) : _dFeatures(fieldHandle._wdff.begin()), _dFeaturesE(fieldHandle._wdff.end()), @@ -486,7 +392,6 @@ SingleIterator::SingleIterator(FH &fieldHandle, uint32_t localFieldId) { } - void SingleIterator::appendFeatures(DocIdAndFeatures &features) { @@ -511,7 +416,6 @@ 
SingleIterator::appendFeatures(DocIdAndFeatures &features) } } - IndexBuilder::IndexBuilder(const Schema &schema) : index::IndexBuilder(schema), _currentField(nullptr), @@ -541,6 +445,27 @@ IndexBuilder::IndexBuilder(const Schema &schema) IndexBuilder::~IndexBuilder() = default; void +IndexBuilder::startField(uint32_t fieldId) +{ + assert(_curDocId == noDocId()); + assert(_currentField == nullptr); + assert(fieldId < _fields.size()); + assert(fieldId >= _lowestOKFieldId); + _currentField = &_fields[fieldId]; + assert(_currentField != nullptr); +} + +void +IndexBuilder::endField() +{ + assert(_curDocId == noDocId()); + assert(!_inWord); + assert(_currentField != nullptr); + _lowestOKFieldId = _currentField->_fieldId + 1; + _currentField = nullptr; +} + +void IndexBuilder::startWord(vespalib::stringref word) { assert(_currentField != nullptr); @@ -551,7 +476,6 @@ IndexBuilder::startWord(vespalib::stringref word) _currentField->startWord(word); } - void IndexBuilder::endWord() { @@ -562,7 +486,6 @@ IndexBuilder::endWord() _lowestOKDocId = 1u; } - void IndexBuilder::startDocument(uint32_t docId) { @@ -575,7 +498,6 @@ IndexBuilder::startDocument(uint32_t docId) _currentField->startDocument(docId); } - void IndexBuilder::endDocument() { @@ -586,30 +508,6 @@ IndexBuilder::endDocument() _curDocId = noDocId(); } - -void -IndexBuilder::startField(uint32_t fieldId) -{ - assert(_curDocId == noDocId()); - assert(_currentField == nullptr); - assert(fieldId < _fields.size()); - assert(fieldId >= _lowestOKFieldId); - _currentField = &_fields[fieldId]; - assert(_currentField != nullptr); -} - - -void -IndexBuilder::endField() -{ - assert(_curDocId == noDocId()); - assert(!_inWord); - assert(_currentField != nullptr); - _lowestOKFieldId = _currentField->_fieldId + 1; - _currentField = nullptr; -} - - void IndexBuilder::startElement(uint32_t elementId, int32_t weight, @@ -619,7 +517,6 @@ IndexBuilder::startElement(uint32_t elementId, _currentField->startElement(elementId, weight, 
elementLen); } - void IndexBuilder::endElement() { @@ -627,7 +524,6 @@ IndexBuilder::endElement() _currentField->endElement(); } - void IndexBuilder::addOcc(const WordDocElementWordPosFeatures &features) { @@ -635,14 +531,12 @@ IndexBuilder::addOcc(const WordDocElementWordPosFeatures &features) _currentField->addOcc(features); } - void IndexBuilder::setPrefix(vespalib::stringref prefix) { _prefix = prefix; } - vespalib::string IndexBuilder::appendToPrefix(vespalib::stringref name) { @@ -652,7 +546,6 @@ IndexBuilder::appendToPrefix(vespalib::stringref name) return _prefix + "/" + name; } - void IndexBuilder::open(uint32_t docIdLimit, uint64_t numWordIds, const TuneFileIndexing &tuneFileIndexing, @@ -682,7 +575,6 @@ IndexBuilder::open(uint32_t docIdLimit, uint64_t numWordIds, } } - void IndexBuilder::close() { diff --git a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h index fa818bf08e6..43ac49a0a72 100644 --- a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h +++ b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h @@ -13,12 +13,16 @@ namespace search::diskindex { class BitVectorCandidate; -class IndexBuilder : public index::IndexBuilder -{ +/** + * Class used to build a disk index for the set of index fields specified in a schema. + * + * The resulting disk index consists of field indexes that are independent of each other. 
+ */ +class IndexBuilder : public index::IndexBuilder { public: class FieldHandle; - typedef index::Schema Schema; + using Schema = index::Schema; private: // Text fields FieldHandle *_currentField; @@ -49,19 +53,16 @@ public: IndexBuilder(const Schema &schema); ~IndexBuilder() override; + void startField(uint32_t fieldId) override; + void endField() override; void startWord(vespalib::stringref word) override; void endWord() override; void startDocument(uint32_t docId) override; void endDocument() override; - void startField(uint32_t fieldId) override; - void endField() override; void startElement(uint32_t elementId, int32_t weight, uint32_t elementLen) override; void endElement() override; void addOcc(const WordDocElementWordPosFeatures &features) override; - // TODO: methods for attribute vectors. - - // TODO: methods for document summary. void setPrefix(vespalib::stringref prefix); vespalib::string appendToPrefix(vespalib::stringref name); diff --git a/searchlib/src/vespa/searchlib/index/indexbuilder.cpp b/searchlib/src/vespa/searchlib/index/indexbuilder.cpp index 6b88c51e6cc..d585238107a 100644 --- a/searchlib/src/vespa/searchlib/index/indexbuilder.cpp +++ b/searchlib/src/vespa/searchlib/index/indexbuilder.cpp @@ -6,7 +6,8 @@ namespace search::index { IndexBuilder::IndexBuilder(const Schema &schema) : _schema(schema) -{ } +{ +} IndexBuilder::~IndexBuilder() = default; diff --git a/searchlib/src/vespa/searchlib/index/indexbuilder.h b/searchlib/src/vespa/searchlib/index/indexbuilder.h index 66ca740a20c..0496809336b 100644 --- a/searchlib/src/vespa/searchlib/index/indexbuilder.h +++ b/searchlib/src/vespa/searchlib/index/indexbuilder.h @@ -8,6 +8,15 @@ namespace search::index { class Schema; class WordDocElementWordPosFeatures; +/** + * Interface used to build an index for the set of index fields specified in a schema. + * + * + * The index should be built as follows: + * For each field add the set of unique words in sorted order. 
+ * For each word add the set of document ids in sorted order. + * For each document id add the position information for that document. + */ class IndexBuilder { protected: const Schema &_schema; @@ -15,39 +24,16 @@ protected: public: IndexBuilder(const Schema &schema); - virtual - ~IndexBuilder(); - - virtual void - startWord(vespalib::stringref word) = 0; - - virtual void - endWord() = 0; - - virtual void - startDocument(uint32_t docId) = 0; - - virtual void - endDocument() = 0; - - virtual void - startField(uint32_t fieldId) = 0; - - virtual void - endField() = 0; - - virtual void - startElement(uint32_t elementId, int32_t weight, uint32_t elementLen) = 0; - - virtual void - endElement() = 0; - - virtual void - addOcc(const WordDocElementWordPosFeatures &features) = 0; - - // TODO: methods for attribute vectors. - - // TODO: methods for document summary. + virtual ~IndexBuilder(); + virtual void startField(uint32_t fieldId) = 0; + virtual void endField() = 0; + virtual void startWord(vespalib::stringref word) = 0; + virtual void endWord() = 0; + virtual void startDocument(uint32_t docId) = 0; + virtual void endDocument() = 0; + virtual void startElement(uint32_t elementId, int32_t weight, uint32_t elementLen) = 0; + virtual void endElement() = 0; + virtual void addOcc(const WordDocElementWordPosFeatures &features) = 0; }; } |