summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Geir Storli <geirst@verizonmedia.com> 2019-04-25 08:36:01 +0000
committer Geir Storli <geirst@verizonmedia.com> 2019-04-25 08:36:01 +0000
commit f41e497ef360aeda5d67827b3ac72cb10ad662b3 (patch)
tree a66028eb40a0949ab67dcef9bc77e42d27ac05d6 /searchlib
parent 38d990ac83653b10d06619173f23355d9769cf65 (diff)
Add more class comments + style changes.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/diskindex.cpp 26
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/diskindex.h 61
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp 6
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/fieldwriter.h 19
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp 252
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/indexbuilder.h 17
-rw-r--r-- searchlib/src/vespa/searchlib/index/indexbuilder.cpp 3
-rw-r--r-- searchlib/src/vespa/searchlib/index/indexbuilder.h 52
8 files changed, 148 insertions, 288 deletions
diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
index d71ddc2c2d6..64a54187254 100644
--- a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
@@ -39,7 +39,9 @@ DiskIndex::Key::Key() = default;
DiskIndex::Key::Key(const IndexList & indexes, vespalib::stringref word) :
_word(word),
_indexes(indexes)
-{ }
+{
+}
+
DiskIndex::Key::~Key() = default;
DiskIndex::DiskIndex(const vespalib::string &indexDir, size_t cacheSize)
@@ -73,7 +75,6 @@ DiskIndex::loadSchema()
return true;
}
-
bool
DiskIndex::openDictionaries(const TuneFileSearch &tuneFileSearch)
{
@@ -91,7 +92,6 @@ DiskIndex::openDictionaries(const TuneFileSearch &tuneFileSearch)
return true;
}
-
bool
DiskIndex::openField(const vespalib::string &fieldDir,
const TuneFileSearch &tuneFileSearch)
@@ -147,7 +147,6 @@ DiskIndex::openField(const vespalib::string &fieldDir,
return true;
}
-
bool
DiskIndex::setup(const TuneFileSearch &tuneFileSearch)
{
@@ -165,7 +164,6 @@ DiskIndex::setup(const TuneFileSearch &tuneFileSearch)
return true;
}
-
bool
DiskIndex::setup(const TuneFileSearch &tuneFileSearch,
const DiskIndex &old)
@@ -315,7 +313,6 @@ DiskIndex::readPostingList(const LookupResult &lookupRes) const
return handle;
}
-
BitVector::UP
DiskIndex::readBitVector(const LookupResult &lookupRes) const
{
@@ -327,7 +324,6 @@ DiskIndex::readBitVector(const LookupResult &lookupRes) const
return dict->lookup(lookupRes.wordNum);
}
-
void
DiskIndex::calculateSize()
{
@@ -335,19 +331,18 @@ DiskIndex::calculateSize()
_size = dirt.GetTreeSize();
}
-
namespace {
DiskIndex::LookupResult _G_nothing;
-class LookupCache
-{
+class LookupCache {
public:
LookupCache(DiskIndex & diskIndex, const std::vector<uint32_t> & fieldIds) :
_diskIndex(diskIndex),
_fieldIds(fieldIds),
_cache()
- { }
+ {
+ }
const DiskIndex::LookupResult &
lookup(const vespalib::string & word, uint32_t fieldId) {
Cache::const_iterator it = _cache.find(word);
@@ -363,14 +358,14 @@ public:
return _G_nothing;
}
private:
+
typedef vespalib::hash_map<vespalib::string, DiskIndex::LookupResultVector> Cache;
DiskIndex & _diskIndex;
const std::vector<uint32_t> & _fieldIds;
Cache _cache;
};
-class CreateBlueprintVisitor : public CreateBlueprintVisitorHelper
-{
+class CreateBlueprintVisitor : public CreateBlueprintVisitorHelper {
private:
LookupCache &_cache;
DiskIndex &_diskIndex;
@@ -391,8 +386,7 @@ public:
}
template <class TermNode>
- void visitTerm(TermNode &n)
- {
+ void visitTerm(TermNode &n) {
const vespalib::string termStr = termAsString(n);
const DiskIndex::LookupResult & lookupRes = _cache.lookup(termStr, _fieldId);
if (lookupRes.valid()) {
@@ -418,7 +412,6 @@ public:
void visit(PredicateQuery &) override { }
};
-
Blueprint::UP
createBlueprintHelper(LookupCache & cache, DiskIndex & diskIndex, const IRequestContext & requestContext,
const FieldSpec &field, uint32_t fieldId, const Node &term)
@@ -442,7 +435,6 @@ DiskIndex::createBlueprint(const IRequestContext & requestContext, const FieldSp
return createBlueprintHelper(cache, *this, requestContext, field, fieldIds[0], term);
}
-
Blueprint::UP
DiskIndex::createBlueprint(const IRequestContext & requestContext, const FieldSpecList &fields, const Node &term)
{
diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.h b/searchlib/src/vespa/searchlib/diskindex/diskindex.h
index 4bef53a3030..d83b2f56d7c 100644
--- a/searchlib/src/vespa/searchlib/diskindex/diskindex.h
+++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.h
@@ -12,14 +12,13 @@
namespace search::diskindex {
/**
- * This class represents a disk index with a common dictionary, and
- * posting list files and bit vector files for each field.
- * Parts of the disk dictionary and all bit vector
- * dictionaries are loaded into memory during setup. All other files
- * are just opened, ready for later access.
- **/
-class DiskIndex : public queryeval::Searchable
-{
+ * This class represents a disk index that contains a set of field indexes that are independent of each other.
+ *
+ * Each field index has a dictionary, posting list files and bit vector files.
+ * Parts of the disk dictionary and all bit vector dictionaries are loaded into memory during setup.
+ * All other files are just opened, ready for later access.
+ */
+class DiskIndex : public queryeval::Searchable {
public:
/**
* The result after performing a disk dictionary lookup.
@@ -60,11 +59,12 @@ public:
vespalib::string _word;
IndexList _indexes;
};
+
private:
- typedef index::PostingListFileRandRead DiskPostingFile;
- typedef Zc4PosOccRandRead DiskPostingFileReal;
- typedef ZcPosOccRandRead DiskPostingFileDynamicKReal;
- typedef vespalib::cache<vespalib::CacheParam<vespalib::LruParam<Key, LookupResultVector>, DiskIndex>> Cache;
+ using DiskPostingFile = index::PostingListFileRandRead;
+ using DiskPostingFileReal = Zc4PosOccRandRead;
+ using DiskPostingFileDynamicKReal = ZcPosOccRandRead;
+ using Cache = vespalib::cache<vespalib::CacheParam<vespalib::LruParam<Key, LookupResultVector>, DiskIndex>>;
vespalib::string _indexDir;
size_t _cacheSize;
@@ -83,11 +83,11 @@ private:
public:
/**
- * Create a view of the disk index located in the given directory
- * described by the given schema.
+ * Create a view of the disk index located in the given directory.
*
* @param indexDir the directory where the disk index is located.
- **/
+ * @param cacheSize optional size (in bytes) of the disk dictionary lookup cache.
+ */
DiskIndex(const vespalib::string &indexDir, size_t cacheSize=0);
~DiskIndex();
@@ -95,29 +95,27 @@ public:
* Setup this instance by opening and loading relevant index files.
*
* @return true if this instance was successfully setup.
- **/
+ */
bool setup(const TuneFileSearch &tuneFileSearch);
bool setup(const TuneFileSearch &tuneFileSearch, const DiskIndex &old);
/**
- * Perform a dictionary lookup for the given word in the given
- * field.
+ * Perform a dictionary lookup for the given word in the given field.
*
- * @param indexId the id of the field to
- * perform lookup for.
+ * @param indexId the id of the field to perform lookup for.
* @param word the word to lookup.
* @return the lookup result or nullptr if the word is not found.
- **/
+ */
LookupResult::UP lookup(uint32_t indexId, vespalib::stringref word);
- LookupResultVector lookup(const std::vector<uint32_t> & indexes, vespalib::stringref word);
+ LookupResultVector lookup(const std::vector<uint32_t> & indexes, vespalib::stringref word);
/**
* Read the posting list corresponding to the given lookup result.
*
* @param lookupRes the result of the previous dictionary lookup.
* @return a handle for the posting list in memory.
- **/
+ */
index::PostingListHandle::UP readPostingList(const LookupResult &lookupRes) const;
/**
@@ -126,22 +124,19 @@ public:
* @param lookupRes the result of the previous dictionary lookup.
* @return the bit vector or nullptr if no bit vector exists for the
* word in the lookup result.
- **/
+ */
BitVector::UP readBitVector(const LookupResult &lookupRes) const;
- queryeval::Blueprint::UP
- createBlueprint(const queryeval::IRequestContext & requestContext,
- const queryeval::FieldSpec &field,
- const query::Node &term) override;
+ queryeval::Blueprint::UP createBlueprint(const queryeval::IRequestContext & requestContext,
+ const queryeval::FieldSpec &field,
+ const query::Node &term) override;
- queryeval::Blueprint::UP
- createBlueprint(const queryeval::IRequestContext & requestContext,
- const queryeval::FieldSpecList &fields,
- const query::Node &term) override;
+ queryeval::Blueprint::UP createBlueprint(const queryeval::IRequestContext & requestContext,
+ const queryeval::FieldSpecList &fields,
+ const query::Node &term) override;
/**
* Get the size on disk of this index.
- * @return the size of the index.
*/
uint64_t getSize() const { return _size; }
diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp
index 6454c0851a7..8c2b33a933e 100644
--- a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp
@@ -98,7 +98,6 @@ FieldWriter::open(const vespalib::string &prefix,
return true;
}
-
void
FieldWriter::flush()
{
@@ -120,7 +119,6 @@ FieldWriter::flush()
}
}
-
void
FieldWriter::newWord(uint64_t wordNum, vespalib::stringref word)
{
@@ -134,14 +132,12 @@ FieldWriter::newWord(uint64_t wordNum, vespalib::stringref word)
_prevDocId = 0;
}
-
void
FieldWriter::newWord(vespalib::stringref word)
{
newWord(_wordNum + 1, word);
}
-
bool
FieldWriter::close()
{
@@ -183,7 +179,6 @@ FieldWriter::getFeatureParams(PostingListParams &params)
_posoccfile->getFeatureParams(params);
}
-
static const char *termOccNames[] =
{
"boolocc.bdat",
@@ -199,7 +194,6 @@ static const char *termOccNames[] =
nullptr,
};
-
void
FieldWriter::remove(const vespalib::string &prefix)
{
diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h
index 9a6edf90243..1e9afb717e8 100644
--- a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h
+++ b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h
@@ -10,15 +10,13 @@
namespace search::diskindex {
-/*
- * FieldWriter is used to write a dictionary and posting list file
- * together.
+/**
+ * FieldWriter is used to write a dictionary and posting list file together.
*
* It is used by the fusion code to write the merged output for a field,
* and by the memory index dump code to write a field to disk.
*/
-class FieldWriter
-{
+class FieldWriter {
private:
uint64_t _wordNum;
uint32_t _prevDocId;
@@ -28,14 +26,15 @@ public:
using DictionaryFileSeqWrite = index::DictionaryFileSeqWrite;
- typedef index::PostingListFileSeqWrite PostingListFileSeqWrite;
- typedef index::DocIdAndFeatures DocIdAndFeatures;
- typedef index::Schema Schema;
- typedef index::PostingListCounts PostingListCounts;
- typedef index::PostingListParams PostingListParams;
+ using PostingListFileSeqWrite = index::PostingListFileSeqWrite;
+ using DocIdAndFeatures = index::DocIdAndFeatures;
+ using Schema = index::Schema;
+ using PostingListCounts = index::PostingListCounts;
+ using PostingListParams = index::PostingListParams;
std::unique_ptr<DictionaryFileSeqWrite> _dictFile;
std::unique_ptr<PostingListFileSeqWrite> _posoccfile;
+
private:
BitVectorCandidate _bvc;
BitVectorFileWrite _bmapfile;
diff --git a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp
index a3c37cb91f6..964f37eb5cf 100644
--- a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp
@@ -33,8 +33,7 @@ noWordPos()
}
-class FileHandle
-{
+class FileHandle {
public:
FieldWriter *_fieldWriter;
DocIdAndFeatures _docIdAndFeatures;
@@ -43,22 +42,18 @@ public:
~FileHandle();
- void
- open(vespalib::stringref dir,
- const SchemaUtil::IndexIterator &index,
- uint32_t docIdLimit, uint64_t numWordIds,
- const TuneFileSeqWrite &tuneFileWrite,
- const FileHeaderContext &fileHeaderContext);
+ void open(vespalib::stringref dir,
+ const SchemaUtil::IndexIterator &index,
+ uint32_t docIdLimit, uint64_t numWordIds,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const FileHeaderContext &fileHeaderContext);
- void
- close();
+ void close();
};
-
}
-class IndexBuilder::FieldHandle
-{
+class IndexBuilder::FieldHandle {
public:
FieldHandle(const Schema &schema,
uint32_t fieldId,
@@ -66,20 +61,15 @@ public:
~FieldHandle();
- static uint32_t
- noDocRef()
- {
+ static uint32_t noDocRef() {
return std::numeric_limits<uint32_t>::max();
}
- static uint32_t
- noElRef()
- {
+ static uint32_t noElRef() {
return std::numeric_limits<uint32_t>::max();
}
- class FHWordDocFieldFeatures
- {
+ class FHWordDocFieldFeatures {
public:
uint32_t _docId;
uint32_t _numElements;
@@ -90,28 +80,12 @@ public:
{
}
- uint32_t
- getDocId() const
- {
- return _docId;
- }
-
- uint32_t
- getNumElements() const
- {
- return _numElements;
- }
-
- void
- incNumElements()
- {
- ++_numElements;
- }
+ uint32_t getDocId() const { return _docId; }
+ uint32_t getNumElements() const { return _numElements; }
+ void incNumElements() { ++_numElements; }
};
- class FHWordDocElementFeatures
- : public WordDocElementFeatures
- {
+ class FHWordDocElementFeatures : public WordDocElementFeatures {
public:
uint32_t _docRef;
@@ -127,24 +101,21 @@ public:
}
};
- class FHWordDocElementWordPosFeatures
- : public WordDocElementWordPosFeatures
- {
+ class FHWordDocElementWordPosFeatures : public WordDocElementWordPosFeatures {
public:
uint32_t _elementRef;
- FHWordDocElementWordPosFeatures(
- const WordDocElementWordPosFeatures &features,
- uint32_t elementRef)
+ FHWordDocElementWordPosFeatures(const WordDocElementWordPosFeatures &features,
+ uint32_t elementRef)
: WordDocElementWordPosFeatures(features),
_elementRef(elementRef)
{
}
};
- typedef vespalib::Array<FHWordDocFieldFeatures> FHWordDocFieldFeaturesVector;
- typedef vespalib::Array<FHWordDocElementFeatures> FHWordDocElementFeaturesVector;
- typedef vespalib::Array<FHWordDocElementWordPosFeatures> FHWordDocElementWordPosFeaturesVector;
+ using FHWordDocFieldFeaturesVector = vespalib::Array<FHWordDocFieldFeatures>;
+ using FHWordDocElementFeaturesVector = vespalib::Array<FHWordDocElementFeatures>;
+ using FHWordDocElementWordPosFeaturesVector = vespalib::Array<FHWordDocElementWordPosFeatures>;
FHWordDocFieldFeaturesVector _wdff;
FHWordDocElementFeaturesVector _wdfef;
@@ -162,72 +133,35 @@ public:
FileHandle _files;
- void
- startWord(vespalib::stringref word);
-
- void
- endWord();
-
- void
- startDocument(uint32_t docId);
-
- void
- endDocument();
-
- void
- startElement(uint32_t elementId,
- int32_t weight,
- uint32_t elementLen);
-
- void
- endElement();
-
- void
- addOcc(const WordDocElementWordPosFeatures &features);
-
- void
- setValid()
- {
- _valid = true;
- }
-
- bool
- getValid() const
- {
- return _valid;
- }
-
- const Schema::IndexField &
- getSchemaField();
-
- const vespalib::string &
- getName();
-
- vespalib::string
- getDir();
-
- void
- open(uint32_t docIdLimit, uint64_t numWordIds,
- const TuneFileSeqWrite &tuneFileWrite,
- const FileHeaderContext &fileHeaderContext);
-
- void
- close();
-
- uint32_t
- getIndexId() const
- {
- return _fieldId;
- }
+ void startWord(vespalib::stringref word);
+ void endWord();
+ void startDocument(uint32_t docId);
+ void endDocument();
+ void startElement(uint32_t elementId,
+ int32_t weight,
+ uint32_t elementLen);
+ void endElement();
+ void addOcc(const WordDocElementWordPosFeatures &features);
+
+ const Schema::IndexField &getSchemaField();
+ const vespalib::string &getName();
+ vespalib::string getDir();
+ void open(uint32_t docIdLimit, uint64_t numWordIds,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const FileHeaderContext &fileHeaderContext);
+ void close();
+
+ void setValid() { _valid = true; }
+ bool getValid() const { return _valid; }
+ uint32_t getIndexId() const { return _fieldId; }
};
namespace {
-class SingleIterator
-{
+class SingleIterator {
public:
- typedef IndexBuilder::FieldHandle FH;
+ using FH = IndexBuilder::FieldHandle;
FH::FHWordDocFieldFeaturesVector::const_iterator _dFeatures;
FH::FHWordDocFieldFeaturesVector::const_iterator _dFeaturesE;
FH::FHWordDocElementFeaturesVector::const_iterator _elFeatures;
@@ -237,18 +171,13 @@ public:
SingleIterator(FH &fieldHandle, uint32_t localFieldId);
- void
- appendFeatures(DocIdAndFeatures &features);
+ void appendFeatures(DocIdAndFeatures &features);
- bool
- isValid() const
- {
+ bool isValid() const {
return _dFeatures != _dFeaturesE;
}
- bool
- operator<(const SingleIterator &rhs) const
- {
+ bool operator<(const SingleIterator &rhs) const {
if (_docId != rhs._docId) {
return _docId < rhs._docId;
}
@@ -256,23 +185,19 @@ public:
}
};
-
}
-
FileHandle::FileHandle()
: _fieldWriter(nullptr),
_docIdAndFeatures()
{
}
-
FileHandle::~FileHandle()
{
delete _fieldWriter;
}
-
void
FileHandle::open(vespalib::stringref dir,
const SchemaUtil::IndexIterator &index,
@@ -293,7 +218,6 @@ FileHandle::open(vespalib::stringref dir,
}
}
-
void
FileHandle::close()
{
@@ -312,7 +236,6 @@ FileHandle::close()
(void) ret;
}
-
IndexBuilder::FieldHandle::FieldHandle(const Schema &schema,
uint32_t fieldId,
IndexBuilder *ib)
@@ -331,10 +254,8 @@ IndexBuilder::FieldHandle::FieldHandle(const Schema &schema,
{
}
-
IndexBuilder::FieldHandle::~FieldHandle() = default;
-
void
IndexBuilder::FieldHandle::startWord(vespalib::stringref word)
{
@@ -342,7 +263,6 @@ IndexBuilder::FieldHandle::startWord(vespalib::stringref word)
_files._fieldWriter->newWord(word);
}
-
void
IndexBuilder::FieldHandle::endWord()
{
@@ -362,7 +282,6 @@ IndexBuilder::FieldHandle::endWord()
_elRef = noElRef();
}
-
void
IndexBuilder::FieldHandle::startDocument(uint32_t docId)
{
@@ -373,7 +292,6 @@ IndexBuilder::FieldHandle::startDocument(uint32_t docId)
_lowestOKElementId = 0u;
}
-
void
IndexBuilder::FieldHandle::endDocument()
{
@@ -385,12 +303,10 @@ IndexBuilder::FieldHandle::endDocument()
_docRef = noDocRef();
}
-
void
-IndexBuilder::FieldHandle::
-startElement(uint32_t elementId,
- int32_t weight,
- uint32_t elementLen)
+IndexBuilder::FieldHandle::startElement(uint32_t elementId,
+ int32_t weight,
+ uint32_t elementLen)
{
assert(_docRef != noDocRef());
assert(_elRef == noElRef());
@@ -407,7 +323,6 @@ startElement(uint32_t elementId,
_lowestOKWordPos = 0u;
}
-
void
IndexBuilder::FieldHandle::endElement()
{
@@ -418,10 +333,8 @@ IndexBuilder::FieldHandle::endElement()
_lowestOKElementId = ef.getElementId() + 1;
}
-
void
-IndexBuilder::FieldHandle::
-addOcc(const WordDocElementWordPosFeatures &features)
+IndexBuilder::FieldHandle::addOcc(const WordDocElementWordPosFeatures &features)
{
assert(_elRef != noElRef());
FHWordDocElementFeatures &ef = _wdfef[_elRef];
@@ -435,29 +348,24 @@ addOcc(const WordDocElementWordPosFeatures &features)
ef.incNumOccs();
}
-
const Schema::IndexField &
IndexBuilder::FieldHandle::getSchemaField()
{
return _schema->getIndexField(_fieldId);
}
-
const vespalib::string &
IndexBuilder::FieldHandle::getName()
{
return getSchemaField().getName();
-
}
-
vespalib::string
IndexBuilder::FieldHandle::getDir()
{
return _ib->appendToPrefix(getName());
}
-
void
IndexBuilder::FieldHandle::open(uint32_t docIdLimit, uint64_t numWordIds,
const TuneFileSeqWrite &tuneFileWrite,
@@ -468,14 +376,12 @@ IndexBuilder::FieldHandle::open(uint32_t docIdLimit, uint64_t numWordIds,
docIdLimit, numWordIds, tuneFileWrite, fileHeaderContext);
}
-
void
IndexBuilder::FieldHandle::close()
{
_files.close();
}
-
SingleIterator::SingleIterator(FH &fieldHandle, uint32_t localFieldId)
: _dFeatures(fieldHandle._wdff.begin()),
_dFeaturesE(fieldHandle._wdff.end()),
@@ -486,7 +392,6 @@ SingleIterator::SingleIterator(FH &fieldHandle, uint32_t localFieldId)
{
}
-
void
SingleIterator::appendFeatures(DocIdAndFeatures &features)
{
@@ -511,7 +416,6 @@ SingleIterator::appendFeatures(DocIdAndFeatures &features)
}
}
-
IndexBuilder::IndexBuilder(const Schema &schema)
: index::IndexBuilder(schema),
_currentField(nullptr),
@@ -541,6 +445,27 @@ IndexBuilder::IndexBuilder(const Schema &schema)
IndexBuilder::~IndexBuilder() = default;
void
+IndexBuilder::startField(uint32_t fieldId)
+{
+ assert(_curDocId == noDocId());
+ assert(_currentField == nullptr);
+ assert(fieldId < _fields.size());
+ assert(fieldId >= _lowestOKFieldId);
+ _currentField = &_fields[fieldId];
+ assert(_currentField != nullptr);
+}
+
+void
+IndexBuilder::endField()
+{
+ assert(_curDocId == noDocId());
+ assert(!_inWord);
+ assert(_currentField != nullptr);
+ _lowestOKFieldId = _currentField->_fieldId + 1;
+ _currentField = nullptr;
+}
+
+void
IndexBuilder::startWord(vespalib::stringref word)
{
assert(_currentField != nullptr);
@@ -551,7 +476,6 @@ IndexBuilder::startWord(vespalib::stringref word)
_currentField->startWord(word);
}
-
void
IndexBuilder::endWord()
{
@@ -562,7 +486,6 @@ IndexBuilder::endWord()
_lowestOKDocId = 1u;
}
-
void
IndexBuilder::startDocument(uint32_t docId)
{
@@ -575,7 +498,6 @@ IndexBuilder::startDocument(uint32_t docId)
_currentField->startDocument(docId);
}
-
void
IndexBuilder::endDocument()
{
@@ -586,30 +508,6 @@ IndexBuilder::endDocument()
_curDocId = noDocId();
}
-
-void
-IndexBuilder::startField(uint32_t fieldId)
-{
- assert(_curDocId == noDocId());
- assert(_currentField == nullptr);
- assert(fieldId < _fields.size());
- assert(fieldId >= _lowestOKFieldId);
- _currentField = &_fields[fieldId];
- assert(_currentField != nullptr);
-}
-
-
-void
-IndexBuilder::endField()
-{
- assert(_curDocId == noDocId());
- assert(!_inWord);
- assert(_currentField != nullptr);
- _lowestOKFieldId = _currentField->_fieldId + 1;
- _currentField = nullptr;
-}
-
-
void
IndexBuilder::startElement(uint32_t elementId,
int32_t weight,
@@ -619,7 +517,6 @@ IndexBuilder::startElement(uint32_t elementId,
_currentField->startElement(elementId, weight, elementLen);
}
-
void
IndexBuilder::endElement()
{
@@ -627,7 +524,6 @@ IndexBuilder::endElement()
_currentField->endElement();
}
-
void
IndexBuilder::addOcc(const WordDocElementWordPosFeatures &features)
{
@@ -635,14 +531,12 @@ IndexBuilder::addOcc(const WordDocElementWordPosFeatures &features)
_currentField->addOcc(features);
}
-
void
IndexBuilder::setPrefix(vespalib::stringref prefix)
{
_prefix = prefix;
}
-
vespalib::string
IndexBuilder::appendToPrefix(vespalib::stringref name)
{
@@ -652,7 +546,6 @@ IndexBuilder::appendToPrefix(vespalib::stringref name)
return _prefix + "/" + name;
}
-
void
IndexBuilder::open(uint32_t docIdLimit, uint64_t numWordIds,
const TuneFileIndexing &tuneFileIndexing,
@@ -682,7 +575,6 @@ IndexBuilder::open(uint32_t docIdLimit, uint64_t numWordIds,
}
}
-
void
IndexBuilder::close()
{
diff --git a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h
index fa818bf08e6..43ac49a0a72 100644
--- a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h
+++ b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h
@@ -13,12 +13,16 @@ namespace search::diskindex {
class BitVectorCandidate;
-class IndexBuilder : public index::IndexBuilder
-{
+/**
+ * Class used to build a disk index for the set of index fields specified in a schema.
+ *
+ * The resulting disk index consists of field indexes that are independent of each other.
+ */
+class IndexBuilder : public index::IndexBuilder {
public:
class FieldHandle;
- typedef index::Schema Schema;
+ using Schema = index::Schema;
private:
// Text fields
FieldHandle *_currentField;
@@ -49,19 +53,16 @@ public:
IndexBuilder(const Schema &schema);
~IndexBuilder() override;
+ void startField(uint32_t fieldId) override;
+ void endField() override;
void startWord(vespalib::stringref word) override;
void endWord() override;
void startDocument(uint32_t docId) override;
void endDocument() override;
- void startField(uint32_t fieldId) override;
- void endField() override;
void startElement(uint32_t elementId, int32_t weight, uint32_t elementLen) override;
void endElement() override;
void addOcc(const WordDocElementWordPosFeatures &features) override;
- // TODO: methods for attribute vectors.
-
- // TODO: methods for document summary.
void setPrefix(vespalib::stringref prefix);
vespalib::string appendToPrefix(vespalib::stringref name);
diff --git a/searchlib/src/vespa/searchlib/index/indexbuilder.cpp b/searchlib/src/vespa/searchlib/index/indexbuilder.cpp
index 6b88c51e6cc..d585238107a 100644
--- a/searchlib/src/vespa/searchlib/index/indexbuilder.cpp
+++ b/searchlib/src/vespa/searchlib/index/indexbuilder.cpp
@@ -6,7 +6,8 @@ namespace search::index {
IndexBuilder::IndexBuilder(const Schema &schema)
: _schema(schema)
-{ }
+{
+}
IndexBuilder::~IndexBuilder() = default;
diff --git a/searchlib/src/vespa/searchlib/index/indexbuilder.h b/searchlib/src/vespa/searchlib/index/indexbuilder.h
index 66ca740a20c..0496809336b 100644
--- a/searchlib/src/vespa/searchlib/index/indexbuilder.h
+++ b/searchlib/src/vespa/searchlib/index/indexbuilder.h
@@ -8,6 +8,15 @@ namespace search::index {
class Schema;
class WordDocElementWordPosFeatures;
+/**
+ * Interface used to build an index for the set of index fields specified in a schema.
+ *
+ *
+ * The index should be built as follows:
+ * For each field add the set of unique words in sorted order.
+ * For each word add the set of document ids in sorted order.
+ * For each document id add the position information for that document.
+ */
class IndexBuilder {
protected:
const Schema &_schema;
@@ -15,39 +24,16 @@ protected:
public:
IndexBuilder(const Schema &schema);
- virtual
- ~IndexBuilder();
-
- virtual void
- startWord(vespalib::stringref word) = 0;
-
- virtual void
- endWord() = 0;
-
- virtual void
- startDocument(uint32_t docId) = 0;
-
- virtual void
- endDocument() = 0;
-
- virtual void
- startField(uint32_t fieldId) = 0;
-
- virtual void
- endField() = 0;
-
- virtual void
- startElement(uint32_t elementId, int32_t weight, uint32_t elementLen) = 0;
-
- virtual void
- endElement() = 0;
-
- virtual void
- addOcc(const WordDocElementWordPosFeatures &features) = 0;
-
- // TODO: methods for attribute vectors.
-
- // TODO: methods for document summary.
+ virtual ~IndexBuilder();
+ virtual void startField(uint32_t fieldId) = 0;
+ virtual void endField() = 0;
+ virtual void startWord(vespalib::stringref word) = 0;
+ virtual void endWord() = 0;
+ virtual void startDocument(uint32_t docId) = 0;
+ virtual void endDocument() = 0;
+ virtual void startElement(uint32_t elementId, int32_t weight, uint32_t elementLen) = 0;
+ virtual void endElement() = 0;
+ virtual void addOcc(const WordDocElementWordPosFeatures &features) = 0;
};
}