summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Geir Storli <geirst@verizonmedia.com> 2019-04-30 10:49:33 +0000
committer Geir Storli <geirst@verizonmedia.com> 2019-04-30 10:49:33 +0000
commit 45e35a96de891fb90a6190e19d5d2ad20dfecb44 (patch)
tree 4248a52f2ce4642a51b1035b2bf9a8bba94e126d /searchlib
parent 7fb9ed41d77810803772f9459a6df5076127555c (diff)
Simplify IndexBuilder API used when dumping a memory index to a disk index.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/memoryindex/field_index/field_index_test.cpp 90
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp 349
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/indexbuilder.h 10
-rw-r--r-- searchlib/src/vespa/searchlib/index/indexbuilder.h 8
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_index.cpp 34
-rw-r--r-- searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp 17
6 files changed, 92 insertions, 416 deletions
diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
index a63746548f0..e2401c1ad7c 100644
--- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
+++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
@@ -9,6 +9,7 @@
#include <vespa/searchlib/fef/fieldpositionsiterator.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <vespa/searchlib/index/docbuilder.h>
+#include <vespa/searchlib/index/docidandfeatures.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
#include <vespa/searchlib/memoryindex/document_inverter.h>
#include <vespa/searchlib/memoryindex/field_index_collection.h>
@@ -45,13 +46,9 @@ private:
std::stringstream _ss;
bool _insideWord;
bool _insideField;
- bool _insideDoc;
- bool _insideElem;
bool _firstWord;
bool _firstField;
bool _firstDoc;
- bool _firstElem;
- bool _firstPos;
public:
MyBuilder(const Schema &schema)
@@ -59,13 +56,9 @@ public:
_ss(),
_insideWord(false),
_insideField(false),
- _insideDoc(false),
- _insideElem(false),
_firstWord(true),
_firstField(true),
- _firstDoc(true),
- _firstElem(true),
- _firstPos(true)
+ _firstDoc(true)
{}
virtual void startWord(vespalib::stringref word) override {
@@ -80,7 +73,6 @@ public:
virtual void endWord() override {
assert(_insideWord);
- assert(!_insideDoc);
_ss << "]";
_firstWord = false;
_insideWord = false;
@@ -102,48 +94,33 @@ public:
_insideField = false;
}
- virtual void startDocument(uint32_t docId) override {
+ virtual void add_document(const DocIdAndFeatures &features) override {
assert(_insideWord);
- assert(!_insideDoc);
- if (!_firstDoc) _ss << ",";
- _ss << "d=" << docId << "[";
- _firstElem = true;
- _insideDoc = true;
- }
-
- virtual void endDocument() override {
- assert(_insideDoc);
- assert(!_insideElem);
- _ss << "]";
- _firstDoc = false;
- _insideDoc = false;
- }
-
- virtual void startElement(uint32_t elementId,
- int32_t weight,
- uint32_t elementLen) override {
- assert(_insideDoc);
- assert(!_insideElem);
- if (!_firstElem)
+ if (!_firstDoc) {
_ss << ",";
- _ss << "e=" << elementId <<
- ",w=" << weight << ",l=" << elementLen << "[";
- _firstPos = true;
- _insideElem = true;
- }
-
- virtual void endElement() override {
- assert(_insideElem);
+ }
+ _ss << "d=" << features._docId << "[";
+ bool first_elem = true;
+ size_t word_pos_offset = 0;
+ for (const auto& elem : features._elements) {
+ if (!first_elem) {
+ _ss << ",";
+ }
+ _ss << "e=" << elem.getElementId() << ",w=" << elem.getWeight() << ",l=" << elem.getElementLen() << "[";
+ bool first_pos = true;
+ for (size_t i = 0; i < elem.getNumOccs(); ++i) {
+ if (!first_pos) {
+ _ss << ",";
+ }
+ _ss << features._wordPositions[i + word_pos_offset].getWordPos();
+ first_pos = false;
+ }
+ word_pos_offset += elem.getNumOccs();
+ _ss << "]";
+ first_elem = false;
+ }
_ss << "]";
- _firstElem = false;
- _insideElem = false;
- }
-
- virtual void addOcc(const WordDocElementWordPosFeatures &features) override {
- assert(_insideElem);
- if (!_firstPos) _ss << ",";
- _ss << features.getWordPos();
- _firstPos = false;
+ _firstDoc = false;
}
std::string toStr() const {
@@ -701,14 +678,13 @@ TEST_F(FieldIndexCollectionTest, require_that_basic_dumping_to_index_builder_is_
WordDocElementWordPosFeatures wpf;
b.startField(4);
b.startWord("a");
- b.startDocument(2);
- b.startElement(0, 10, 20);
- wpf.setWordPos(1);
- b.addOcc(wpf);
- wpf.setWordPos(3);
- b.addOcc(wpf);
- b.endElement();
- b.endDocument();
+ DocIdAndFeatures features;
+ features._docId = 2;
+ features._elements.emplace_back(0, 10, 20);
+ features._elements.back().setNumOccs(2);
+ features._wordPositions.emplace_back(1);
+ features._wordPositions.emplace_back(3);
+ b.add_document(features);
b.endWord();
b.endField();
EXPECT_EQ("f=4[w=a[d=2[e=0,w=10,l=20[1,3]]]]", b.toStr());
diff --git a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp
index 964f37eb5cf..42f6971e53f 100644
--- a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp
@@ -26,20 +26,12 @@ using index::WordDocElementFeatures;
using index::schema::DataType;
using vespalib::getLastErrorString;
-uint32_t
-noWordPos()
-{
- return std::numeric_limits<uint32_t>::max();
-}
-
-
class FileHandle {
-public:
- FieldWriter *_fieldWriter;
- DocIdAndFeatures _docIdAndFeatures;
+private:
+ std::shared_ptr<FieldWriter> _fieldWriter;
+public:
FileHandle();
-
~FileHandle();
void open(vespalib::stringref dir,
@@ -49,99 +41,29 @@ public:
const FileHeaderContext &fileHeaderContext);
void close();
+
+ FieldWriter* writer() { return _fieldWriter.get(); }
};
}
class IndexBuilder::FieldHandle {
+private:
+ bool _valid;
+ const Schema *_schema; // Ptr to allow being std::vector member
+ uint32_t _fieldId;
+ IndexBuilder *_builder; // Ptr to allow being std::vector member
+ FileHandle _file;
+
public:
FieldHandle(const Schema &schema,
uint32_t fieldId,
- IndexBuilder *ib);
+ IndexBuilder *builder);
~FieldHandle();
- static uint32_t noDocRef() {
- return std::numeric_limits<uint32_t>::max();
- }
-
- static uint32_t noElRef() {
- return std::numeric_limits<uint32_t>::max();
- }
-
- class FHWordDocFieldFeatures {
- public:
- uint32_t _docId;
- uint32_t _numElements;
-
- FHWordDocFieldFeatures(uint32_t docId)
- : _docId(docId),
- _numElements(0u)
- {
- }
-
- uint32_t getDocId() const { return _docId; }
- uint32_t getNumElements() const { return _numElements; }
- void incNumElements() { ++_numElements; }
- };
-
- class FHWordDocElementFeatures : public WordDocElementFeatures {
- public:
- uint32_t _docRef;
-
- FHWordDocElementFeatures(uint32_t elementId,
- int32_t weight,
- uint32_t elementLen,
- uint32_t docRef)
- : WordDocElementFeatures(elementId),
- _docRef(docRef)
- {
- setWeight(weight);
- setElementLen(elementLen);
- }
- };
-
- class FHWordDocElementWordPosFeatures : public WordDocElementWordPosFeatures {
- public:
- uint32_t _elementRef;
-
- FHWordDocElementWordPosFeatures(const WordDocElementWordPosFeatures &features,
- uint32_t elementRef)
- : WordDocElementWordPosFeatures(features),
- _elementRef(elementRef)
- {
- }
- };
-
- using FHWordDocFieldFeaturesVector = vespalib::Array<FHWordDocFieldFeatures>;
- using FHWordDocElementFeaturesVector = vespalib::Array<FHWordDocElementFeatures>;
- using FHWordDocElementWordPosFeaturesVector = vespalib::Array<FHWordDocElementWordPosFeatures>;
-
- FHWordDocFieldFeaturesVector _wdff;
- FHWordDocElementFeaturesVector _wdfef;
- FHWordDocElementWordPosFeaturesVector _wdfepf;
-
- uint32_t _docRef;
- uint32_t _elRef;
- bool _valid;
- const Schema *_schema; // Ptr to allow being std::vector member
- uint32_t _fieldId;
- IndexBuilder *_ib; // Ptr to allow being std::vector member
-
- uint32_t _lowestOKElementId;
- uint32_t _lowestOKWordPos;
-
- FileHandle _files;
-
- void startWord(vespalib::stringref word);
- void endWord();
- void startDocument(uint32_t docId);
- void endDocument();
- void startElement(uint32_t elementId,
- int32_t weight,
- uint32_t elementLen);
- void endElement();
- void addOcc(const WordDocElementWordPosFeatures &features);
+ void new_word(vespalib::stringref word);
+ void add_document(const index::DocIdAndFeatures &features);
const Schema::IndexField &getSchemaField();
const vespalib::string &getName();
@@ -157,46 +79,12 @@ public:
};
-namespace {
-
-class SingleIterator {
-public:
- using FH = IndexBuilder::FieldHandle;
- FH::FHWordDocFieldFeaturesVector::const_iterator _dFeatures;
- FH::FHWordDocFieldFeaturesVector::const_iterator _dFeaturesE;
- FH::FHWordDocElementFeaturesVector::const_iterator _elFeatures;
- FH::FHWordDocElementWordPosFeaturesVector::const_iterator _pFeatures;
- uint32_t _docId;
- uint32_t _localFieldId;
-
- SingleIterator(FH &fieldHandle, uint32_t localFieldId);
-
- void appendFeatures(DocIdAndFeatures &features);
-
- bool isValid() const {
- return _dFeatures != _dFeaturesE;
- }
-
- bool operator<(const SingleIterator &rhs) const {
- if (_docId != rhs._docId) {
- return _docId < rhs._docId;
- }
- return _localFieldId < rhs._localFieldId;
- }
-};
-
-}
-
FileHandle::FileHandle()
- : _fieldWriter(nullptr),
- _docIdAndFeatures()
+ : _fieldWriter()
{
}
-FileHandle::~FileHandle()
-{
- delete _fieldWriter;
-}
+FileHandle::~FileHandle() = default;
void
FileHandle::open(vespalib::stringref dir,
@@ -205,9 +93,9 @@ FileHandle::open(vespalib::stringref dir,
const TuneFileSeqWrite &tuneFileWrite,
const FileHeaderContext &fileHeaderContext)
{
- assert(_fieldWriter == nullptr);
+ assert(_fieldWriter.get() == nullptr);
- _fieldWriter = new FieldWriter(docIdLimit, numWordIds);
+ _fieldWriter = std::make_shared<FieldWriter>(docIdLimit, numWordIds);
if (!_fieldWriter->open(dir + "/", 64, 262144u, false,
index.getSchema(), index.getIndex(),
@@ -224,11 +112,10 @@ FileHandle::close()
bool ret = true;
if (_fieldWriter != nullptr) {
bool closeRes = _fieldWriter->close();
- delete _fieldWriter;
- _fieldWriter = nullptr;
+ _fieldWriter.reset();
if (!closeRes) {
LOG(error,
- "Could not close term writer");
+ "Could not close field writer");
ret = false;
}
}
@@ -238,114 +125,28 @@ FileHandle::close()
IndexBuilder::FieldHandle::FieldHandle(const Schema &schema,
uint32_t fieldId,
- IndexBuilder *ib)
- : _wdff(),
- _wdfef(),
- _wdfepf(),
- _docRef(noDocRef()),
- _elRef(noElRef()),
- _valid(false),
+ IndexBuilder *builder)
+ : _valid(false),
_schema(&schema),
_fieldId(fieldId),
- _ib(ib),
- _lowestOKElementId(0u),
- _lowestOKWordPos(0u),
- _files()
+ _builder(builder),
+ _file()
{
}
IndexBuilder::FieldHandle::~FieldHandle() = default;
void
-IndexBuilder::FieldHandle::startWord(vespalib::stringref word)
+IndexBuilder::FieldHandle::new_word(vespalib::stringref word)
{
assert(_valid);
- _files._fieldWriter->newWord(word);
+ _file.writer()->newWord(word);
}
void
-IndexBuilder::FieldHandle::endWord()
+IndexBuilder::FieldHandle::add_document(const index::DocIdAndFeatures &features)
{
- DocIdAndFeatures &features = _files._docIdAndFeatures;
- SingleIterator si(*this, 0u);
- for (; si.isValid();) {
- features.clear(si._docId);
- si.appendFeatures(features);
- _files._fieldWriter->add(features);
- }
- assert(si._elFeatures == _wdfef.end());
- assert(si._pFeatures == _wdfepf.end());
- _wdff.clear();
- _wdfef.clear();
- _wdfepf.clear();
- _docRef = noDocRef();
- _elRef = noElRef();
-}
-
-void
-IndexBuilder::FieldHandle::startDocument(uint32_t docId)
-{
- assert(_docRef == noDocRef());
- assert(_wdff.empty() || _wdff.back().getDocId() < docId);
- _wdff.push_back(FHWordDocFieldFeatures(docId));
- _docRef = _wdff.size() - 1;
- _lowestOKElementId = 0u;
-}
-
-void
-IndexBuilder::FieldHandle::endDocument()
-{
- assert(_docRef != noDocRef());
- assert(_elRef == noElRef());
- FHWordDocFieldFeatures &ff = _wdff[_docRef];
- assert(ff.getNumElements() > 0);
- (void) ff;
- _docRef = noDocRef();
-}
-
-void
-IndexBuilder::FieldHandle::startElement(uint32_t elementId,
- int32_t weight,
- uint32_t elementLen)
-{
- assert(_docRef != noDocRef());
- assert(_elRef == noElRef());
- assert(elementId >= _lowestOKElementId);
-
- FHWordDocFieldFeatures &ff = _wdff[_docRef];
- _wdfef.push_back(
- FHWordDocElementFeatures(elementId,
- weight,
- elementLen,
- _docRef));
- ff.incNumElements();
- _elRef = _wdfef.size() - 1;
- _lowestOKWordPos = 0u;
-}
-
-void
-IndexBuilder::FieldHandle::endElement()
-{
- assert(_elRef != noElRef());
- FHWordDocElementFeatures &ef = _wdfef[_elRef];
- assert(ef.getNumOccs() > 0);
- _elRef = noElRef();
- _lowestOKElementId = ef.getElementId() + 1;
-}
-
-void
-IndexBuilder::FieldHandle::addOcc(const WordDocElementWordPosFeatures &features)
-{
- assert(_elRef != noElRef());
- FHWordDocElementFeatures &ef = _wdfef[_elRef];
- uint32_t wordPos = features.getWordPos();
- assert(wordPos < ef.getElementLen());
- assert(wordPos >= _lowestOKWordPos);
- _lowestOKWordPos = wordPos;
- _wdfepf.push_back(
- FHWordDocElementWordPosFeatures(features,
- _elRef));
- ef.incNumOccs();
+ _file.writer()->add(features);
}
const Schema::IndexField &
@@ -363,7 +164,7 @@ IndexBuilder::FieldHandle::getName()
vespalib::string
IndexBuilder::FieldHandle::getDir()
{
- return _ib->appendToPrefix(getName());
+ return _builder->appendToPrefix(getName());
}
void
@@ -371,49 +172,15 @@ IndexBuilder::FieldHandle::open(uint32_t docIdLimit, uint64_t numWordIds,
const TuneFileSeqWrite &tuneFileWrite,
const FileHeaderContext &fileHeaderContext)
{
- _files.open(getDir(),
- SchemaUtil::IndexIterator(*_schema, getIndexId()),
- docIdLimit, numWordIds, tuneFileWrite, fileHeaderContext);
+ _file.open(getDir(),
+ SchemaUtil::IndexIterator(*_schema, getIndexId()),
+ docIdLimit, numWordIds, tuneFileWrite, fileHeaderContext);
}
void
IndexBuilder::FieldHandle::close()
{
- _files.close();
-}
-
-SingleIterator::SingleIterator(FH &fieldHandle, uint32_t localFieldId)
- : _dFeatures(fieldHandle._wdff.begin()),
- _dFeaturesE(fieldHandle._wdff.end()),
- _elFeatures(fieldHandle._wdfef.begin()),
- _pFeatures(fieldHandle._wdfepf.begin()),
- _docId(_dFeatures->getDocId()),
- _localFieldId(localFieldId)
-{
-}
-
-void
-SingleIterator::appendFeatures(DocIdAndFeatures &features)
-{
- uint32_t elCount = _dFeatures->getNumElements();
- for (uint32_t elId = 0; elId < elCount; ++elId, ++_elFeatures) {
- features._elements.push_back(*_elFeatures);
- features._elements.back().setNumOccs(0);
- uint32_t posCount = _elFeatures->getNumOccs();
- uint32_t lastWordPos = noWordPos();
- for (uint32_t posId = 0; posId < posCount; ++posId, ++_pFeatures) {
- uint32_t wordPos = _pFeatures->getWordPos();
- if (wordPos != lastWordPos) {
- lastWordPos = wordPos;
- features._elements.back().incNumOccs();
- features._wordPositions.push_back(*_pFeatures);
- }
- }
- }
- ++_dFeatures;
- if (_dFeatures != _dFeaturesE) {
- _docId = _dFeatures->getDocId();
- }
+ _file.close();
}
IndexBuilder::IndexBuilder(const Schema &schema)
@@ -461,7 +228,7 @@ IndexBuilder::endField()
assert(_curDocId == noDocId());
assert(!_inWord);
assert(_currentField != nullptr);
- _lowestOKFieldId = _currentField->_fieldId + 1;
+ _lowestOKFieldId = _currentField->getIndexId() + 1;
_currentField = nullptr;
}
@@ -473,7 +240,7 @@ IndexBuilder::startWord(vespalib::stringref word)
// TODO: Check sort order
_curWord = word;
_inWord = true;
- _currentField->startWord(word);
+ _currentField->new_word(word);
}
void
@@ -481,54 +248,16 @@ IndexBuilder::endWord()
{
assert(_inWord);
assert(_currentField != nullptr);
- _currentField->endWord();
_inWord = false;
_lowestOKDocId = 1u;
}
void
-IndexBuilder::startDocument(uint32_t docId)
-{
- assert(_curDocId == noDocId());
- assert(docId >= _lowestOKDocId);
- assert(docId < _docIdLimit);
- assert(_currentField != nullptr);
- _curDocId = docId;
- assert(_curDocId != noDocId());
- _currentField->startDocument(docId);
-}
-
-void
-IndexBuilder::endDocument()
-{
- assert(_curDocId != noDocId());
- assert(_currentField != nullptr);
- _currentField->endDocument();
- _lowestOKDocId = _curDocId + 1;
- _curDocId = noDocId();
-}
-
-void
-IndexBuilder::startElement(uint32_t elementId,
- int32_t weight,
- uint32_t elementLen)
-{
- assert(_currentField != nullptr);
- _currentField->startElement(elementId, weight, elementLen);
-}
-
-void
-IndexBuilder::endElement()
-{
- assert(_currentField != nullptr);
- _currentField->endElement();
-}
-
-void
-IndexBuilder::addOcc(const WordDocElementWordPosFeatures &features)
+IndexBuilder::add_document(const index::DocIdAndFeatures &features)
{
+ assert(_inWord);
assert(_currentField != nullptr);
- _currentField->addOcc(features);
+ _currentField->add_document(features);
}
void
diff --git a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h
index 43ac49a0a72..a1a77d608cd 100644
--- a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h
+++ b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h
@@ -36,7 +36,7 @@ private:
uint32_t _docIdLimit;
uint64_t _numWordIds;
- const Schema &_schema; // Ptr to allow being std::vector member
+ const Schema &_schema;
static uint32_t noDocId() {
return std::numeric_limits<uint32_t>::max();
@@ -49,7 +49,7 @@ private:
public:
typedef index::WordDocElementWordPosFeatures WordDocElementWordPosFeatures;
- // schema argument must live until indexbuilder has been deleted.
+ // Schema argument must live until IndexBuilder has been deleted.
IndexBuilder(const Schema &schema);
~IndexBuilder() override;
@@ -57,11 +57,7 @@ public:
void endField() override;
void startWord(vespalib::stringref word) override;
void endWord() override;
- void startDocument(uint32_t docId) override;
- void endDocument() override;
- void startElement(uint32_t elementId, int32_t weight, uint32_t elementLen) override;
- void endElement() override;
- void addOcc(const WordDocElementWordPosFeatures &features) override;
+ void add_document(const index::DocIdAndFeatures &features) override;
void setPrefix(vespalib::stringref prefix);
diff --git a/searchlib/src/vespa/searchlib/index/indexbuilder.h b/searchlib/src/vespa/searchlib/index/indexbuilder.h
index 0496809336b..cf9df4bd154 100644
--- a/searchlib/src/vespa/searchlib/index/indexbuilder.h
+++ b/searchlib/src/vespa/searchlib/index/indexbuilder.h
@@ -5,13 +5,13 @@
namespace search::index {
+class DocIdAndFeatures;
class Schema;
class WordDocElementWordPosFeatures;
/**
* Interface used to build an index for the set of index fields specified in a schema.
*
- *
* The index should be built as follows:
* For each field add the set of unique words in sorted order.
* For each word add the set of document ids in sorted order.
@@ -29,11 +29,7 @@ public:
virtual void endField() = 0;
virtual void startWord(vespalib::stringref word) = 0;
virtual void endWord() = 0;
- virtual void startDocument(uint32_t docId) = 0;
- virtual void endDocument() = 0;
- virtual void startElement(uint32_t elementId, int32_t weight, uint32_t elementLen) = 0;
- virtual void endElement() = 0;
- virtual void addOcc(const WordDocElementWordPosFeatures &features) = 0;
+ virtual void add_document(const DocIdAndFeatures &features) = 0;
};
}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
index 7d10895c32f..447187e5af7 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
@@ -169,23 +169,10 @@ FieldIndex::dump(search::index::IndexBuilder & indexBuilder)
for (; pitr.valid(); ++pitr) {
uint32_t docId = pitr.getKey();
EntryRef featureRef(pitr.getData());
- indexBuilder.startDocument(docId);
_featureStore.setupForReadFeatures(featureRef, decoder);
decoder.readFeatures(features);
- size_t poff = 0;
- uint32_t wpIdx = 0u;
- size_t numElements = features._elements.size();
- for (size_t i = 0; i < numElements; ++i) {
- const WordDocElementFeatures & fef = features._elements[i];
- indexBuilder.startElement(fef.getElementId(), fef.getWeight(), fef.getElementLen());
- for (size_t j = 0; j < fef.getNumOccs(); ++j, ++wpIdx) {
- assert(wpIdx == poff + j);
- indexBuilder.addOcc(features._wordPositions[poff + j]);
- }
- poff += fef.getNumOccs();
- indexBuilder.endElement();
- }
- indexBuilder.endDocument();
+ features._docId = docId;
+ indexBuilder.add_document(features);
}
} else {
const PostingListKeyDataType *kd =
@@ -194,23 +181,10 @@ FieldIndex::dump(search::index::IndexBuilder & indexBuilder)
for (; kd != kde; ++kd) {
uint32_t docId = kd->_key;
EntryRef featureRef(kd->getData());
- indexBuilder.startDocument(docId);
_featureStore.setupForReadFeatures(featureRef, decoder);
decoder.readFeatures(features);
- size_t poff = 0;
- uint32_t wpIdx = 0u;
- size_t numElements = features._elements.size();
- for (size_t i = 0; i < numElements; ++i) {
- const WordDocElementFeatures & fef = features._elements[i];
- indexBuilder.startElement(fef.getElementId(), fef.getWeight(), fef.getElementLen());
- for (size_t j = 0; j < fef.getNumOccs(); ++j, ++wpIdx) {
- assert(wpIdx == poff + j);
- indexBuilder.addOcc(features._wordPositions[poff + j]);
- }
- poff += fef.getNumOccs();
- indexBuilder.endElement();
- }
- indexBuilder.endDocument();
+ features._docId = docId;
+ indexBuilder.add_document(features);
}
}
indexBuilder.endWord();
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp
index f0bb1eb6519..28c5b1fa5df 100644
--- a/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp
+++ b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp
@@ -7,6 +7,7 @@
namespace search::diskindex {
+using index::DocIdAndFeatures;
using index::DummyFileHeaderContext;
using index::Schema;
using index::WordDocElementWordPosFeatures;
@@ -17,13 +18,17 @@ struct Builder
search::diskindex::IndexBuilder _ib;
TuneFileIndexing _tuneFileIndexing;
DummyFileHeaderContext _fileHeaderContext;
+ DocIdAndFeatures _features;
Builder(const std::string &dir,
const Schema &s,
uint32_t docIdLimit,
uint64_t numWordIds,
bool directio)
- : _ib(s)
+ : _ib(s),
+ _tuneFileIndexing(),
+ _fileHeaderContext(),
+ _features()
{
if (directio) {
_tuneFileIndexing._read.setWantDirectIO();
@@ -37,11 +42,11 @@ struct Builder
void
addDoc(uint32_t docId)
{
- _ib.startDocument(docId);
- _ib.startElement(0, 1, 1);
- _ib.addOcc(WordDocElementWordPosFeatures(0));
- _ib.endElement();
- _ib.endDocument();
+ _features.clear(docId);
+ _features._elements.emplace_back(0, 1, 1);
+ _features._elements.back().setNumOccs(1);
+ _features._wordPositions.emplace_back(0);
+ _ib.add_document(_features);
}
void