aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--searchcore/src/tests/proton/feed_and_search/feed_and_search_test.cpp10
-rw-r--r--searchcore/src/tests/proton/index/fusionrunner_test.cpp23
-rw-r--r--searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.cpp5
-rw-r--r--searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp3
-rw-r--r--searchlib/src/tests/diskindex/fusion/fusion_test.cpp17
-rw-r--r--searchlib/src/tests/memoryindex/field_index/field_index_test.cpp166
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/diskindex.cpp8
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp252
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/indexbuilder.h35
-rw-r--r--searchlib/src/vespa/searchlib/index/indexbuilder.h18
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/field_index.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/field_index.h5
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h1
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/i_field_index.h4
-rw-r--r--searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp67
16 files changed, 290 insertions, 337 deletions
diff --git a/searchcore/src/tests/proton/feed_and_search/feed_and_search_test.cpp b/searchcore/src/tests/proton/feed_and_search/feed_and_search_test.cpp
index c773abef69f..0936f05b222 100644
--- a/searchcore/src/tests/proton/feed_and_search/feed_and_search_test.cpp
+++ b/searchcore/src/tests/proton/feed_and_search/feed_and_search_test.cpp
@@ -176,12 +176,14 @@ void Test::requireThatMemoryIndexCanBeDumpedAndSearched() {
const string index_dir = "test_index";
const uint32_t docIdLimit = memory_index.getDocIdLimit();
const uint64_t num_words = memory_index.getNumWords();
- IndexBuilder index_builder(schema, index_dir, docIdLimit);
search::TuneFileIndexing tuneFileIndexing;
DummyFileHeaderContext fileHeaderContext;
- index_builder.open(num_words, MockFieldLengthInspector(), tuneFileIndexing, fileHeaderContext);
- memory_index.dump(index_builder);
- index_builder.close();
+ {
+ MockFieldLengthInspector fieldLengthInspector;
+ IndexBuilder index_builder(schema, index_dir, docIdLimit, num_words,
+ fieldLengthInspector, tuneFileIndexing, fileHeaderContext);
+ memory_index.dump(index_builder);
+ }
// Fusion test. Keep all documents to get an "identical" copy.
const string index_dir2 = "test_index2";
diff --git a/searchcore/src/tests/proton/index/fusionrunner_test.cpp b/searchcore/src/tests/proton/index/fusionrunner_test.cpp
index ac02c644885..a58d7540d7c 100644
--- a/searchcore/src/tests/proton/index/fusionrunner_test.cpp
+++ b/searchcore/src/tests/proton/index/fusionrunner_test.cpp
@@ -23,7 +23,6 @@
#include <vespa/vespalib/util/gate.h>
#include <vespa/vespalib/util/destructor_callbacks.h>
#include <vespa/vespalib/testkit/testapp.h>
-#include <vespa/vespalib/util/size_literals.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <filesystem>
#include <set>
@@ -96,7 +95,7 @@ class Test : public vespalib::TestApp {
public:
Test();
- ~Test();
+ ~Test() override;
int Main() override;
};
@@ -145,17 +144,15 @@ getSchema()
void Test::setUp() {
std::filesystem::remove_all(std::filesystem::path(base_dir));
- _fusion_runner.reset(new FusionRunner(base_dir, getSchema(),
- TuneFileAttributes(),
- _fileHeaderContext));
+ _fusion_runner = std::make_unique<FusionRunner>(base_dir, getSchema(), TuneFileAttributes(), _fileHeaderContext);
const string selector_base = base_dir + "/index.flush.0/selector";
- _selector.reset(new FixedSourceSelector(0, selector_base));
+ _selector = std::make_unique<FixedSourceSelector>(0, selector_base);
_fusion_spec = FusionSpec();
}
void Test::tearDown() {
std::filesystem::remove_all(std::filesystem::path(base_dir));
- _selector.reset(0);
+ _selector.reset(nullptr);
}
Document::UP buildDocument(DocBuilder & doc_builder, int id, const string &word) {
@@ -200,12 +197,14 @@ void Test::createIndex(const string &dir, uint32_t id, bool fusion) {
addDocument(doc_builder, memory_index, *_selector, id, id + 3, "qux");
const uint32_t docIdLimit = std::min(memory_index.getDocIdLimit(), _selector->getDocIdLimit());
- IndexBuilder index_builder(schema, index_dir, docIdLimit);
- TuneFileIndexing tuneFileIndexing;
TuneFileAttributes tuneFileAttributes;
- index_builder.open(memory_index.getNumWords(), MockFieldLengthInspector(), tuneFileIndexing, _fileHeaderContext);
- memory_index.dump(index_builder);
- index_builder.close();
+ {
+ TuneFileIndexing tuneFileIndexing;
+ MockFieldLengthInspector fieldLengthInspector;
+ IndexBuilder index_builder(schema, index_dir, docIdLimit, memory_index.getNumWords(), fieldLengthInspector,
+ tuneFileIndexing, _fileHeaderContext);
+ memory_index.dump(index_builder);
+ }
_selector->extractSaveInfo(index_dir + "/selector")->save(tuneFileAttributes, _fileHeaderContext);
}
diff --git a/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.cpp b/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.cpp
index fb14534c47c..b4d32cb4376 100644
--- a/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.cpp
+++ b/searchcore/src/vespa/searchcore/proton/index/memoryindexwrapper.cpp
@@ -36,11 +36,10 @@ MemoryIndexWrapper::flushToDisk(const vespalib::string &flushDir, uint32_t docId
{
const uint64_t numWords = _index.getNumWords();
_index.freeze(); // TODO(geirst): is this needed anymore?
- IndexBuilder indexBuilder(_index.getSchema(), flushDir, docIdLimit);
SerialNumFileHeaderContext fileHeaderContext(_fileHeaderContext, serialNum);
- indexBuilder.open(numWords, *this, _tuneFileIndexing, fileHeaderContext);
+ IndexBuilder indexBuilder(_index.getSchema(), flushDir, docIdLimit,
+ numWords, *this, _tuneFileIndexing, fileHeaderContext);
_index.dump(indexBuilder);
- indexBuilder.close();
}
search::SerialNum
diff --git a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp
index b96e63bb47e..552ce0e2f0b 100644
--- a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp
+++ b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp
@@ -382,7 +382,8 @@ DiskIndexTest::build_index(const IOSettings& io_settings, const EmptySettings& e
if (empty_settings._empty_word) {
name << "we";
}
- openIndex(name.str(), io_settings._use_directio, io_settings._use_mmap, empty_settings._empty_field, empty_settings._empty_doc, empty_settings._empty_word);
+ openIndex(name.str(), io_settings._use_directio, io_settings._use_mmap, empty_settings._empty_field,
+ empty_settings._empty_doc, empty_settings._empty_word);
}
void
diff --git a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
index 57022927dbe..c45b5787c4c 100644
--- a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
+++ b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp
@@ -22,8 +22,6 @@
#include <vespa/searchlib/memoryindex/field_index_collection.h>
#include <vespa/searchlib/memoryindex/posting_iterator.h>
#include <vespa/searchlib/test/index/mock_field_length_inspector.h>
-#include <vespa/searchlib/util/filekit.h>
-#include <vespa/vespalib/btree/btreenode.hpp>
#include <vespa/vespalib/btree/btreenodeallocator.hpp>
#include <vespa/vespalib/util/gate.h>
#include <vespa/vespalib/util/destructor_callbacks.h>
@@ -365,7 +363,7 @@ FusionTest::requireThatFusionIsWorking(const vespalib::string &prefix, bool dire
const vespalib::string dump2dir = prefix + "dump2";
constexpr uint32_t numDocs = 12 + 1;
- IndexBuilder ib(schema, dump2dir, numDocs);
+
const uint32_t numWords = fic.getNumUniqueWords();
MockFieldLengthInspector mock_field_length_inspector;
TuneFileIndexing tuneFileIndexing;
@@ -379,10 +377,11 @@ FusionTest::requireThatFusionIsWorking(const vespalib::string &prefix, bool dire
if (readmmap) {
tuneFileSearch._read.setWantMemoryMap();
}
- ib.open(numWords, mock_field_length_inspector, tuneFileIndexing, fileHeaderContext);
- fic.dump(ib);
- ib.close();
-
+ {
+ IndexBuilder ib(schema, dump2dir, numDocs, numWords,
+ mock_field_length_inspector, tuneFileIndexing, fileHeaderContext);
+ fic.dump(ib);
+ }
vespalib::ThreadStackExecutor executor(4);
do {
@@ -480,12 +479,10 @@ FusionTest::make_simple_index(const vespalib::string &dump_dir, const IFieldLeng
inv.invertDocument(10, *doc10, {});
myPushDocument(inv);
- IndexBuilder ib(_schema, dump_dir, numDocs);
TuneFileIndexing tuneFileIndexing;
DummyFileHeaderContext fileHeaderContext;
- ib.open(numWords, field_length_inspector, tuneFileIndexing, fileHeaderContext);
+ IndexBuilder ib(_schema, dump_dir, numDocs, numWords, field_length_inspector, tuneFileIndexing, fileHeaderContext);
fic.dump(ib);
- ib.close();
}
bool
diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
index d309da26feb..1a28a960b7e 100644
--- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
+++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp
@@ -6,10 +6,8 @@
#include <vespa/document/fieldvalue/stringfieldvalue.h>
#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
#include <vespa/document/repo/configbuilder.h>
-#include <vespa/searchlib/diskindex/fusion.h>
#include <vespa/searchlib/diskindex/indexbuilder.h>
#include <vespa/searchlib/diskindex/zcposoccrandread.h>
-#include <vespa/searchlib/fef/fieldpositionsiterator.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <vespa/searchlib/index/docidandfeatures.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
@@ -69,76 +67,79 @@ using NormalFieldIndex = FieldIndex<false>;
class MyBuilder : public IndexBuilder {
private:
std::stringstream _ss;
- bool _insideWord;
- bool _insideField;
- bool _firstWord;
bool _firstField;
- bool _firstDoc;
-public:
- explicit MyBuilder(const Schema &schema);
- ~MyBuilder() override;
-
- void startWord(vespalib::stringref word) override {
- assert(_insideField);
- assert(!_insideWord);
- if (!_firstWord)
- _ss << ",";
- _ss << "w=" << word << "[";
- _firstDoc = true;
- _insideWord = true;
- }
-
- void endWord() override {
- assert(_insideWord);
- _ss << "]";
- _firstWord = false;
- _insideWord = false;
- }
-
- void startField(uint32_t fieldId) override {
- assert(!_insideField);
- if (!_firstField) _ss << ",";
- _ss << "f=" << fieldId << "[";
- _firstWord = true;
- _insideField = true;
- }
-
- void endField() override {
- assert(_insideField);
- assert(!_insideWord);
- _ss << "]";
- _firstField = false;
- _insideField = false;
- }
+ class FieldIndexBuilder : public index::FieldIndexBuilder {
+ public:
+ explicit FieldIndexBuilder(std::stringstream & ss)
+ : _ss(ss),
+ _insideWord(false),
+ _firstWord(true),
+ _firstDoc(true)
+ {}
+ ~FieldIndexBuilder() override {
+ assert(!_insideWord);
+ _ss << "]";
+ }
+ void startWord(vespalib::stringref word) override {
+ assert(!_insideWord);
+ if (!_firstWord)
+ _ss << ",";
+ _ss << "w=" << word << "[";
+ _firstDoc = true;
+ _insideWord = true;
+ }
- void add_document(const DocIdAndFeatures &features) override {
- assert(_insideWord);
- if (!_firstDoc) {
- _ss << ",";
+ void endWord() override {
+ assert(_insideWord);
+ _ss << "]";
+ _firstWord = false;
+ _insideWord = false;
}
- _ss << "d=" << features.doc_id() << "[";
- bool first_elem = true;
- size_t word_pos_offset = 0;
- for (const auto& elem : features.elements()) {
- if (!first_elem) {
+ void add_document(const DocIdAndFeatures &features) override {
+ assert(_insideWord);
+ if (!_firstDoc) {
_ss << ",";
}
- _ss << "e=" << elem.getElementId() << ",w=" << elem.getWeight() << ",l=" << elem.getElementLen() << "[";
- bool first_pos = true;
- for (size_t i = 0; i < elem.getNumOccs(); ++i) {
- if (!first_pos) {
+ _ss << "d=" << features.doc_id() << "[";
+ bool first_elem = true;
+ size_t word_pos_offset = 0;
+ for (const auto& elem : features.elements()) {
+ if (!first_elem) {
_ss << ",";
}
- _ss << features.word_positions()[i + word_pos_offset].getWordPos();
- first_pos = false;
+ _ss << "e=" << elem.getElementId() << ",w=" << elem.getWeight() << ",l=" << elem.getElementLen() << "[";
+ bool first_pos = true;
+ for (size_t i = 0; i < elem.getNumOccs(); ++i) {
+ if (!first_pos) {
+ _ss << ",";
+ }
+ _ss << features.word_positions()[i + word_pos_offset].getWordPos();
+ first_pos = false;
+ }
+ word_pos_offset += elem.getNumOccs();
+ _ss << "]";
+ first_elem = false;
}
- word_pos_offset += elem.getNumOccs();
_ss << "]";
- first_elem = false;
+ _firstDoc = false;
}
- _ss << "]";
- _firstDoc = false;
+ private:
+ std::stringstream & _ss;
+ bool _insideWord;
+ bool _firstWord;
+ bool _firstDoc;
+ };
+public:
+ explicit MyBuilder(const Schema &schema);
+ ~MyBuilder() override;
+
+ std::unique_ptr<index::FieldIndexBuilder>
+ startField(uint32_t fieldId) override {
+ if (!_firstField) _ss << ",";
+ _ss << "f=" << fieldId << "[";
+ _firstField = false;
+ return std::make_unique<FieldIndexBuilder>(_ss);
}
std::string toStr() const {
@@ -149,11 +150,7 @@ public:
MyBuilder::MyBuilder(const Schema &schema)
: IndexBuilder(schema),
_ss(),
- _insideWord(false),
- _insideField(false),
- _firstWord(true),
- _firstField(true),
- _firstDoc(true)
+ _firstField(true)
{}
MyBuilder::~MyBuilder() = default;
@@ -826,18 +823,19 @@ TEST_F(FieldIndexCollectionTest, require_that_features_are_in_posting_lists)
TEST_F(FieldIndexCollectionTest, require_that_basic_dumping_to_index_builder_is_working)
{
MyBuilder b(schema);
- WordDocElementWordPosFeatures wpf;
- b.startField(4);
- b.startWord("a");
- DocIdAndFeatures features;
- features.set_doc_id(2);
- features.elements().emplace_back(0, 10, 20);
- features.elements().back().setNumOccs(2);
- features.word_positions().emplace_back(1);
- features.word_positions().emplace_back(3);
- b.add_document(features);
- b.endWord();
- b.endField();
+ {
+ WordDocElementWordPosFeatures wpf;
+ auto fb = b.startField(4);
+ fb->startWord("a");
+ DocIdAndFeatures features;
+ features.set_doc_id(2);
+ features.elements().emplace_back(0, 10, 20);
+ features.elements().back().setNumOccs(2);
+ features.word_positions().emplace_back(1);
+ features.word_positions().emplace_back(3);
+ fb->add_document(features);
+ fb->endWord();
+ }
EXPECT_EQ("f=4[w=a[d=2[e=0,w=10,l=20[1,3]]]]", b.toStr());
}
@@ -887,12 +885,12 @@ TEST_F(FieldIndexCollectionTest, require_that_dumping_words_with_no_docs_to_inde
b.toStr());
}
{
- search::diskindex::IndexBuilder b(schema, "dump", 5);
TuneFileIndexing tuneFileIndexing;
DummyFileHeaderContext fileHeaderContext;
- b.open(2, MockFieldLengthInspector(), tuneFileIndexing, fileHeaderContext);
+ MockFieldLengthInspector fieldLengthInspector;
+ search::diskindex::IndexBuilder b(schema, "dump", 5, 2, fieldLengthInspector,
+ tuneFileIndexing, fileHeaderContext);
fic.dump(b);
- b.close();
}
}
@@ -1235,12 +1233,12 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working)
EXPECT_TRUE(itr->isAtEnd());
}
{
- search::diskindex::IndexBuilder dib(_schema, "urldump", 11);
TuneFileIndexing tuneFileIndexing;
DummyFileHeaderContext fileHeaderContext;
- dib.open(_fic.getNumUniqueWords(), MockFieldLengthInspector(), tuneFileIndexing, fileHeaderContext);
+ MockFieldLengthInspector fieldLengthInspector;
+ search::diskindex::IndexBuilder dib(_schema, "urldump", 11, _fic.getNumUniqueWords(),
+ fieldLengthInspector, tuneFileIndexing, fileHeaderContext);
_fic.dump(dib);
- dib.close();
}
}
diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
index 350f4dfd145..bf295acec75 100644
--- a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
@@ -81,8 +81,7 @@ bool
DiskIndex::openDictionaries(const TuneFileSearch &tuneFileSearch)
{
for (SchemaUtil::IndexIterator itr(_schema); itr.isValid(); ++itr) {
- vespalib::string dictName =
- _indexDir + "/" + itr.getName() + "/dictionary";
+ vespalib::string dictName = _indexDir + "/" + itr.getName() + "/dictionary";
auto dict = std::make_unique<PageDict4RandRead>();
if (!dict->open(dictName, tuneFileSearch._read)) {
LOG(warning, "Could not open disk dictionary '%s'", dictName.c_str());
@@ -152,7 +151,10 @@ DiskIndex::openField(const vespalib::string &fieldDir,
bool
DiskIndex::setup(const TuneFileSearch &tuneFileSearch)
{
- if (!loadSchema() || !openDictionaries(tuneFileSearch)) {
+ if (!loadSchema() ) {
+ return false;
+ }
+ if (!openDictionaries(tuneFileSearch)) {
return false;
}
for (SchemaUtil::IndexIterator itr(_schema); itr.isValid(); ++itr) {
diff --git a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp
index 3c3ee4166bb..5cf80d06c87 100644
--- a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp
@@ -50,17 +50,16 @@ public:
FieldWriter* writer() { return _fieldWriter.get(); }
};
-}
-
-class IndexBuilder::FieldHandle {
+class FieldHandle {
private:
const Schema &_schema;
IndexBuilder &_builder;
FileHandle _file;
const uint32_t _fieldId;
- const bool _valid;
public:
- FieldHandle(const Schema &schema, uint32_t fieldId, IndexBuilder & builder, bool valid) noexcept;
+ FieldHandle(const Schema &schema, uint32_t fieldId, IndexBuilder & builder, uint32_t docIdLimit,
+ uint64_t numWordIds, const IFieldLengthInspector & field_length_inspector,
+ const TuneFileSeqWrite &tuneFileWrite, const FileHeaderContext &fileHeaderContext);
~FieldHandle();
void new_word(vespalib::stringref word);
@@ -69,16 +68,68 @@ public:
const Schema::IndexField &getSchemaField();
const vespalib::string &getName();
vespalib::string getDir();
- void open(uint32_t docIdLimit, uint64_t numWordIds,
- const FieldLengthInfo &field_length_info,
- const TuneFileSeqWrite &tuneFileWrite,
- const FileHeaderContext &fileHeaderContext);
void close();
+ uint32_t getIndexId() const noexcept { return _fieldId; }
+};
+
+class FieldIndexBuilder : public index::FieldIndexBuilder {
+public:
+ FieldIndexBuilder(const Schema &schema, uint32_t fieldId, IndexBuilder & builder, uint32_t docidLimit,
+ uint64_t numWordIds, const IFieldLengthInspector & field_length_inspector,
+ const TuneFileSeqWrite &tuneFileWrite, const FileHeaderContext &fileHeaderContext);
+ ~FieldIndexBuilder() override;
+ void startWord(vespalib::stringref word) override;
+ void endWord() override;
+ void add_document(const DocIdAndFeatures &features) override;
+private:
+ FieldHandle _field;
+ vespalib::string _curWord;
+ uint32_t _curDocId;
+ bool _inWord;
+
+ static constexpr uint32_t noDocId() {
+ return std::numeric_limits<uint32_t>::max();
+ }
- bool getValid() const { return _valid; }
- uint32_t getIndexId() const { return _fieldId; }
+ static constexpr uint64_t noWordNumHigh() {
+ return std::numeric_limits<uint64_t>::max();
+ }
};
+FieldIndexBuilder::FieldIndexBuilder(const Schema &schema, uint32_t fieldId, IndexBuilder & builder, uint32_t docidLimit,
+ uint64_t numWordIds, const IFieldLengthInspector & field_length_inspector,
+ const TuneFileSeqWrite &tuneFileWrite, const FileHeaderContext &fileHeaderContext)
+ : _field(schema, fieldId, builder, docidLimit, numWordIds, field_length_inspector, tuneFileWrite, fileHeaderContext),
+ _curWord(),
+ _curDocId(noDocId()),
+ _inWord(false)
+{}
+
+FieldIndexBuilder::~FieldIndexBuilder() = default;
+
+void
+FieldIndexBuilder::startWord(vespalib::stringref word)
+{
+ assert(!_inWord);
+ // TODO: Check sort order
+ _curWord = word;
+ _inWord = true;
+ _field.new_word(word);
+}
+
+void
+FieldIndexBuilder::endWord()
+{
+ assert(_inWord);
+ _inWord = false;
+}
+
+void
+FieldIndexBuilder::add_document(const index::DocIdAndFeatures &features)
+{
+ assert(_inWord);
+ _field.add_document(features);
+}
FileHandle::FileHandle()
: _fieldWriter()
@@ -126,175 +177,92 @@ FileHandle::close()
(void) ret;
}
-IndexBuilder::FieldHandle::FieldHandle(const Schema &schema, uint32_t fieldId, IndexBuilder &builder, bool valid) noexcept
- : _schema(schema),
- _builder(builder),
- _file(),
- _fieldId(fieldId),
- _valid(valid)
+FieldHandle::FieldHandle(const Schema &schema, uint32_t fieldId, IndexBuilder &builder, uint32_t docIdLimit,
+ uint64_t numWordIds, const IFieldLengthInspector & field_length_inspector,
+ const TuneFileSeqWrite &tuneFileWrite, const FileHeaderContext &fileHeaderContext)
+ : _schema(schema),
+ _builder(builder),
+ _file(),
+ _fieldId(fieldId)
{
+ std::filesystem::create_directory(std::filesystem::path(getDir()));
+ _file.open(getDir(), SchemaUtil::IndexIterator(_schema, getIndexId()), docIdLimit, numWordIds,
+ field_length_inspector.get_field_length_info(getName()), tuneFileWrite, fileHeaderContext);
}
-IndexBuilder::FieldHandle::~FieldHandle() = default;
+FieldHandle::~FieldHandle() {
+ close();
+}
void
-IndexBuilder::FieldHandle::new_word(vespalib::stringref word)
+FieldHandle::new_word(vespalib::stringref word)
{
- assert(_valid);
_file.writer()->newWord(word);
}
void
-IndexBuilder::FieldHandle::add_document(const index::DocIdAndFeatures &features)
+FieldHandle::add_document(const index::DocIdAndFeatures &features)
{
_file.writer()->add(features);
}
const Schema::IndexField &
-IndexBuilder::FieldHandle::getSchemaField()
+FieldHandle::getSchemaField()
{
return _schema.getIndexField(_fieldId);
}
const vespalib::string &
-IndexBuilder::FieldHandle::getName()
+FieldHandle::getName()
{
return getSchemaField().getName();
}
vespalib::string
-IndexBuilder::FieldHandle::getDir()
+FieldHandle::getDir()
{
return _builder.appendToPrefix(getName());
}
void
-IndexBuilder::FieldHandle::open(uint32_t docIdLimit, uint64_t numWordIds,
- const FieldLengthInfo &field_length_info,
- const TuneFileSeqWrite &tuneFileWrite,
- const FileHeaderContext &fileHeaderContext)
+FieldHandle::close()
{
- _file.open(getDir(), SchemaUtil::IndexIterator(_schema, getIndexId()),
- docIdLimit, numWordIds, field_length_info, tuneFileWrite, fileHeaderContext);
+ _file.close();
+ vespalib::File::sync(getDir());
}
-void
-IndexBuilder::FieldHandle::close()
-{
- _file.close();
}
-std::vector<IndexBuilder::FieldHandle>
-IndexBuilder::extractFields(const Schema &schema, IndexBuilder & builder) {
- std::vector<IndexBuilder::FieldHandle> fields;
+std::vector<int32_t>
+extractFields(const Schema &schema) {
+ std::vector<int32_t> fields;
fields.reserve(schema.getNumIndexFields());
// TODO: Filter for text indexes
for (uint32_t i = 0; i < schema.getNumIndexFields(); ++i) {
const Schema::IndexField &iField = schema.getIndexField(i);
// Only know how to handle string index for now.
bool valid = (iField.getDataType() == DataType::STRING);
- fields.emplace_back(schema, i, builder, valid);
+ fields.push_back( valid ? i : -1);
}
return fields;
}
-IndexBuilder::IndexBuilder(const Schema &schema, vespalib::stringref prefix, uint32_t docIdLimit)
+IndexBuilder::IndexBuilder(const Schema &schema, vespalib::stringref prefix, uint32_t docIdLimit,
+ uint64_t numWordIds, const index::IFieldLengthInspector &field_length_inspector,
+ const TuneFileIndexing &tuneFileIndexing, const search::common::FileHeaderContext &fileHeaderContext)
: index::IndexBuilder(schema),
_schema(schema),
- _fields(extractFields(schema, *this)),
+ _fields(extractFields(schema)),
_prefix(prefix),
- _curWord(),
_docIdLimit(docIdLimit),
- _curFieldId(-1),
- _lowestOKFieldId(0u),
- _curDocId(noDocId()),
- _inWord(false)
+ _numWordIds(numWordIds),
+ _field_length_inspector(field_length_inspector),
+ _tuneFileIndexing(tuneFileIndexing),
+ _fileHeaderContext(fileHeaderContext)
{
-}
-
-IndexBuilder::~IndexBuilder() = default;
-
-IndexBuilder::FieldHandle &
-IndexBuilder::currentField() {
- assert(_curFieldId >= 0);
- assert(_curFieldId < int32_t(_fields.size()));
- return _fields[_curFieldId];
-}
-void
-IndexBuilder::startField(uint32_t fieldId)
-{
- assert(_curDocId == noDocId());
- assert(_curFieldId == -1);
- assert(fieldId < _fields.size());
- assert(fieldId >= _lowestOKFieldId);
- _curFieldId = fieldId;
-}
-
-void
-IndexBuilder::endField()
-{
- assert(_curDocId == noDocId());
- assert(!_inWord);
- _lowestOKFieldId = currentField().getIndexId() + 1;
- _curFieldId = -1;
-}
-
-void
-IndexBuilder::startWord(vespalib::stringref word)
-{
- assert(!_inWord);
- // TODO: Check sort order
- _curWord = word;
- _inWord = true;
- currentField().new_word(word);
-}
-
-void
-IndexBuilder::endWord()
-{
- assert(_inWord);
- assert(_curFieldId != -1);
- _inWord = false;
-}
-
-void
-IndexBuilder::add_document(const index::DocIdAndFeatures &features)
-{
- assert(_inWord);
- currentField().add_document(features);
-}
-
-vespalib::string
-IndexBuilder::appendToPrefix(vespalib::stringref name) const
-{
- if (_prefix.empty()) {
- return name;
- }
- return _prefix + "/" + name;
-}
-
-void
-IndexBuilder::open(uint64_t numWordIds,
- const IFieldLengthInspector &field_length_inspector,
- const TuneFileIndexing &tuneFileIndexing,
- const FileHeaderContext &fileHeaderContext)
-{
- std::vector<uint32_t> indexes;
-
if (!_prefix.empty()) {
std::filesystem::create_directory(std::filesystem::path(_prefix));
}
- // TODO: Filter for text indexes
- for (FieldHandle & fh : _fields) {
- if (!fh.getValid()) {
- continue;
- }
- std::filesystem::create_directory(std::filesystem::path(fh.getDir()));
- fh.open(_docIdLimit, numWordIds,
- field_length_inspector.get_field_length_info(fh.getName()),
- tuneFileIndexing._write, fileHeaderContext);
- indexes.push_back(fh.getIndexId());
- }
vespalib::string schemaFile = appendToPrefix("schema.txt");
if (!_schema.saveToFile(schemaFile)) {
LOG(error, "Cannot save schema to \"%s\"", schemaFile.c_str());
@@ -302,16 +270,7 @@ IndexBuilder::open(uint64_t numWordIds,
}
}
-void
-IndexBuilder::close()
-{
- // TODO: Filter for text indexes
- for (FieldHandle & fh : _fields) {
- if (fh.getValid()) {
- fh.close();
- vespalib::File::sync(fh.getDir());
- }
- }
+IndexBuilder::~IndexBuilder() {
if (!docsummary::DocumentSummary::writeDocIdLimit(_prefix, _docIdLimit)) {
LOG(error, "Could not write docsum count in dir %s: %s",
_prefix.c_str(), getLastErrorString().c_str());
@@ -319,4 +278,23 @@ IndexBuilder::close()
}
}
+std::unique_ptr<index::FieldIndexBuilder>
+IndexBuilder::startField(uint32_t fieldId) {
+ if (_fields[fieldId] >= 0) {
+ return std::make_unique<FieldIndexBuilder>(_schema, fieldId, *this, _docIdLimit, _numWordIds,
+ _field_length_inspector, _tuneFileIndexing._write,
+ _fileHeaderContext);
+ }
+ return {};
+}
+
+vespalib::string
+IndexBuilder::appendToPrefix(vespalib::stringref name) const
+{
+ if (_prefix.empty()) {
+ return name;
+ }
+ return _prefix + "/" + name;
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h
index c5a2f6e1536..4ef6ab4a813 100644
--- a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h
+++ b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h
@@ -22,43 +22,26 @@ class BitVectorCandidate;
class IndexBuilder : public index::IndexBuilder {
public:
// Schema argument must live until IndexBuilder has been deleted.
- IndexBuilder(const index::Schema &schema, vespalib::stringref prefix, uint32_t docIdLimit);
+ IndexBuilder(const index::Schema &schema, vespalib::stringref prefix, uint32_t docIdLimit,
+ uint64_t numWordIds, const index::IFieldLengthInspector &field_length_inspector,
+ const TuneFileIndexing &tuneFileIndexing, const search::common::FileHeaderContext &fileHeaderContext);
~IndexBuilder() override;
- void startField(uint32_t fieldId) override;
- void endField() override;
- void startWord(vespalib::stringref word) override;
- void endWord() override;
- void add_document(const index::DocIdAndFeatures &features) override;
+ std::unique_ptr<index::FieldIndexBuilder> startField(uint32_t fieldId) override;
vespalib::string appendToPrefix(vespalib::stringref name) const;
-
- void open(uint64_t numWordIds, const index::IFieldLengthInspector &field_length_inspector,
- const TuneFileIndexing &tuneFileIndexing,
- const common::FileHeaderContext &fileHandleContext);
-
- void close();
private:
- class FieldHandle;
const index::Schema &_schema;
- std::vector<FieldHandle> _fields;
+ std::vector<int> _fields;
const vespalib::string _prefix;
- vespalib::string _curWord;
const uint32_t _docIdLimit;
- int32_t _curFieldId;
- uint32_t _lowestOKFieldId;
- uint32_t _curDocId;
- bool _inWord;
-
- static std::vector<IndexBuilder::FieldHandle> extractFields(const index::Schema &schema, IndexBuilder & builder);
-
- static uint32_t noDocId() {
- return std::numeric_limits<uint32_t>::max();
- }
+ const uint64_t _numWordIds; // 64-bit to match the constructor argument; a uint32_t member would silently truncate it
+ const index::IFieldLengthInspector &_field_length_inspector; // stored by reference and used in startField(); must outlive this builder
+ const TuneFileIndexing &_tuneFileIndexing; // stored by reference and used in startField(); must outlive this builder
+ const search::common::FileHeaderContext &_fileHeaderContext; // stored by reference and used in startField(); must outlive this builder
static uint64_t noWordNumHigh() {
return std::numeric_limits<uint64_t>::max();
}
- FieldHandle & currentField();
};
}
diff --git a/searchlib/src/vespa/searchlib/index/indexbuilder.h b/searchlib/src/vespa/searchlib/index/indexbuilder.h
index 37f8a9c30be..9615bfd9428 100644
--- a/searchlib/src/vespa/searchlib/index/indexbuilder.h
+++ b/searchlib/src/vespa/searchlib/index/indexbuilder.h
@@ -2,6 +2,7 @@
#pragma once
#include <vespa/vespalib/stllike/string.h>
+#include <memory>
namespace search::index {
@@ -9,6 +10,14 @@ class DocIdAndFeatures;
class Schema;
class WordDocElementWordPosFeatures;
+class FieldIndexBuilder {
+public:
+ virtual ~FieldIndexBuilder() = default;
+ virtual void startWord(vespalib::stringref word) = 0;
+ virtual void endWord() = 0;
+ virtual void add_document(const DocIdAndFeatures &features) = 0;
+};
+
/**
* Interface used to build an index for the set of index fields specified in a schema.
*
@@ -22,14 +31,9 @@ protected:
const Schema &_schema;
public:
- IndexBuilder(const Schema &schema);
-
+ explicit IndexBuilder(const Schema &schema);
virtual ~IndexBuilder();
- virtual void startField(uint32_t fieldId) = 0;
- virtual void endField() = 0;
- virtual void startWord(vespalib::stringref word) = 0;
- virtual void endWord() = 0;
- virtual void add_document(const DocIdAndFeatures &features) = 0;
+ virtual std::unique_ptr<FieldIndexBuilder> startField(uint32_t fieldId) = 0;
};
}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
index 24582d7e692..db3a0019d94 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp
@@ -149,7 +149,7 @@ FieldIndex<interleaved_features>::compactFeatures()
template <bool interleaved_features>
void
-FieldIndex<interleaved_features>::dump(search::index::IndexBuilder & indexBuilder)
+FieldIndex<interleaved_features>::dump(search::index::FieldIndexBuilder & indexBuilder)
{
vespalib::stringref word;
FeatureStore::DecodeContextCooked decoder(nullptr);
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
index 9ae9d1b2aef..0b245300a7b 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
@@ -82,7 +82,7 @@ public:
void compactFeatures() override;
- void dump(search::index::IndexBuilder & indexBuilder) override;
+ void dump(search::index::FieldIndexBuilder & indexBuilder) override;
vespalib::MemoryUsage getMemoryUsage() const override;
PostingListStore &getPostingListStore() { return _postingListStore; }
@@ -98,8 +98,7 @@ public:
/**
* Should only be used by unit tests.
*/
- queryeval::SearchIterator::UP make_search_iterator(const vespalib::string& term,
- uint32_t field_id,
+ queryeval::SearchIterator::UP make_search_iterator(const vespalib::string& term, uint32_t field_id,
fef::TermFieldMatchDataArray match_data) const;
std::unique_ptr<queryeval::SimpleLeafBlueprint> make_term_blueprint(const vespalib::string& term,
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp
index bd933bb118f..0264b5e968b 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.cpp
@@ -6,13 +6,9 @@
#include <vespa/searchlib/bitcompression/posocccompression.h>
#include <vespa/searchlib/index/i_field_length_inspector.h>
#include <vespa/searchcommon/common/schema.h>
-#include <vespa/vespalib/btree/btree.hpp>
#include <vespa/vespalib/btree/btreeiterator.hpp>
-#include <vespa/vespalib/btree/btreenode.hpp>
#include <vespa/vespalib/btree/btreenodeallocator.hpp>
#include <vespa/vespalib/btree/btreenodestore.hpp>
-#include <vespa/vespalib/btree/btreeroot.hpp>
-#include <vespa/vespalib/btree/btreestore.hpp>
#include <vespa/vespalib/util/exceptions.h>
namespace search {
@@ -46,9 +42,10 @@ void
FieldIndexCollection::dump(search::index::IndexBuilder &indexBuilder)
{
for (uint32_t fieldId = 0; fieldId < _numFields; ++fieldId) {
- indexBuilder.startField(fieldId);
- _fieldIndexes[fieldId]->dump(indexBuilder);
- indexBuilder.endField();
+ auto fieldIndexBuilder = indexBuilder.startField(fieldId);
+ if (fieldIndexBuilder) {
+ _fieldIndexes[fieldId]->dump(*fieldIndexBuilder);
+ }
}
}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h
index 6736ed2c2ad..34a09c5572f 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index_collection.h
@@ -10,6 +10,7 @@
namespace search::index {
class IFieldLengthInspector;
class Schema;
+ class IndexBuilder;
}
namespace search::memoryindex {
diff --git a/searchlib/src/vespa/searchlib/memoryindex/i_field_index.h b/searchlib/src/vespa/searchlib/memoryindex/i_field_index.h
index ee075290dc9..b7cad9d1c38 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/i_field_index.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/i_field_index.h
@@ -12,7 +12,7 @@ namespace search::queryeval {
}
namespace search::index {
class FieldLengthCalculator;
-class IndexBuilder;
+class FieldIndexBuilder;
}
namespace search::memoryindex {
@@ -37,7 +37,7 @@ public:
virtual FieldIndexRemover& getDocumentRemover() = 0;
virtual index::FieldLengthCalculator& get_calculator() = 0;
virtual void compactFeatures() = 0;
- virtual void dump(search::index::IndexBuilder& indexBuilder) = 0;
+ virtual void dump(search::index::FieldIndexBuilder& builder) = 0;
virtual std::unique_ptr<queryeval::SimpleLeafBlueprint> make_term_blueprint(const vespalib::string& term,
const queryeval::FieldSpec& field,
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp
index a32cd284fc6..f4a603e7d30 100644
--- a/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp
+++ b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp
@@ -35,39 +35,30 @@ class MockFieldLengthInspector : public IFieldLengthInspector {
struct Builder
{
- search::diskindex::IndexBuilder _ib;
MockFieldLengthInspector _mock_field_length_inspector;
TuneFileIndexing _tuneFileIndexing;
DummyFileHeaderContext _fileHeaderContext;
+ search::diskindex::IndexBuilder _ib;
DocIdAndFeatures _features;
- Builder(const std::string &dir,
- const Schema &s,
- uint32_t docIdLimit,
- uint64_t numWordIds,
- bool directio)
- : _ib(s, dir, docIdLimit),
- _tuneFileIndexing(),
+ Builder(const std::string &dir, const Schema &s, uint32_t docIdLimit, uint64_t numWordIds, bool directio)
+ : _tuneFileIndexing(),
_fileHeaderContext(),
+ _ib(s, dir, docIdLimit,numWordIds, _mock_field_length_inspector, _tuneFileIndexing, _fileHeaderContext),
_features()
{
if (directio) {
_tuneFileIndexing._read.setWantDirectIO();
_tuneFileIndexing._write.setWantDirectIO();
}
- _ib.open(numWordIds, _mock_field_length_inspector, _tuneFileIndexing, _fileHeaderContext);
}
- void addDoc(uint32_t docId) {
+ void addDoc(index::FieldIndexBuilder & fb, uint32_t docId) {
_features.clear(docId);
_features.elements().emplace_back(0, 1, 1);
_features.elements().back().setNumOccs(1);
_features.word_positions().emplace_back(0);
- _ib.add_document(_features);
- }
-
- void close() {
- _ib.close();
+ fb.add_document(_features);
}
};
@@ -84,37 +75,39 @@ TestDiskIndex::buildSchema()
void
TestDiskIndex::buildIndex(const std::string & dir, bool directio,
- bool fieldEmpty, bool docEmpty, bool wordEmpty)
+ bool fieldEmpty, bool docEmpty, bool wordEmpty)
{
Builder b(dir, _schema, docEmpty ? 1 : 32, wordEmpty ? 0 : 2, directio);
- if (!wordEmpty && !fieldEmpty && !docEmpty) {
+
+ if (!fieldEmpty) {
// f1
- b._ib.startField(0);
- b._ib.startWord("w1");
- b.addDoc(1);
- b.addDoc(3);
- b._ib.endWord();
- b._ib.endField();
- // f2
- b._ib.startField(1);
- b._ib.startWord("w1");
- b.addDoc(2);
- b.addDoc(4);
- b.addDoc(6);
- b._ib.endWord();
- b._ib.startWord("w2");
- for (uint32_t docId = 1; docId < 18; ++docId) {
- b.addDoc(docId);
+ auto fb = b._ib.startField(0);
+ if (!wordEmpty && !docEmpty) {
+ fb->startWord("w1");
+ b.addDoc(*fb, 1);
+ b.addDoc(*fb, 3);
+ fb->endWord();
+ }
+ fb = b._ib.startField(1);
+ if (!wordEmpty && !docEmpty) {
+ // f2
+ fb->startWord("w1");
+ b.addDoc(*fb, 2);
+ b.addDoc(*fb, 4);
+ b.addDoc(*fb, 6);
+ fb->endWord();
+ fb->startWord("w2");
+ for (uint32_t docId = 1; docId < 18; ++docId) {
+ b.addDoc(*fb, docId);
+ }
+ fb->endWord();
}
- b._ib.endWord();
- b._ib.endField();
}
- b.close();
}
void
TestDiskIndex::openIndex(const std::string &dir, bool directio, bool readmmap,
- bool fieldEmpty, bool docEmpty, bool wordEmpty)
+ bool fieldEmpty, bool docEmpty, bool wordEmpty)
{
buildIndex(dir, directio, fieldEmpty, docEmpty, wordEmpty);
TuneFileRandRead tuneFileRead;