diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-01-07 13:52:15 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2023-01-07 13:52:15 +0000 |
commit | c49f0ebc56b98b415b4128e3878ff26d18067482 (patch) | |
tree | ca5b629ae87738ab37e0aca2868bac70ba0ce92b /searchlib | |
parent | 9fd55bd72ee01b27efa2a27b32bc416483d2fda0 (diff) |
- Hide members
- Remove unused methods.
- Set params in constructor and make them const.
- reorder members to reduce holes in struct.
Diffstat (limited to 'searchlib')
8 files changed, 120 insertions, 181 deletions
diff --git a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp index 272d74cde2c..ca8eaa176a4 100644 --- a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp +++ b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp @@ -25,7 +25,6 @@ #include <vespa/searchlib/util/filekit.h> #include <vespa/vespalib/btree/btreenode.hpp> #include <vespa/vespalib/btree/btreenodeallocator.hpp> -#include <vespa/vespalib/btree/btreeroot.hpp> #include <vespa/vespalib/util/gate.h> #include <vespa/vespalib/util/destructor_callbacks.h> #include <vespa/vespalib/util/threadstackexecutor.h> @@ -363,11 +362,11 @@ FusionTest::requireThatFusionIsWorking(const vespalib::string &prefix, bool dire inv.invertDocument(12, *doc, {}); myPushDocument(inv); - IndexBuilder ib(schema); - vespalib::string dump2dir = prefix + "dump2"; - ib.setPrefix(dump2dir); - uint32_t numDocs = 12 + 1; - uint32_t numWords = fic.getNumUniqueWords(); + + const vespalib::string dump2dir = prefix + "dump2"; + constexpr uint32_t numDocs = 12 + 1; + IndexBuilder ib(schema, dump2dir, numDocs); + const uint32_t numWords = fic.getNumUniqueWords(); MockFieldLengthInspector mock_field_length_inspector; TuneFileIndexing tuneFileIndexing; TuneFileSearch tuneFileSearch; @@ -380,7 +379,7 @@ FusionTest::requireThatFusionIsWorking(const vespalib::string &prefix, bool dire if (readmmap) { tuneFileSearch._read.setWantMemoryMap(); } - ib.open(numDocs, numWords, mock_field_length_inspector, tuneFileIndexing, fileHeaderContext); + ib.open(numWords, mock_field_length_inspector, tuneFileIndexing, fileHeaderContext); fic.dump(ib); ib.close(); @@ -475,8 +474,8 @@ void FusionTest::make_simple_index(const vespalib::string &dump_dir, const IFieldLengthInspector &field_length_inspector) { FieldIndexCollection fic(_schema, field_length_inspector); - uint32_t numDocs = 20; - uint32_t numWords = 1000; + constexpr uint32_t numDocs = 20; + constexpr uint32_t numWords = 1000; DocBuilder b(make_add_fields()); auto invertThreads = SequencedTaskExecutor::create(invert_executor, 2); auto pushThreads = SequencedTaskExecutor::create(push_executor, 2); @@ -487,11 +486,10 @@ FusionTest::make_simple_index(const vespalib::string &dump_dir, const IFieldLeng inv.invertDocument(10, *doc10, {}); myPushDocument(inv); - IndexBuilder ib(_schema); + IndexBuilder ib(_schema, dump_dir, numDocs); TuneFileIndexing tuneFileIndexing; DummyFileHeaderContext fileHeaderContext; - ib.setPrefix(dump_dir); - ib.open(numDocs, numWords, field_length_inspector, tuneFileIndexing, fileHeaderContext); + ib.open(numWords, field_length_inspector, tuneFileIndexing, fileHeaderContext); fic.dump(ib); ib.close(); } @@ -503,8 +501,7 @@ FusionTest::try_merge_simple_indexes(const vespalib::string &dump_dir, const std TuneFileIndexing tuneFileIndexing; DummyFileHeaderContext fileHeaderContext; SelectorArray selector(20, 0); - Fusion fusion(_schema, dump_dir, sources, selector, - tuneFileIndexing, fileHeaderContext); + Fusion fusion(_schema, dump_dir, sources, selector, tuneFileIndexing, fileHeaderContext); fusion.set_force_small_merge_chunk(_force_small_merge_chunk); return fusion.merge(executor, flush_token); } diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp index 42d55c8f769..5f8b6b2df48 100644 --- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp +++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp @@ -86,7 +86,7 @@ public: _firstDoc(true) {} - virtual void startWord(vespalib::stringref word) override { + void startWord(vespalib::stringref word) override { assert(_insideField); assert(!_insideWord); if (!_firstWord) @@ -96,14 +96,14 @@ public: _insideWord = true; } - virtual void endWord() override { + void endWord() override { assert(_insideWord); _ss << "]"; _firstWord = false; _insideWord = false; } - virtual void startField(uint32_t fieldId) override { + void startField(uint32_t fieldId) override { assert(!_insideField); if (!_firstField) _ss << ","; _ss << "f=" << fieldId << "["; @@ -111,7 +111,7 @@ public: _insideField = true; } - virtual void endField() override { + void endField() override { assert(_insideField); assert(!_insideWord); _ss << "]"; @@ -119,7 +119,7 @@ public: _insideField = false; } - virtual void add_document(const DocIdAndFeatures &features) override { + void add_document(const DocIdAndFeatures &features) override { assert(_insideWord); if (!_firstDoc) { _ss << ","; @@ -875,11 +875,10 @@ TEST_F(FieldIndexCollectionTest, require_that_dumping_words_with_no_docs_to_inde b.toStr()); } { - search::diskindex::IndexBuilder b(schema); - b.setPrefix("dump"); + search::diskindex::IndexBuilder b(schema, "dump", 5); TuneFileIndexing tuneFileIndexing; DummyFileHeaderContext fileHeaderContext; - b.open(5, 2, MockFieldLengthInspector(), tuneFileIndexing, fileHeaderContext); + b.open(2, MockFieldLengthInspector(), tuneFileIndexing, fileHeaderContext); fic.dump(b); b.close(); } @@ -1224,14 +1223,10 @@ TEST_F(UriInverterTest, require_that_uri_indexing_is_working) EXPECT_TRUE(itr->isAtEnd()); } { - search::diskindex::IndexBuilder dib(_schema); - dib.setPrefix("urldump"); + search::diskindex::IndexBuilder dib(_schema, "urldump", 11); TuneFileIndexing tuneFileIndexing; DummyFileHeaderContext fileHeaderContext; - dib.open(11, _fic.getNumUniqueWords(), - MockFieldLengthInspector(), - tuneFileIndexing, - fileHeaderContext); + dib.open(_fic.getNumUniqueWords(), MockFieldLengthInspector(), tuneFileIndexing, fileHeaderContext); _fic.dump(dib); dib.close(); } diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.h b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.h index 162ca512cec..f6316bd7db7 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.h +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.h @@ -17,12 +17,8 @@ class BitVectorFileWrite : public BitVectorIdxFileWrite { private: using Parent = BitVectorIdxFileWrite; - std::unique_ptr<Fast_BufferedFile> _datFile; -public: - -private: - uint32_t _datHeaderLen; + uint32_t _datHeaderLen; public: BitVectorFileWrite(const BitVectorFileWrite &) = delete; @@ -30,7 +26,7 @@ public: BitVectorFileWrite& operator=(const BitVectorFileWrite &) = delete; BitVectorFileWrite& operator=(const BitVectorFileWrite &&) = delete; BitVectorFileWrite(BitVectorKeyScope scope); - ~BitVectorFileWrite(); + ~BitVectorFileWrite() override; void open(const vespalib::string &name, uint32_t docIdLimit, const TuneFileSeqWrite &tuneFileWrite, @@ -53,16 +49,17 @@ class BitVectorCandidate { private: std::vector<uint32_t, vespalib::allocator_large<uint32_t>> _array; - uint64_t _numDocs; - uint32_t _bitVectorLimit; - BitVector::UP _bv; + BitVector::UP _bv; + uint64_t _numDocs; + const uint32_t _bitVectorLimit; + public: BitVectorCandidate(uint32_t docIdLimit, uint32_t bitVectorLimit) : _array(), + _bv(BitVector::create(docIdLimit)), _numDocs(0u), - _bitVectorLimit(bitVectorLimit), - _bv(BitVector::create(docIdLimit)) + _bitVectorLimit(bitVectorLimit) { _array.reserve(_bitVectorLimit); } diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp index d39270069eb..3442a610f4d 100644 --- a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp @@ -17,17 +17,17 @@ using vespalib::getLastErrorString; using common::FileHeaderContext; FieldWriter::FieldWriter(uint32_t docIdLimit, uint64_t numWordIds) - : _wordNum(noWordNum()), - _prevDocId(0), - _dictFile(), + : _dictFile(), _posoccfile(), _bvc(docIdLimit), _bmapfile(BitVectorKeyScope::PERFIELD_WORDS), - _docIdLimit(docIdLimit), - _numWordIds(numWordIds), _prefix(), + _word(), + _numWordIds(numWordIds), _compactWordNum(0), - _word() + _wordNum(noWordNum()), + _prevDocId(0), + _docIdLimit(docIdLimit) { } @@ -162,12 +162,6 @@ FieldWriter::close() } void -FieldWriter::setFeatureParams(const PostingListParams ¶ms) -{ - _posoccfile->setFeatureParams(params); -} - -void FieldWriter::getFeatureParams(PostingListParams ¶ms) { _posoccfile->getFeatureParams(params); diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h index bf62965719d..d541fd59be8 100644 --- a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h +++ b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h @@ -18,36 +18,11 @@ namespace search::diskindex { * and by the memory index dump code to write a field to disk. */ class FieldWriter { -private: - uint64_t _wordNum; - uint32_t _prevDocId; - - static uint64_t noWordNum() { return 0u; } public: - - using DictionaryFileSeqWrite = index::DictionaryFileSeqWrite; - - using PostingListFileSeqWrite = index::PostingListFileSeqWrite; using DocIdAndFeatures = index::DocIdAndFeatures; using Schema = index::Schema; - using PostingListCounts = index::PostingListCounts; using PostingListParams = index::PostingListParams; - std::unique_ptr<DictionaryFileSeqWrite> _dictFile; - std::unique_ptr<PostingListFileSeqWrite> _posoccfile; - -private: - BitVectorCandidate _bvc; - BitVectorFileWrite _bmapfile; - uint32_t _docIdLimit; - uint64_t _numWordIds; - vespalib::string _prefix; - uint64_t _compactWordNum; - vespalib::string _word; - - void flush(); - -public: FieldWriter(const FieldWriter &rhs) = delete; FieldWriter(const FieldWriter &&rhs) = delete; FieldWriter &operator=(const FieldWriter &rhs) = delete; @@ -78,9 +53,25 @@ public: bool close(); - void setFeatureParams(const PostingListParams ¶ms); void getFeatureParams(PostingListParams ¶ms); static void remove(const vespalib::string &prefix); +private: + using DictionaryFileSeqWrite = index::DictionaryFileSeqWrite; + using PostingListFileSeqWrite = index::PostingListFileSeqWrite; + using PostingListCounts = index::PostingListCounts; + std::unique_ptr<DictionaryFileSeqWrite> _dictFile; + std::unique_ptr<PostingListFileSeqWrite> _posoccfile; + BitVectorCandidate _bvc; + BitVectorFileWrite _bmapfile; + vespalib::string _prefix; + vespalib::string _word; + const uint64_t _numWordIds; + uint64_t _compactWordNum; + uint64_t _wordNum; + uint32_t _prevDocId; + const uint32_t _docIdLimit; + void flush(); + static uint64_t noWordNum() { return 0u; } }; } diff --git a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp index f0964de23f5..e59c18c4ee6 100644 --- a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp @@ -8,7 +8,6 @@ #include <vespa/searchlib/index/schemautil.h> #include <vespa/searchlib/common/documentsummary.h> #include <vespa/vespalib/io/fileutil.h> -#include <vespa/vespalib/util/array.hpp> #include <vespa/vespalib/util/error.h> #include <filesystem> @@ -55,17 +54,13 @@ public: class IndexBuilder::FieldHandle { private: - bool _valid; - const Schema *_schema; // Ptr to allow being std::vector member - uint32_t _fieldId; - IndexBuilder *_builder; // Ptr to allow being std::vector member - FileHandle _file; - + const Schema *_schema; // Ptr to allow being std::vector member + IndexBuilder *_builder; // Ptr to allow being std::vector member + FileHandle _file; + const uint32_t _fieldId; + const bool _valid; public: - FieldHandle(const Schema &schema, - uint32_t fieldId, - IndexBuilder *builder); - + FieldHandle(const Schema &schema, uint32_t fieldId, IndexBuilder *builder, bool valid); ~FieldHandle(); void new_word(vespalib::stringref word); @@ -80,7 +75,6 @@ public: const FileHeaderContext &fileHeaderContext); void close(); - void setValid() { _valid = true; } bool getValid() const { return _valid; } uint32_t getIndexId() const { return _fieldId; } }; @@ -124,8 +118,7 @@ FileHandle::close() bool closeRes = _fieldWriter->close(); _fieldWriter.reset(); if (!closeRes) { - LOG(error, - "Could not close field writer"); + LOG(error, "Could not close field writer"); ret = false; } } @@ -133,14 +126,12 @@ FileHandle::close() (void) ret; } -IndexBuilder::FieldHandle::FieldHandle(const Schema &schema, - uint32_t fieldId, - IndexBuilder *builder) - : _valid(false), - _schema(&schema), - _fieldId(fieldId), +IndexBuilder::FieldHandle::FieldHandle(const Schema &schema, uint32_t fieldId, IndexBuilder *builder, bool valid) + : _schema(&schema), _builder(builder), - _file() + _file(), + _fieldId(fieldId), + _valid(valid) { } @@ -196,30 +187,32 @@ IndexBuilder::FieldHandle::close() _file.close(); } -IndexBuilder::IndexBuilder(const Schema &schema) - : index::IndexBuilder(schema), - _currentField(nullptr), - _curDocId(noDocId()), - _lowestOKDocId(1u), - _curWord(), - _inWord(false), - _lowestOKFieldId(0u), - _fields(), - _prefix(), - _docIdLimit(0u), - _numWordIds(0u), - _schema(schema) -{ +std::vector<IndexBuilder::FieldHandle> +IndexBuilder::extractFields(const Schema &schema, IndexBuilder * builder) { + std::vector<IndexBuilder::FieldHandle> fields; + fields.reserve(schema.getNumIndexFields()); // TODO: Filter for text indexes - for (uint32_t i = 0, ie = schema.getNumIndexFields(); i < ie; ++i) { + for (uint32_t i = 0; i < schema.getNumIndexFields(); ++i) { const Schema::IndexField &iField = schema.getIndexField(i); - FieldHandle fh(schema, i, this); // Only know how to handle string index for now. - if (iField.getDataType() == DataType::STRING) { - fh.setValid(); - } - _fields.push_back(fh); + bool valid = (iField.getDataType() == DataType::STRING); + fields.emplace_back(schema, i, builder, valid); } + return fields; +} + +IndexBuilder::IndexBuilder(const Schema &schema, vespalib::stringref prefix, uint32_t docIdLimit) + : index::IndexBuilder(schema), + _schema(schema), + _fields(extractFields(schema, this)), + _prefix(prefix), + _curWord(), + _currentField(nullptr), + _docIdLimit(docIdLimit), + _lowestOKFieldId(0u), + _curDocId(noDocId()), + _inWord(false) +{ } IndexBuilder::~IndexBuilder() = default; @@ -262,7 +255,6 @@ IndexBuilder::endWord() assert(_inWord); assert(_currentField != nullptr); _inWord = false; - _lowestOKDocId = 1u; } void @@ -273,14 +265,8 @@ IndexBuilder::add_document(const index::DocIdAndFeatures &features) _currentField->add_document(features); } -void -IndexBuilder::setPrefix(vespalib::stringref prefix) -{ - _prefix = prefix; -} - vespalib::string -IndexBuilder::appendToPrefix(vespalib::stringref name) +IndexBuilder::appendToPrefix(vespalib::stringref name) const { if (_prefix.empty()) { return name; @@ -289,15 +275,13 @@ IndexBuilder::appendToPrefix(vespalib::stringref name) } void -IndexBuilder::open(uint32_t docIdLimit, uint64_t numWordIds, +IndexBuilder::open(uint64_t numWordIds, const IFieldLengthInspector &field_length_inspector, const TuneFileIndexing &tuneFileIndexing, const FileHeaderContext &fileHeaderContext) { std::vector<uint32_t> indexes; - _docIdLimit = docIdLimit; - _numWordIds = numWordIds; if (!_prefix.empty()) { std::filesystem::create_directory(std::filesystem::path(_prefix)); } @@ -307,10 +291,9 @@ IndexBuilder::open(uint32_t docIdLimit, uint64_t numWordIds, continue; } std::filesystem::create_directory(std::filesystem::path(fh.getDir())); - fh.open(docIdLimit, numWordIds, + fh.open(_docIdLimit, numWordIds, field_length_inspector.get_field_length_info(fh.getName()), - tuneFileIndexing._write, - fileHeaderContext); + tuneFileIndexing._write, fileHeaderContext); indexes.push_back(fh.getIndexId()); } vespalib::string schemaFile = appendToPrefix("schema.txt"); diff --git a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h index c29bbe2e28f..fcb21629ea9 100644 --- a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h +++ b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h @@ -21,37 +21,8 @@ class BitVectorCandidate; */ class IndexBuilder : public index::IndexBuilder { public: - class FieldHandle; - - using Schema = index::Schema; -private: - // Text fields - FieldHandle *_currentField; - uint32_t _curDocId; - uint32_t _lowestOKDocId; - vespalib::string _curWord; - bool _inWord; - uint32_t _lowestOKFieldId; - std::vector<FieldHandle> _fields; // Defined fields. - vespalib::string _prefix; - uint32_t _docIdLimit; - uint64_t _numWordIds; - - const Schema &_schema; - - static uint32_t noDocId() { - return std::numeric_limits<uint32_t>::max(); - } - - static uint64_t noWordNumHigh() { - return std::numeric_limits<uint64_t>::max(); - } - -public: - using WordDocElementWordPosFeatures = index::WordDocElementWordPosFeatures; - // Schema argument must live until IndexBuilder has been deleted. - IndexBuilder(const Schema &schema); + IndexBuilder(const index::Schema &schema, vespalib::stringref prefix, uint32_t docIdLimit); ~IndexBuilder() override; void startField(uint32_t fieldId) override; @@ -59,17 +30,34 @@ public: void startWord(vespalib::stringref word) override; void endWord() override; void add_document(const index::DocIdAndFeatures &features) override; + vespalib::string appendToPrefix(vespalib::stringref name) const; - void setPrefix(vespalib::stringref prefix); - - vespalib::string appendToPrefix(vespalib::stringref name); - - void open(uint32_t docIdLimit, uint64_t numWordIds, - const index::IFieldLengthInspector &field_length_inspector, + void open(uint64_t numWordIds, const index::IFieldLengthInspector &field_length_inspector, const TuneFileIndexing &tuneFileIndexing, const common::FileHeaderContext &fileHandleContext); void close(); +private: + class FieldHandle; + const index::Schema &_schema; + std::vector<FieldHandle> _fields; + const vespalib::string _prefix; + vespalib::string _curWord; + FieldHandle *_currentField; + const uint32_t _docIdLimit; + uint32_t _lowestOKFieldId; + uint32_t _curDocId; + bool _inWord; + + static std::vector<IndexBuilder::FieldHandle> extractFields(const index::Schema &schema, IndexBuilder * builder); + + static uint32_t noDocId() { + return std::numeric_limits<uint32_t>::max(); + } + + static uint64_t noWordNumHigh() { + return std::numeric_limits<uint64_t>::max(); + } }; } diff --git a/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp index 4d7c67e1f82..b4ebfdc9629 100644 --- a/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp +++ b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp @@ -36,16 +36,16 @@ struct Builder { search::diskindex::IndexBuilder _ib; MockFieldLengthInspector _mock_field_length_inspector; - TuneFileIndexing _tuneFileIndexing; - DummyFileHeaderContext _fileHeaderContext; - DocIdAndFeatures _features; + TuneFileIndexing _tuneFileIndexing; + DummyFileHeaderContext _fileHeaderContext; + DocIdAndFeatures _features; Builder(const std::string &dir, const Schema &s, uint32_t docIdLimit, uint64_t numWordIds, bool directio) - : _ib(s), + : _ib(s, dir, docIdLimit), _tuneFileIndexing(), _fileHeaderContext(), _features() @@ -54,14 +54,10 @@ struct Builder _tuneFileIndexing._read.setWantDirectIO(); _tuneFileIndexing._write.setWantDirectIO(); } - _ib.setPrefix(dir); - _ib.open(docIdLimit, numWordIds, _mock_field_length_inspector, _tuneFileIndexing, - _fileHeaderContext); + _ib.open(numWordIds, _mock_field_length_inspector, _tuneFileIndexing, _fileHeaderContext); } - void - addDoc(uint32_t docId) - { + void addDoc(uint32_t docId) { _features.clear(docId); _features.elements().emplace_back(0, 1, 1); _features.elements().back().setNumOccs(1); @@ -69,9 +65,7 @@ struct Builder _ib.add_document(_features); } - void - close() - { + void close() { _ib.close(); } }; |