diff options
Diffstat (limited to 'searchlib/src')
14 files changed, 76 insertions, 87 deletions
diff --git a/searchlib/src/tests/memoryindex/documentinverter/documentinverter_test.cpp b/searchlib/src/tests/memoryindex/documentinverter/documentinverter_test.cpp index 36cd15c8ada..bea4794cf3c 100644 --- a/searchlib/src/tests/memoryindex/documentinverter/documentinverter_test.cpp +++ b/searchlib/src/tests/memoryindex/documentinverter/documentinverter_test.cpp @@ -5,7 +5,7 @@ #include <vespa/searchlib/index/docbuilder.h> #include <vespa/searchlib/memoryindex/documentinverter.h> #include <vespa/searchlib/memoryindex/fieldinverter.h> -#include <vespa/searchlib/test/memoryindex/ordereddocumentinserter.h> +#include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h> #include <vespa/searchlib/common/sequencedtaskexecutor.h> #include <vespa/vespalib/testkit/testapp.h> @@ -97,7 +97,7 @@ struct Fixture SequencedTaskExecutor _invertThreads; SequencedTaskExecutor _pushThreads; DocumentInverter _inv; - test::OrderedDocumentInserter _inserter; + test::OrderedFieldIndexInserter _inserter; static Schema makeSchema() diff --git a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp index 373ed7fd311..324855f1255 100644 --- a/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp +++ b/searchlib/src/tests/memoryindex/field_index/field_index_test.cpp @@ -13,7 +13,7 @@ #include <vespa/searchlib/memoryindex/documentinverter.h> #include <vespa/searchlib/memoryindex/field_index_collection.h> #include <vespa/searchlib/memoryindex/fieldinverter.h> -#include <vespa/searchlib/memoryindex/ordereddocumentinserter.h> +#include <vespa/searchlib/memoryindex/ordered_field_index_inserter.h> #include <vespa/searchlib/memoryindex/postingiterator.h> #include <vespa/searchlib/test/searchiteratorverifier.h> #include <vespa/vespalib/testkit/testapp.h> @@ -300,7 +300,7 @@ MockFieldIndex::~MockFieldIndex() = default; /** * MockWordStoreScan is a helper class to ensure that previous word is - * still stored safely in memory, to satisfy OrderedDocumentInserter + * still stored safely in memory, to satisfy OrderedFieldIndexInserter * needs. */ class MockWordStoreScan @@ -347,7 +347,7 @@ class MyInserter MockFieldIndex _mock; FieldIndexCollection _fieldIndexes; DocIdAndPosOccFeatures _features; - IOrderedDocumentInserter *_documentInserter; + IOrderedFieldIndexInserter *_inserter; public: MyInserter(const Schema &schema) @@ -355,7 +355,7 @@ public: _mock(), _fieldIndexes(schema), _features(), - _documentInserter(nullptr) + _inserter(nullptr) { _features.addNextOcc(0, 0, 1, 1); } @@ -365,32 +365,32 @@ public: setNextWord(const vespalib::string &word) { const vespalib::string &w = _wordStoreScan.setWord(word); - _documentInserter->setNextWord(w); + _inserter->setNextWord(w); _mock.setNextWord(w); } void setNextField(uint32_t fieldId) { - if (_documentInserter != nullptr) { - _documentInserter->flush(); + if (_inserter != nullptr) { + _inserter->flush(); } - _documentInserter = &_fieldIndexes.getFieldIndex(fieldId)->getInserter(); - _documentInserter->rewind(); + _inserter = &_fieldIndexes.getFieldIndex(fieldId)->getInserter(); + _inserter->rewind(); _mock.setNextField(fieldId); } void add(uint32_t docId) { - _documentInserter->add(docId, _features); + _inserter->add(docId, _features); _mock.add(docId); } void remove(uint32_t docId) { - _documentInserter->remove(docId); + _inserter->remove(docId); _mock.remove(docId); } @@ -406,8 +406,8 @@ public: bool assertPostings() { - if (_documentInserter != nullptr) { - _documentInserter->flush(); + if (_inserter != nullptr) { + _inserter->flush(); } for (auto wfp : _mock) { auto &wf = wfp.first; @@ -423,9 +423,9 @@ public: void rewind() { - if (_documentInserter != nullptr) { - _documentInserter->flush(); - _documentInserter = nullptr; + if (_inserter != nullptr) { + _inserter->flush(); + _inserter = nullptr; } } @@ -451,7 +451,7 @@ myremove(uint32_t docId, DocumentInverter &inv, FieldIndexCollection &fieldIndex class WrapInserter { - OrderedDocumentInserter &_inserter; + OrderedFieldIndexInserter &_inserter; public: WrapInserter(FieldIndexCollection &fieldIndexes, uint32_t fieldId) : _inserter(fieldIndexes.getFieldIndex(fieldId)->getInserter()) diff --git a/searchlib/src/tests/memoryindex/fieldinverter/fieldinverter_test.cpp b/searchlib/src/tests/memoryindex/fieldinverter/fieldinverter_test.cpp index 1d066747ef8..42596afb81b 100644 --- a/searchlib/src/tests/memoryindex/fieldinverter/fieldinverter_test.cpp +++ b/searchlib/src/tests/memoryindex/fieldinverter/fieldinverter_test.cpp @@ -2,7 +2,7 @@ #include <vespa/searchlib/index/docbuilder.h> #include <vespa/searchlib/memoryindex/fieldinverter.h> -#include <vespa/searchlib/test/memoryindex/ordereddocumentinserter.h> +#include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h> #include <vespa/vespalib/testkit/testapp.h> #include <vespa/document/repo/fixedtyperepo.h> @@ -105,7 +105,7 @@ struct Fixture Schema _schema; DocBuilder _b; std::vector<std::unique_ptr<FieldInverter> > _inverters; - test::OrderedDocumentInserter _inserter; + test::OrderedFieldIndexInserter _inserter; static Schema makeSchema() diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp b/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp index daec09828f6..1fbbb619a5f 100644 --- a/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp @@ -5,7 +5,7 @@ #include <vespa/searchlib/index/docbuilder.h> #include <vespa/searchlib/memoryindex/fieldinverter.h> #include <vespa/searchlib/memoryindex/urlfieldinverter.h> -#include <vespa/searchlib/test/memoryindex/ordereddocumentinserter.h> +#include <vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h> #include <vespa/vespalib/testkit/testapp.h> #include <vespa/document/repo/fixedtyperepo.h> @@ -183,7 +183,7 @@ struct Fixture DocBuilder _b; std::vector<std::unique_ptr<FieldInverter> > _inverters; std::unique_ptr<UrlFieldInverter> _urlInverter; - test::OrderedDocumentInserter _inserter; + test::OrderedFieldIndexInserter _inserter; index::SchemaIndexFields _schemaIndexFields; static Schema diff --git a/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt index ffcd7ebd975..0a0ed50be71 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt @@ -9,7 +9,7 @@ vespa_add_library(searchlib_memoryindex OBJECT field_index_collection.cpp fieldinverter.cpp memoryindex.cpp - ordereddocumentinserter.cpp + ordered_field_index_inserter.cpp postingiterator.cpp urlfieldinverter.cpp wordstore.cpp diff --git a/searchlib/src/vespa/searchlib/memoryindex/documentinverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/documentinverter.cpp index 5bc7c96fe8e..6f1b86fc540 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/documentinverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/documentinverter.cpp @@ -3,7 +3,7 @@ #include "documentinverter.h" #include "field_index_collection.h" #include "fieldinverter.h" -#include "ordereddocumentinserter.h" +#include "ordered_field_index_inserter.h" #include "urlfieldinverter.h" #include <vespa/document/annotation/alternatespanlist.h> #include <vespa/document/datatype/urldatatype.h> @@ -179,7 +179,7 @@ DocumentInverter::pushDocuments(FieldIndexCollection &fieldIndexes, for (auto &inverter : _inverters) { FieldIndex &fieldIndex(**indexFieldIterator); DocumentRemover &remover(fieldIndex.getDocumentRemover()); - OrderedDocumentInserter &inserter(fieldIndex.getInserter()); + OrderedFieldIndexInserter &inserter(fieldIndex.getInserter()); _pushThreads.execute(fieldId, [inverter(inverter.get()), &remover, &inserter, &fieldIndex, onWriteDone]() diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp index ef186175ae3..7d10895c32f 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.cpp @@ -1,7 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "field_index.h" -#include "ordereddocumentinserter.h" +#include "ordered_field_index_inserter.h" #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/util/exceptions.h> #include <vespa/searchlib/bitcompression/posocccompression.h> @@ -38,7 +38,7 @@ FieldIndex::FieldIndex(const Schema & schema, uint32_t fieldId) _featureStore(schema), _fieldId(fieldId), _remover(_wordStore), - _inserter(std::make_unique<OrderedDocumentInserter>(*this)) + _inserter(std::make_unique<OrderedFieldIndexInserter>(*this)) { } FieldIndex::~FieldIndex() diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h index 04f1cbc23ab..3d4cce23f7d 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h @@ -16,7 +16,7 @@ namespace search::memoryindex { -class OrderedDocumentInserter; +class OrderedFieldIndexInserter; /** * Memory index for a single field. @@ -90,7 +90,7 @@ private: FeatureStore _featureStore; uint32_t _fieldId; DocumentRemover _remover; - std::unique_ptr<OrderedDocumentInserter> _inserter; + std::unique_ptr<OrderedFieldIndexInserter> _inserter; public: datastore::EntryRef addWord(const vespalib::stringref word) { @@ -112,7 +112,7 @@ public: uint64_t getNumUniqueWords() const { return _numUniqueWords; } const FeatureStore & getFeatureStore() const { return _featureStore; } const WordStore &getWordStore() const { return _wordStore; } - OrderedDocumentInserter &getInserter() const { return *_inserter; } + OrderedFieldIndexInserter &getInserter() const { return *_inserter; } private: void freeze() { diff --git a/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.cpp index 10d42640be1..1303a93bdce 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.cpp @@ -1,7 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "fieldinverter.h" -#include "ordereddocumentinserter.h" +#include "ordered_field_index_inserter.h" #include <vespa/document/datatype/urldatatype.h> #include <vespa/document/fieldvalue/arrayfieldvalue.h> #include <vespa/document/fieldvalue/stringfieldvalue.h> @@ -468,7 +468,7 @@ FieldInverter::applyRemoves(DocumentRemover &remover) } void -FieldInverter::pushDocuments(IOrderedDocumentInserter &inserter) +FieldInverter::pushDocuments(IOrderedFieldIndexInserter &inserter) { trimAbortedDocs(); diff --git a/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.h b/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.h index e05cf643bf8..3dcafce1045 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.h @@ -15,7 +15,7 @@ namespace search::memoryindex { -class IOrderedDocumentInserter; +class IOrderedFieldIndexInserter; class DocumentRemover; class FieldInverter : public IDocumentRemoveListener { @@ -329,14 +329,11 @@ public: void applyRemoves(DocumentRemover &remover); /** - * Push inverted documents to memory index structure. + * Push inverted documents to field index structure using the given inserter. * - * Temporary restriction: Currently only one document at a time is - * supported. - * - * @param inserter ordered document inserter + * Temporary restriction: Currently only one document at a time is supported. */ - void pushDocuments(IOrderedDocumentInserter &inserter); + void pushDocuments(IOrderedFieldIndexInserter &inserter); /* * Invert a normal text field, based on annotations. diff --git a/searchlib/src/vespa/searchlib/memoryindex/iordereddocumentinserter.h b/searchlib/src/vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h index 9edd1eb4d3b..a1eee2e10ee 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/iordereddocumentinserter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h @@ -10,13 +10,14 @@ namespace search::index { class DocIdAndFeatures; } namespace search::memoryindex { /** - * Interface class for ordered document inserter. + * Interface used to insert inverted documents into a FieldIndex, + * updating the underlying posting lists in that index. * - * Insert order must be properly sorted, by (word, docId) + * Insert order must be properly sorted, first by word, then by docId. */ -class IOrderedDocumentInserter { +class IOrderedFieldIndexInserter { public: - virtual ~IOrderedDocumentInserter() {} + virtual ~IOrderedFieldIndexInserter() {} /** * Set next word to operate on. @@ -24,7 +25,7 @@ public: virtual void setNextWord(const vespalib::stringref word) = 0; /** - * Add (word, docId) tuple with given features. + * Add (word, docId) tuple with the given features. */ virtual void add(uint32_t docId, const index::DocIdAndFeatures &features) = 0; @@ -33,15 +34,13 @@ public: */ virtual void remove(uint32_t docId) = 0; - /* - * Flush pending changes to postinglist for (_word). - * - * _dItr is located at correct position. + /** + * Flush pending changes for the current word (into the underlying posting list). */ virtual void flush() = 0; - /* - * Rewind iterator, to start new pass. + /** + * Rewind to prepare for another set of (word, docId) tuples. */ virtual void rewind() = 0; }; diff --git a/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.cpp b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp index 183f8692377..7813a071b16 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.cpp @@ -1,6 +1,6 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "ordereddocumentinserter.h" +#include "ordered_field_index_inserter.h" #include "i_document_insert_listener.h" #include <vespa/searchlib/index/docidandfeatures.h> @@ -27,7 +27,7 @@ const vespalib::string emptyWord = ""; } -OrderedDocumentInserter::OrderedDocumentInserter(FieldIndex &fieldIndex) +OrderedFieldIndexInserter::OrderedFieldIndexInserter(FieldIndex &fieldIndex) : _word(), _prevDocId(noDocId), _prevAdd(false), @@ -39,13 +39,13 @@ OrderedDocumentInserter::OrderedDocumentInserter(FieldIndex &fieldIndex) { } -OrderedDocumentInserter::~OrderedDocumentInserter() +OrderedFieldIndexInserter::~OrderedFieldIndexInserter() { flush(); } void -OrderedDocumentInserter::flushWord() +OrderedFieldIndexInserter::flushWord() { if (_removes.empty() && _adds.empty()) { return; @@ -68,14 +68,14 @@ OrderedDocumentInserter::flushWord() } void -OrderedDocumentInserter::flush() +OrderedFieldIndexInserter::flush() { flushWord(); _listener.flush(); } void -OrderedDocumentInserter::setNextWord(const vespalib::stringref word) +OrderedFieldIndexInserter::setNextWord(const vespalib::stringref word) { // TODO: Adjust here if zero length words should be legal. assert(_word < word); @@ -101,8 +101,8 @@ OrderedDocumentInserter::setNextWord(const vespalib::stringref word) } void -OrderedDocumentInserter::add(uint32_t docId, - const index::DocIdAndFeatures &features) +OrderedFieldIndexInserter::add(uint32_t docId, + const index::DocIdAndFeatures &features) { assert(docId != noDocId); assert(_prevDocId == noDocId || _prevDocId < docId || @@ -115,7 +115,7 @@ OrderedDocumentInserter::add(uint32_t docId, } void -OrderedDocumentInserter::remove(uint32_t docId) +OrderedFieldIndexInserter::remove(uint32_t docId) { assert(docId != noDocId); assert(_prevDocId == noDocId || _prevDocId < docId); @@ -125,7 +125,7 @@ OrderedDocumentInserter::remove(uint32_t docId) } void -OrderedDocumentInserter::rewind() +OrderedFieldIndexInserter::rewind() { assert(_removes.empty() && _adds.empty()); _word = ""; @@ -135,7 +135,7 @@ OrderedDocumentInserter::rewind() } datastore::EntryRef -OrderedDocumentInserter::getWordRef() const +OrderedFieldIndexInserter::getWordRef() const { return _dItr.getKey()._wordRef; } diff --git a/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.h b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.h index 0efb775487c..c1945ed3996 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/ordered_field_index_inserter.h @@ -2,7 +2,7 @@ #pragma once -#include "iordereddocumentinserter.h" +#include "i_ordered_field_index_inserter.h" #include "field_index.h" #include <limits> @@ -11,12 +11,15 @@ namespace search::memoryindex { class IDocumentInsertListener; /** - * Class for inserting updates to FieldIndex in an ordered manner - * (single pass scan of dictionary tree) + * Class used to insert inverted documents into a FieldIndex, + * updating the underlying posting lists in that index. * - * Insert order must be properly sorted, by (word, docId) + * This is done by doing a single pass scan of the dictionary of the FieldIndex, + * and for each word updating the posting list with docId adds / removes. + * + * Insert order must be properly sorted, first by word, then by docId. */ -class OrderedDocumentInserter : public IOrderedDocumentInserter { +class OrderedFieldIndexInserter : public IOrderedFieldIndexInserter { private: vespalib::stringref _word; uint32_t _prevDocId; @@ -38,7 +41,7 @@ private: static constexpr uint32_t noFieldId = std::numeric_limits<uint32_t>::max(); static constexpr uint32_t noDocId = std::numeric_limits<uint32_t>::max(); - /* + /** * Flush pending changes to postinglist for (_word). * * _dItr is located at correct position. @@ -46,13 +49,13 @@ private: void flushWord(); public: - OrderedDocumentInserter(FieldIndex &fieldIndex); - ~OrderedDocumentInserter() override; + OrderedFieldIndexInserter(FieldIndex &fieldIndex); + ~OrderedFieldIndexInserter() override; void setNextWord(const vespalib::stringref word) override; void add(uint32_t docId, const index::DocIdAndFeatures &features) override; void remove(uint32_t docId) override; - /* + /** * Flush pending changes to postinglist for (_word). Also flush * insert listener. * @@ -60,7 +63,7 @@ public: */ void flush() override; - /* + /** * Rewind iterator, to start new pass. */ void rewind() override; diff --git a/searchlib/src/vespa/searchlib/test/memoryindex/ordereddocumentinserter.h b/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h index 4802a7571c2..08473f9fc6c 100644 --- a/searchlib/src/vespa/searchlib/test/memoryindex/ordereddocumentinserter.h +++ b/searchlib/src/vespa/searchlib/test/memoryindex/ordered_field_index_inserter.h @@ -2,20 +2,12 @@ #pragma once -#include <vespa/searchlib/memoryindex/iordereddocumentinserter.h> +#include <vespa/searchlib/memoryindex/i_ordered_field_index_inserter.h> #include <sstream> -namespace search -{ +namespace search::memoryindex::test { -namespace memoryindex -{ - -namespace test -{ - -class OrderedDocumentInserter : public IOrderedDocumentInserter -{ +class OrderedFieldIndexInserter : public IOrderedFieldIndexInserter { std::stringstream _ss; bool _first; bool _verbose; @@ -31,7 +23,7 @@ class OrderedDocumentInserter : public IOrderedDocumentInserter } } public: - OrderedDocumentInserter() + OrderedFieldIndexInserter() : _ss(), _first(true), _verbose(false), @@ -115,6 +107,4 @@ public: void setVerbose() { _verbose = true; } }; -} // namespace test -} // namespace memoryindex -} // namespace search +} |