From 77af8383c19ebf0854e800b85879d7207a7468eb Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Fri, 12 Apr 2019 07:29:51 +0000 Subject: Add and improve class comments. --- .../searchlib/memoryindex/document_inverter.h | 42 ++++++---- .../vespa/searchlib/memoryindex/feature_store.h | 29 ++++--- .../src/vespa/searchlib/memoryindex/field_index.h | 2 +- .../vespa/searchlib/memoryindex/field_inverter.h | 91 ++++++---------------- .../src/vespa/searchlib/memoryindex/memory_index.h | 79 +++++++++---------- 5 files changed, 104 insertions(+), 139 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h index 5c2d9cc84ed..158302ddea5 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h @@ -24,6 +24,11 @@ class FieldInverter; class UrlFieldInverter; class FieldIndexCollection; +/** + * Class used to invert the fields for a set of documents, preparing for pushing changes into field indexes. + * + * Each text and uri field in the document is handled separately by a FieldInverter and UrlFieldInverter. + */ class DocumentInverter { private: DocumentInverter(const DocumentInverter &) = delete; @@ -48,18 +53,16 @@ private: ISequencedTaskExecutor &_invertThreads; ISequencedTaskExecutor &_pushThreads; - /** - * Obtain the schema used by this index. - * - * @return schema used by this index - */ const index::Schema &getSchema() const { return _schema; } public: /** - * Create a new memory index based on the given schema. + * Create a new document inverter based on the given schema. * - * @param schema the index schema to use + * @param schema the schema with which text and uri fields to consider. + * @param invertThreads the executor with threads for doing document inverting. 
+ * @param pushThreads the executor with threads for doing pushing of inverted documents + * to corresponding field indexes. */ DocumentInverter(const index::Schema &schema, ISequencedTaskExecutor &invertThreads, @@ -68,23 +71,34 @@ public: ~DocumentInverter(); /** - * Push inverted documents to memory field indexes. + * Push the current batch of inverted documents to corresponding field indexes. + * + * This function is async: + * For each field inverter a task for pushing the inverted documents to the corresponding field index + * is added to the 'push threads' executor, then this function returns. + * All tasks hold a reference to the 'onWriteDone' callback, so when the last task is completed, + * the callback is destructed. + * + * NOTE: The caller of this function should sync the 'invert threads' executor first, + * to ensure that inverting is completed before pushing starts. */ void pushDocuments(FieldIndexCollection &fieldIndexes, const std::shared_ptr &onWriteDone); /** - * Invert a document. - * - * @param docId local id for document - * @param doc the document + * Invert (add) the given document. * + * This function is async: + * For each text and uri field in the document a task for inverting and adding that + * field (using a field inverter) is added to the 'invert threads' executor, then this function returns. **/ void invertDocument(uint32_t docId, const document::Document &doc); /** - * Remove a document. + * Remove the given document. * - * @param docId local id for document + * This function is async: + * For each text and uri field in the index schema a task for removing this document + * (using a field inverter) is added to the 'invert threads' executor, then this function returns. 
*/ void removeDocument(uint32_t docId); diff --git a/searchlib/src/vespa/searchlib/memoryindex/feature_store.h b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h index 94d44eaf44d..72b2e8e01bc 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/feature_store.h +++ b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h @@ -9,6 +9,9 @@ namespace search::memoryindex { +/** + * Class storing DocIdAndFeatures in an underlying DataStore, using 32-bit refs to access entries. + */ class FeatureStore { public: using DataStoreType = datastore::DataStoreT>; @@ -104,8 +107,9 @@ public: /** - * Get features from feature store. Method signature is not - * const since feature decoder is written to during calculation. + * Get features from feature store. + * + * Method signature is not const since feature decoder is written to during calculation. * * @param packedIndex The field or field collection owning features * @param ref Reference to stored features @@ -115,8 +119,7 @@ public: /** - * Setup the given decoder to be used for the given field or field - * collection. + * Setup the given decoder to be used for the given field or field collection. * * @param packedIndex The field or field collection owning features * @param decoder The feature decoder @@ -126,8 +129,7 @@ public: } /** - * Setup the given decoder to later use readFeatures() to decode - * the stored features. + * Setup the given decoder to later use readFeatures() to decode the stored features. * * @param ref Reference to stored features * @param decoder The feature decoder @@ -144,8 +146,7 @@ public: } /** - * Setup the given decoder to later use unpackFeatures() to decode - * the stored features. + * Setup the given decoder to later use unpackFeatures() to decode the stored features. * * @param ref Reference to stored features * @param decoder The feature decoder @@ -155,8 +156,9 @@ public: } /** - * Calculate size of encoded features. 
Method signature is not - * const since feature decoder is written to during calculation. + * Calculate size of encoded features. + * + * Method signature is not const since feature decoder is written to during calculation. * * @param packedIndex The field or field collection owning features * @param ref Reference to stored features @@ -167,7 +169,7 @@ public: /** * Get byte address of stored features * - * @param ref Referennce to stored features + * @param ref Reference to stored features * @return byte address of stored features */ const uint8_t *getBits(datastore::EntryRef ref) const { @@ -184,11 +186,6 @@ public: */ datastore::EntryRef moveFeatures(uint32_t packedIndex, datastore::EntryRef ref); - /** - * Return a const view of the fields params used by this feature store. - * - * @return const view of fields params. - */ const std::vector &getFieldsParams() const { return _fieldsParams; } void trimHoldLists(generation_t usedGen) { _store.trimHoldLists(usedGen); } diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h index 3b0675b5fdf..918e1a05a07 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h @@ -19,7 +19,7 @@ namespace search::memoryindex { class OrderedFieldIndexInserter; /** - * Memory index for a single field. + * Memory index for a single field using lock-free B-Trees in underlying components. * * It consists of the following components: * - WordStore containing all unique words in this field (across all documents). 
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h index ecf2f8d8979..ba6a0e96698 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h +++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h @@ -18,6 +18,12 @@ namespace search::memoryindex { class IOrderedFieldIndexInserter; class FieldIndexRemover; +/** + * Class used to invert a field for a set of documents, preparing for pushing changes into the corresponding FieldIndex. + * + * It creates a set of sorted {word, docId, features} tuples based on the field content of the documents, + * and uses this when updating the posting lists of the FieldIndex. + */ class FieldInverter : public IFieldIndexRemoveListener { public: class PosInfo { @@ -169,7 +175,7 @@ private: using SpanTermVector = std::vector; SpanTermVector _terms; - // info about aborted and pending documents. + // Info about aborted and pending documents. std::vector _abortedDocs; std::map _pendingDocs; std::vector _removeDocs; @@ -178,66 +184,37 @@ private: invertNormalDocTextField(const document::FieldValue &val); public: - /** - * Start a new element - * - * @param weight element weight - */ void startElement(int32_t weight); - /** - * End an element. - */ void endElement(); private: /** - * Save field value as word in word buffer. - * - * @param word word to be saved - * @param len length of word to be saved. - * - * @return word reference + * Save the given word in the word buffer and return the word reference. */ VESPA_DLL_LOCAL uint32_t saveWord(const vespalib::stringref word); /** - * Save field value as word in word buffer. - * - * @param fv field value containing word to be stored - * - * @return word reference + * Save the field value as a word in the word buffer and return the word reference. */ VESPA_DLL_LOCAL uint32_t saveWord(const document::FieldValue &fv); /** * Get pointer to saved word from a word reference. 
- * - * @param wordRef word reference - * - * @return saved word */ const char *getWordFromRef(uint32_t wordRef) const { return &_words[static_cast(wordRef) << 2]; } /** - * Get pointer to saved word from a word number - * - * @param wordNum word number - * - * @return saved word + * Get pointer to saved word from a word number. */ const char *getWordFromNum(uint32_t wordNum) const { return getWordFromRef(_wordRefs[wordNum]); } /** - * Get word number from word reference - * - * @param wordRef word reference - * - * @return word number + * Get word number from word reference. */ uint32_t getWordNum(uint32_t wordRef) const { const char *p = &_words[static_cast(wordRef - 1) << 2]; @@ -245,10 +222,7 @@ private: } /** - * Update mapping from word reference to word number - * - * @param wordRef word reference - * @param wordNum word number + * Update mapping from word reference to word number. */ void updateWordNum(uint32_t wordRef, uint32_t wordNum) { char *p = &_words[static_cast(wordRef - 1) << 2]; @@ -256,14 +230,10 @@ private: } /** - * Add a word reference to posting list. Don't step word pos. - * - * - * @param wordRef word reference + * Add a word reference to posting list (but don't step word pos). */ void add(uint32_t wordRef) { - _positions.emplace_back(wordRef, _docId, _elem, - _wpos, _elems.size() - 1); + _positions.emplace_back(wordRef, _docId, _elem, _wpos, _elems.size() - 1); } void stepWordPos() { ++_wpos; } @@ -282,11 +252,6 @@ private: void processNormalDocWeightedSetTextField(const document::WeightedSetFieldValue &field); - /** - * Obtain the schema used by this index. - * - * @return schema used by this index - */ const index::Schema &getSchema() const { return _schema; } /** @@ -295,8 +260,7 @@ private: void reset(); /** - * Calculate word numbers and replace word references with word - * numbers in internal memory structures. + * Calculate word numbers and replace word references with word numbers in internal memory structures. 
*/ void sortWords(); @@ -304,43 +268,36 @@ private: void trimAbortedDocs(); - /* + /** * Abort a pending document that has already been inverted. - * - * @param docId local id for document - * */ void abortPendingDoc(uint32_t docId); public: /** - * Create a new memory index based on the given schema. - * - * @param schema the index schema to use - * @param schema the field to be inverted + * Create a new field inverter for the given fieldId, using the given schema. */ FieldInverter(const index::Schema &schema, uint32_t fieldId); - /* - * Apply pending removes. + /** + * Apply pending removes using the given remover. * - * @param remover document remover + * The remover is tracking all {word, docId} tuples that should be removed, + * and forwards this to the remove() function in this class (via IFieldIndexRemoveListener interface). */ void applyRemoves(FieldIndexRemover &remover); /** - * Push inverted documents to field index structure using the given inserter. - * - * Temporary restriction: Currently only one document at a time is supported. + * Push the current batch of inverted documents to the FieldIndex using the given inserter. */ void pushDocuments(IOrderedFieldIndexInserter &inserter); - /* + /** * Invert a normal text field, based on annotations. */ void invertField(uint32_t docId, const document::FieldValue::UP &val); - /* + /** * Setup remove of word in old version of document. */ virtual void remove(const vespalib::stringref word, uint32_t docId) override; diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h index 0b74e05c619..f390edd0718 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h +++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h @@ -20,9 +20,20 @@ class DocumentInverter; class FieldIndexCollection; /** - * Lock-free implementation of a memory-based index - * using the document inverter and dictionary classes from searchlib. 
- **/ + * Memory index for a set of text and uri fields that uses lock-free B-Trees in underlying components. + * + * Each field is handled separately by a FieldIndex that contains postings lists for all unique words in that field. + * + * Documents are inserted and removed from the underlying field indexes in a two-step process: + * 1) Call the async functions insertDocument() / removeDocument(). + * This adds tasks to invert / remove the fields in the documents to the 'invert threads' executor. + * 2) Call the async function commit(). + * This adds tasks to push the changes into the field indexes to the 'push threads' executor. + * When commit is completed a completion callback is signaled. + * + * Use createBlueprint() to search the memory index for a given term in a given field. + * + */ class MemoryIndex : public queryeval::Searchable { private: index::Schema _schema; @@ -70,71 +81,59 @@ public: /** * Create a new memory index based on the given schema. * - * @param schema the index schema to use - **/ + * @param schema the schema with which text and uri fields to keep in the index. + * @param invertThreads the executor with threads for doing document inverting. + * @param pushThreads the executor with threads for doing pushing of changes (inverted documents) + * to corresponding field indexes. + */ MemoryIndex(const index::Schema &schema, ISequencedTaskExecutor &invertThreads, ISequencedTaskExecutor &pushThreads); - /** - * Class destructor. Clean up washlist. - */ ~MemoryIndex(); - /** - * Obtain the schema used by this index. - * - * @return schema used by this index - **/ const index::Schema &getSchema() const { return _schema; } - /** - * Check if this index is frozen. - * - * @return true if this index is frozen - **/ bool isFrozen() const { return _frozen; } /** - * Insert a document into the index. If the document is already in - * the index, the old version will be removed first. + * Insert a document into the underlying field indexes. 
* - * @param docId local document id. - * @param doc the document to insert. - **/ + * If the document is already in the index, the old version will be removed first. + * This function is async. commit() must be called for changes to take effect. + */ void insertDocument(uint32_t docId, const document::Document &doc); /** - * Remove a document from the index. + * Remove a document from the underlying field indexes. * - * @param docId local document id. - **/ + * This function is async. commit() must be called for changes to take effect. + */ void removeDocument(uint32_t docId); /** - * Commits the inserts and removes since the last commit, making - * them searchable. When commit is completed, onWriteDone goes out - * of scope, scheduling completion callback. + * Commits the inserts and removes since the last commit, making them searchable. + * + * When commit is completed, 'onWriteDone' goes out of scope, scheduling completion callback. * * Callers can call pushThreads.sync() to wait for push completion. - **/ + */ void commit(const std::shared_ptr &onWriteDone); /** - * Freeze this index. Further index updates will be - * discarded. Extra information kept to wash the posting lists - * will be discarded. - **/ + * Freeze this index. + * + * Further index updates will be discarded. + * Extra information kept to wash the posting lists will be discarded. + */ void freeze(); /** * Dump the contents of this index into the given index builder. - * - * @param indexBuilder the builder to dump into - **/ + */ void dump(index::IndexBuilder &indexBuilder); - // implements Searchable + // Implements Searchable queryeval::Blueprint::UP createBlueprint(const queryeval::IRequestContext & requestContext, const queryeval::FieldSpec &field, const query::Node &term) override; @@ -162,9 +161,7 @@ public: /** * Gets an approximation of how much memory the index uses. - * - * @return approximately how much memory is used by the index. 
- **/ + */ MemoryUsage getMemoryUsage() const; uint64_t getStaticMemoryFootprint() const { return _staticMemoryFootprint; } -- cgit v1.2.3