Add and improve class comments.

author: Geir Storli <geirst@verizonmedia.com> 2019-04-12 07:29:51 +0000
committer: Geir Storli <geirst@verizonmedia.com> 2019-04-12 11:41:40 +0000
commit: 77af8383c19ebf0854e800b85879d7207a7468eb (patch)
tree: ed342a99eef17e4da5726f9a6bc2d4692a342d8d /searchlib
parent: 28a9be2321136a976bdc3bc5b45cef084f81d815 (diff)
5 files changed, 104 insertions, 139 deletions
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h
index 5c2d9cc84ed..158302ddea5 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h
@@ -24,6 +24,11 @@ class FieldInverter;
 class UrlFieldInverter;
 class FieldIndexCollection;
 
+/**
+ * Class used to invert the fields for a set of documents, preparing for pushing changes into field indexes.
+ *
+ * Each text and uri field in the document is handled separately by a FieldInverter and UrlFieldInverter.
+ */
 class DocumentInverter {
 private:
     DocumentInverter(const DocumentInverter &) = delete;
@@ -48,18 +53,16 @@ private:
     ISequencedTaskExecutor &_invertThreads;
     ISequencedTaskExecutor &_pushThreads;
 
-    /**
-     * Obtain the schema used by this index.
-     *
-     * @return schema used by this index
-     */
     const index::Schema &getSchema() const { return _schema; }
 
 public:
     /**
-     * Create a new memory index based on the given schema.
+     * Create a new document inverter based on the given schema.
      *
-     * @param schema the index schema to use
+     * @param schema        the schema specifying which text and uri fields to consider.
+     * @param invertThreads the executor with threads for doing document inverting.
+     * @param pushThreads   the executor with threads for doing pushing of inverted documents
+     *                      to corresponding field indexes.
      */
     DocumentInverter(const index::Schema &schema,
                      ISequencedTaskExecutor &invertThreads,
@@ -68,23 +71,34 @@ public:
     ~DocumentInverter();
 
     /**
-     * Push inverted documents to memory field indexes.
+     * Push the current batch of inverted documents to corresponding field indexes.
+     *
+     * This function is async:
+     * For each field inverter a task for pushing the inverted documents to the corresponding field index
+     * is added to the 'push threads' executor, then this function returns.
+     * All tasks hold a reference to the 'onWriteDone' callback, so when the last task is completed,
+     * the callback is destructed.
+     *
+     * NOTE: The caller of this function should sync the 'invert threads' executor first,
+     * to ensure that inverting is completed before pushing starts.
      */
     void pushDocuments(FieldIndexCollection &fieldIndexes, const std::shared_ptr<IDestructorCallback> &onWriteDone);
 
     /**
-     * Invert a document.
-     *
-     * @param docId            local id for document
-     * @param doc              the document
+     * Invert (add) the given document.
      *
+     * This function is async:
+     * For each text and uri field in the document a task for inverting and adding that
+     * field (using a field inverter) is added to the 'invert threads' executor, then this function returns.
      **/
     void invertDocument(uint32_t docId, const document::Document &doc);
 
     /**
-     * Remove a document.
+     * Remove the given document.
      *
-     * @param docId            local id for document
+     * This function is async:
+     * For each text and uri field in the index schema a task for removing this document
+     * (using a field inverter) is added to the 'invert threads' executor, then this function returns.
      */
     void removeDocument(uint32_t docId);
 
diff --git a/searchlib/src/vespa/searchlib/memoryindex/feature_store.h b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h
index 94d44eaf44d..72b2e8e01bc 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/feature_store.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h
@@ -9,6 +9,9 @@
 
 namespace search::memoryindex {
 
+/**
+ * Class storing DocIdAndFeatures in an underlying DataStore, using 32-bit refs to access entries.
+ */
 class FeatureStore {
 public:
     using DataStoreType = datastore::DataStoreT<datastore::AlignedEntryRefT<22, 2>>;
@@ -104,8 +107,9 @@ public:
 
 
     /**
-     * Get features from feature store.  Method signature is not
-     * const since feature decoder is written to during calculation.
+     * Get features from feature store.
+     *
+     * Method signature is not const since feature decoder is written to during calculation.
      *
      * @param packedIndex The field or field collection owning features
      * @param ref         Reference to stored features
@@ -115,8 +119,7 @@ public:
 
 
     /**
-     * Setup the given decoder to be used for the given field or field
-     * collection.
+     * Setup the given decoder to be used for the given field or field collection.
      *
      * @param packedIndex The field or field collection owning features
      * @param decoder     The feature decoder
@@ -126,8 +129,7 @@ public:
     }
 
     /**
-     * Setup the given decoder to later use readFeatures() to decode
-     * the stored features.
+     * Setup the given decoder to later use readFeatures() to decode the stored features.
      *
      * @param ref      Reference to stored features
      * @param decoder  The feature decoder
@@ -144,8 +146,7 @@ public:
     }
 
     /**
-     * Setup the given decoder to later use unpackFeatures() to decode
-     * the stored features.
+     * Setup the given decoder to later use unpackFeatures() to decode the stored features.
      *
      * @param ref      Reference to stored features
      * @param decoder  The feature decoder
@@ -155,8 +156,9 @@ public:
     }
 
     /**
-     * Calculate size of encoded features.  Method signature is not
-     * const since feature decoder is written to during calculation.
+     * Calculate size of encoded features.
+     *
+     * Method signature is not const since feature decoder is written to during calculation.
      *
      * @param packedIndex The field or field collection owning features
      * @param ref         Reference to stored features
@@ -167,7 +169,7 @@ public:
     /**
      * Get byte address of stored features
      *
-     * @param ref Referennce to stored features
+     * @param ref Reference to stored features
      * @return    byte address of stored features
      */
     const uint8_t *getBits(datastore::EntryRef ref) const {
@@ -184,11 +186,6 @@ public:
      */
     datastore::EntryRef moveFeatures(uint32_t packedIndex, datastore::EntryRef ref);
 
-    /**
-     * Return a const view of the fields params used by this feature store.
-     *
-     * @return const view of fields params.
-     */
     const std::vector<PosOccFieldsParams> &getFieldsParams() const { return _fieldsParams; }
 
     void trimHoldLists(generation_t usedGen) { _store.trimHoldLists(usedGen); }
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
index 3b0675b5fdf..918e1a05a07 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
@@ -19,7 +19,7 @@ namespace search::memoryindex {
 class OrderedFieldIndexInserter;
 
 /**
- * Memory index for a single field.
+ * Memory index for a single field using lock-free B-Trees in underlying components.
  *
  * It consists of the following components:
  *   - WordStore containing all unique words in this field (across all documents).
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h
index ecf2f8d8979..ba6a0e96698 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h
@@ -18,6 +18,12 @@ namespace search::memoryindex {
 class IOrderedFieldIndexInserter;
 class FieldIndexRemover;
 
+/**
+ * Class used to invert a field for a set of documents, preparing for pushing changes into the corresponding FieldIndex.
+ *
+ * It creates a set of sorted {word, docId, features} tuples based on the field content of the documents,
+ * and uses this when updating the posting lists of the FieldIndex.
+ */
 class FieldInverter : public IFieldIndexRemoveListener {
 public:
     class PosInfo {
@@ -169,7 +175,7 @@ private:
     using SpanTermVector = std::vector<SpanTerm>;
     SpanTermVector                      _terms;
 
-    // info about aborted and pending documents.
+    // Info about aborted and pending documents.
     std::vector<PositionRange>      _abortedDocs;
     std::map<uint32_t, PositionRange> _pendingDocs;
     std::vector<uint32_t>             _removeDocs;
@@ -178,66 +184,37 @@ private:
     invertNormalDocTextField(const document::FieldValue &val);
 
 public:
-    /**
-     * Start a new element
-     *
-     * @param weight        element weight
-     */
     void startElement(int32_t weight);
 
-    /**
-     * End an element.
-     */
     void endElement();
 
 private:
     /**
-     * Save field value as word in word buffer.
-     *
-     * @param word      word to be saved
-     * @param len       length of word to be saved.
-     *
-     * @return          word reference
+     * Save the given word in the word buffer and return the word reference.
      */
     VESPA_DLL_LOCAL uint32_t saveWord(const vespalib::stringref word);
 
     /**
-     * Save field value as word in word buffer.
-     *
-     * @param fv        field value containing word to be stored
-     *
-     * @return          word reference
+     * Save the field value as a word in the word buffer and return the word reference.
      */
     VESPA_DLL_LOCAL uint32_t saveWord(const document::FieldValue &fv);
 
     /**
      * Get pointer to saved word from a word reference.
-     *
-     * @param wordRef       word reference
-     *
-     * @return          saved word
      */
     const char *getWordFromRef(uint32_t wordRef) const {
         return &_words[static_cast<size_t>(wordRef) << 2];
     }
 
     /**
-     * Get pointer to saved word from a word number
-     *
-     * @param wordNum       word number
-     *
-     * @return          saved word
+     * Get pointer to saved word from a word number.
      */
     const char *getWordFromNum(uint32_t wordNum) const {
         return getWordFromRef(_wordRefs[wordNum]);
     }
 
     /**
-     * Get word number from word reference
-     *
-     * @param wordRef       word reference
-     *
-     * @return          word number
+     * Get word number from word reference.
      */
     uint32_t getWordNum(uint32_t wordRef) const {
         const char *p = &_words[static_cast<size_t>(wordRef - 1) << 2];
@@ -245,10 +222,7 @@ private:
     }
 
     /**
-     * Update mapping from word reference to word number
-     *
-     * @param wordRef       word reference
-     * @param wordNum       word number
+     * Update mapping from word reference to word number.
      */
     void updateWordNum(uint32_t wordRef, uint32_t wordNum) {
         char *p = &_words[static_cast<size_t>(wordRef - 1) << 2];
@@ -256,14 +230,10 @@ private:
     }
 
     /**
-     * Add a word reference to posting list.  Don't step word pos.
-     *
-     *
-     * @param wordRef       word reference
+     * Add a word reference to posting list (but don't step word pos).
      */
     void add(uint32_t wordRef) {
-        _positions.emplace_back(wordRef, _docId, _elem,
-                                _wpos, _elems.size() - 1);
+        _positions.emplace_back(wordRef, _docId, _elem, _wpos, _elems.size() - 1);
     }
 
     void stepWordPos() { ++_wpos; }
@@ -282,11 +252,6 @@ private:
     void
     processNormalDocWeightedSetTextField(const document::WeightedSetFieldValue &field);
 
-    /**
-     * Obtain the schema used by this index.
-     *
-     * @return schema used by this index
-     */
     const index::Schema &getSchema() const { return _schema; }
 
     /**
@@ -295,8 +260,7 @@ private:
     void reset();
 
     /**
-     * Calculate word numbers and replace word references with word
-     * numbers in internal memory structures.
+     * Calculate word numbers and replace word references with word numbers in internal memory structures.
      */
     void sortWords();
 
@@ -304,43 +268,36 @@ private:
 
     void trimAbortedDocs();
 
-    /*
+    /**
      * Abort a pending document that has already been inverted.
-     *
-     * @param docId            local id for document
-     *
      */
     void abortPendingDoc(uint32_t docId);
 
 public:
     /**
-     * Create a new memory index based on the given schema.
-     *
-     * @param schema the index schema to use
-     * @param schema the field to be inverted
+     * Create a new field inverter for the given fieldId, using the given schema.
      */
     FieldInverter(const index::Schema &schema, uint32_t fieldId);
 
-    /*
-     * Apply pending removes.
+    /**
+     * Apply pending removes using the given remover.
      *
-     * @param remover    document remover
+     * The remover is tracking all {word, docId} tuples that should be removed,
+     * and forwards this to the remove() function in this class (via IFieldIndexRemoveListener interface).
      */
     void applyRemoves(FieldIndexRemover &remover);
 
     /**
-     * Push inverted documents to field index structure using the given inserter.
-     *
-     * Temporary restriction: Currently only one document at a time is supported.
+     * Push the current batch of inverted documents to the FieldIndex using the given inserter.
      */
     void pushDocuments(IOrderedFieldIndexInserter &inserter);
 
-    /*
+    /**
      * Invert a normal text field, based on annotations.
      */
     void invertField(uint32_t docId, const document::FieldValue::UP &val);
 
-    /*
+    /**
      * Setup remove of word in old version of document.
      */
     virtual void remove(const vespalib::stringref word, uint32_t docId) override;
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
index 0b74e05c619..f390edd0718 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
@@ -20,9 +20,20 @@ class DocumentInverter;
 class FieldIndexCollection;
 
 /**
- * Lock-free implementation of a memory-based index
- * using the document inverter and dictionary classes from searchlib.
- **/
+ * Memory index for a set of text and uri fields that uses lock-free B-Trees in underlying components.
+ *
+ * Each field is handled separately by a FieldIndex that contains posting lists for all unique words in that field.
+ *
+ * Documents are inserted and removed from the underlying field indexes in a two-step process:
+ *   1) Call the async functions insertDocument() / removeDocument().
+ *      This adds tasks to invert / remove the fields in the documents to the 'invert threads' executor.
+ *   2) Call the async function commit().
+ *      This adds tasks to push the changes into the field indexes to the 'push threads' executor.
+ *      When commit is completed a completion callback is signaled.
+ *
+ * Use createBlueprint() to search the memory index for a given term in a given field.
+ *
+ */
 class MemoryIndex : public queryeval::Searchable {
 private:
     index::Schema     _schema;
@@ -70,71 +81,59 @@ public:
     /**
      * Create a new memory index based on the given schema.
      *
-     * @param schema the index schema to use
-     **/
+     * @param schema        the schema specifying which text and uri fields to keep in the index.
+     * @param invertThreads the executor with threads for doing document inverting.
+     * @param pushThreads   the executor with threads for doing pushing of changes (inverted documents)
+     *                      to corresponding field indexes.
+     */
     MemoryIndex(const index::Schema &schema,
                 ISequencedTaskExecutor &invertThreads,
                 ISequencedTaskExecutor &pushThreads);
 
-    /**
-     * Class destructor.  Clean up washlist.
-     */
     ~MemoryIndex();
 
-    /**
-     * Obtain the schema used by this index.
-     *
-     * @return schema used by this index
-     **/
     const index::Schema &getSchema() const { return _schema; }
 
-    /**
-     * Check if this index is frozen.
-     *
-     * @return true if this index is frozen
-     **/
     bool isFrozen() const { return _frozen; }
 
     /**
-     * Insert a document into the index. If the document is already in
-     * the index, the old version will be removed first.
+     * Insert a document into the underlying field indexes.
      *
-     * @param docId local document id.
-     * @param doc the document to insert.
-     **/
+     * If the document is already in the index, the old version will be removed first.
+     * This function is async. commit() must be called for changes to take effect.
+     */
     void insertDocument(uint32_t docId, const document::Document &doc);
 
     /**
-     * Remove a document from the index.
+     * Remove a document from the underlying field indexes.
      *
-     * @param docId local document id.
-     **/
+     * This function is async. commit() must be called for changes to take effect.
+     */
     void removeDocument(uint32_t docId);
 
     /**
-     * Commits the inserts and removes since the last commit, making
-     * them searchable. When commit is completed, onWriteDone goes out
-     * of scope, scheduling completion callback.
+     * Commits the inserts and removes since the last commit, making them searchable.
+     *
+     * When commit is completed, 'onWriteDone' goes out of scope, scheduling completion callback.
      *
      * Callers can call pushThreads.sync() to wait for push completion.
-     **/
+     */
     void commit(const std::shared_ptr<IDestructorCallback> &onWriteDone);
 
     /**
-     * Freeze this index. Further index updates will be
-     * discarded. Extra information kept to wash the posting lists
-     * will be discarded.
-     **/
+     * Freeze this index.
+     *
+     * Further index updates will be discarded.
+     * Extra information kept to wash the posting lists will be discarded.
+     */
     void freeze();
 
     /**
      * Dump the contents of this index into the given index builder.
-     *
-     * @param indexBuilder the builder to dump into
-     **/
+     */
     void dump(index::IndexBuilder &indexBuilder);
 
-    // implements Searchable
+    // Implements Searchable
     queryeval::Blueprint::UP createBlueprint(const queryeval::IRequestContext & requestContext,
                                              const queryeval::FieldSpec &field,
                                              const query::Node &term) override;
@@ -162,9 +161,7 @@ public:
 
     /**
      * Gets an approximation of how much memory the index uses.
-     *
-     * @return approximately how much memory is used by the index.
-     **/
+     */
     MemoryUsage getMemoryUsage() const;
 
     uint64_t getStaticMemoryFootprint() const { return _staticMemoryFootprint; }
author	Geir Storli <geirst@verizonmedia.com>	2019-04-12 07:29:51 +0000
committer	Geir Storli <geirst@verizonmedia.com>	2019-04-12 11:41:40 +0000
commit	77af8383c19ebf0854e800b85879d7207a7468eb (patch)
tree	ed342a99eef17e4da5726f9a6bc2d4692a342d8d /searchlib
parent	28a9be2321136a976bdc3bc5b45cef084f81d815 (diff)