summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
author Geir Storli <geirst@verizonmedia.com> 2019-04-12 07:29:51 +0000
committer Geir Storli <geirst@verizonmedia.com> 2019-04-12 11:41:40 +0000
commit 77af8383c19ebf0854e800b85879d7207a7468eb (patch)
tree ed342a99eef17e4da5726f9a6bc2d4692a342d8d /searchlib
parent 28a9be2321136a976bdc3bc5b45cef084f81d815 (diff)
Add and improve class comments.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/document_inverter.h 42
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/feature_store.h 29
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_index.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/field_inverter.h 91
-rw-r--r-- searchlib/src/vespa/searchlib/memoryindex/memory_index.h 79
5 files changed, 104 insertions, 139 deletions
diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h
index 5c2d9cc84ed..158302ddea5 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/document_inverter.h
@@ -24,6 +24,11 @@ class FieldInverter;
class UrlFieldInverter;
class FieldIndexCollection;
+/**
+ * Class used to invert the fields for a set of documents, preparing for pushing changes into field indexes.
+ *
+ * Each text and uri field in the document is handled separately by a FieldInverter and UrlFieldInverter.
+ */
class DocumentInverter {
private:
DocumentInverter(const DocumentInverter &) = delete;
@@ -48,18 +53,16 @@ private:
ISequencedTaskExecutor &_invertThreads;
ISequencedTaskExecutor &_pushThreads;
- /**
- * Obtain the schema used by this index.
- *
- * @return schema used by this index
- */
const index::Schema &getSchema() const { return _schema; }
public:
/**
- * Create a new memory index based on the given schema.
+ * Create a new document inverter based on the given schema.
*
- * @param schema the index schema to use
+ * @param schema the schema specifying which text and uri fields to consider.
+ * @param invertThreads the executor with threads for doing document inverting.
+ * @param pushThreads the executor with threads for doing pushing of inverted documents
+ * to corresponding field indexes.
*/
DocumentInverter(const index::Schema &schema,
ISequencedTaskExecutor &invertThreads,
@@ -68,23 +71,34 @@ public:
~DocumentInverter();
/**
- * Push inverted documents to memory field indexes.
+ * Push the current batch of inverted documents to corresponding field indexes.
+ *
+ * This function is async:
+ * For each field inverter a task for pushing the inverted documents to the corresponding field index
+ * is added to the 'push threads' executor, then this function returns.
+ * All tasks hold a reference to the 'onWriteDone' callback, so when the last task is completed,
+ * the callback is destructed.
+ *
+ * NOTE: The caller of this function should sync the 'invert threads' executor first,
+ * to ensure that inverting is completed before pushing starts.
*/
void pushDocuments(FieldIndexCollection &fieldIndexes, const std::shared_ptr<IDestructorCallback> &onWriteDone);
/**
- * Invert a document.
- *
- * @param docId local id for document
- * @param doc the document
+ * Invert (add) the given document.
*
+ * This function is async:
+ * For each text and uri field in the document a task for inverting and adding that
+ * field (using a field inverter) is added to the 'invert threads' executor, then this function returns.
**/
void invertDocument(uint32_t docId, const document::Document &doc);
/**
- * Remove a document.
+ * Remove the given document.
*
- * @param docId local id for document
+ * This function is async:
+ * For each text and uri field in the index schema a task for removing this document
+ * (using a field inverter) is added to the 'invert threads' executor, then this function returns.
*/
void removeDocument(uint32_t docId);
diff --git a/searchlib/src/vespa/searchlib/memoryindex/feature_store.h b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h
index 94d44eaf44d..72b2e8e01bc 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/feature_store.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/feature_store.h
@@ -9,6 +9,9 @@
namespace search::memoryindex {
+/**
+ * Class storing DocIdAndFeatures in an underlying DataStore, using 32-bit refs to access entries.
+ */
class FeatureStore {
public:
using DataStoreType = datastore::DataStoreT<datastore::AlignedEntryRefT<22, 2>>;
@@ -104,8 +107,9 @@ public:
/**
- * Get features from feature store. Method signature is not
- * const since feature decoder is written to during calculation.
+ * Get features from feature store.
+ *
+ * Method signature is not const since feature decoder is written to during calculation.
*
* @param packedIndex The field or field collection owning features
* @param ref Reference to stored features
@@ -115,8 +119,7 @@ public:
/**
- * Setup the given decoder to be used for the given field or field
- * collection.
+ * Setup the given decoder to be used for the given field or field collection.
*
* @param packedIndex The field or field collection owning features
* @param decoder The feature decoder
@@ -126,8 +129,7 @@ public:
}
/**
- * Setup the given decoder to later use readFeatures() to decode
- * the stored features.
+ * Setup the given decoder to later use readFeatures() to decode the stored features.
*
* @param ref Reference to stored features
* @param decoder The feature decoder
@@ -144,8 +146,7 @@ public:
}
/**
- * Setup the given decoder to later use unpackFeatures() to decode
- * the stored features.
+ * Setup the given decoder to later use unpackFeatures() to decode the stored features.
*
* @param ref Reference to stored features
* @param decoder The feature decoder
@@ -155,8 +156,9 @@ public:
}
/**
- * Calculate size of encoded features. Method signature is not
- * const since feature decoder is written to during calculation.
+ * Calculate size of encoded features.
+ *
+ * Method signature is not const since feature decoder is written to during calculation.
*
* @param packedIndex The field or field collection owning features
* @param ref Reference to stored features
@@ -167,7 +169,7 @@ public:
/**
* Get byte address of stored features
*
- * @param ref Referennce to stored features
+ * @param ref Reference to stored features
* @return byte address of stored features
*/
const uint8_t *getBits(datastore::EntryRef ref) const {
@@ -184,11 +186,6 @@ public:
*/
datastore::EntryRef moveFeatures(uint32_t packedIndex, datastore::EntryRef ref);
- /**
- * Return a const view of the fields params used by this feature store.
- *
- * @return const view of fields params.
- */
const std::vector<PosOccFieldsParams> &getFieldsParams() const { return _fieldsParams; }
void trimHoldLists(generation_t usedGen) { _store.trimHoldLists(usedGen); }
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_index.h b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
index 3b0675b5fdf..918e1a05a07 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_index.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_index.h
@@ -19,7 +19,7 @@ namespace search::memoryindex {
class OrderedFieldIndexInserter;
/**
- * Memory index for a single field.
+ * Memory index for a single field using lock-free B-Trees in underlying components.
*
* It consists of the following components:
* - WordStore containing all unique words in this field (across all documents).
diff --git a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h
index ecf2f8d8979..ba6a0e96698 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/field_inverter.h
@@ -18,6 +18,12 @@ namespace search::memoryindex {
class IOrderedFieldIndexInserter;
class FieldIndexRemover;
+/**
+ * Class used to invert a field for a set of documents, preparing for pushing changes into the corresponding FieldIndex.
+ *
+ * It creates a set of sorted {word, docId, features} tuples based on the field content of the documents,
+ * and uses this when updating the posting lists of the FieldIndex.
+ */
class FieldInverter : public IFieldIndexRemoveListener {
public:
class PosInfo {
@@ -169,7 +175,7 @@ private:
using SpanTermVector = std::vector<SpanTerm>;
SpanTermVector _terms;
- // info about aborted and pending documents.
+ // Info about aborted and pending documents.
std::vector<PositionRange> _abortedDocs;
std::map<uint32_t, PositionRange> _pendingDocs;
std::vector<uint32_t> _removeDocs;
@@ -178,66 +184,37 @@ private:
invertNormalDocTextField(const document::FieldValue &val);
public:
- /**
- * Start a new element
- *
- * @param weight element weight
- */
void startElement(int32_t weight);
- /**
- * End an element.
- */
void endElement();
private:
/**
- * Save field value as word in word buffer.
- *
- * @param word word to be saved
- * @param len length of word to be saved.
- *
- * @return word reference
+ * Save the given word in the word buffer and return the word reference.
*/
VESPA_DLL_LOCAL uint32_t saveWord(const vespalib::stringref word);
/**
- * Save field value as word in word buffer.
- *
- * @param fv field value containing word to be stored
- *
- * @return word reference
+ * Save the field value as a word in the word buffer and return the word reference.
*/
VESPA_DLL_LOCAL uint32_t saveWord(const document::FieldValue &fv);
/**
* Get pointer to saved word from a word reference.
- *
- * @param wordRef word reference
- *
- * @return saved word
*/
const char *getWordFromRef(uint32_t wordRef) const {
return &_words[static_cast<size_t>(wordRef) << 2];
}
/**
- * Get pointer to saved word from a word number
- *
- * @param wordNum word number
- *
- * @return saved word
+ * Get pointer to saved word from a word number.
*/
const char *getWordFromNum(uint32_t wordNum) const {
return getWordFromRef(_wordRefs[wordNum]);
}
/**
- * Get word number from word reference
- *
- * @param wordRef word reference
- *
- * @return word number
+ * Get word number from word reference.
*/
uint32_t getWordNum(uint32_t wordRef) const {
const char *p = &_words[static_cast<size_t>(wordRef - 1) << 2];
@@ -245,10 +222,7 @@ private:
}
/**
- * Update mapping from word reference to word number
- *
- * @param wordRef word reference
- * @param wordNum word number
+ * Update mapping from word reference to word number.
*/
void updateWordNum(uint32_t wordRef, uint32_t wordNum) {
char *p = &_words[static_cast<size_t>(wordRef - 1) << 2];
@@ -256,14 +230,10 @@ private:
}
/**
- * Add a word reference to posting list. Don't step word pos.
- *
- *
- * @param wordRef word reference
+ * Add a word reference to posting list (but don't step word pos).
*/
void add(uint32_t wordRef) {
- _positions.emplace_back(wordRef, _docId, _elem,
- _wpos, _elems.size() - 1);
+ _positions.emplace_back(wordRef, _docId, _elem, _wpos, _elems.size() - 1);
}
void stepWordPos() { ++_wpos; }
@@ -282,11 +252,6 @@ private:
void
processNormalDocWeightedSetTextField(const document::WeightedSetFieldValue &field);
- /**
- * Obtain the schema used by this index.
- *
- * @return schema used by this index
- */
const index::Schema &getSchema() const { return _schema; }
/**
@@ -295,8 +260,7 @@ private:
void reset();
/**
- * Calculate word numbers and replace word references with word
- * numbers in internal memory structures.
+ * Calculate word numbers and replace word references with word numbers in internal memory structures.
*/
void sortWords();
@@ -304,43 +268,36 @@ private:
void trimAbortedDocs();
- /*
+ /**
* Abort a pending document that has already been inverted.
- *
- * @param docId local id for document
- *
*/
void abortPendingDoc(uint32_t docId);
public:
/**
- * Create a new memory index based on the given schema.
- *
- * @param schema the index schema to use
- * @param schema the field to be inverted
+ * Create a new field inverter for the given fieldId, using the given schema.
*/
FieldInverter(const index::Schema &schema, uint32_t fieldId);
- /*
- * Apply pending removes.
+ /**
+ * Apply pending removes using the given remover.
*
- * @param remover document remover
+ * The remover is tracking all {word, docId} tuples that should be removed,
+ * and forwards this to the remove() function in this class (via IFieldIndexRemoveListener interface).
*/
void applyRemoves(FieldIndexRemover &remover);
/**
- * Push inverted documents to field index structure using the given inserter.
- *
- * Temporary restriction: Currently only one document at a time is supported.
+ * Push the current batch of inverted documents to the FieldIndex using the given inserter.
*/
void pushDocuments(IOrderedFieldIndexInserter &inserter);
- /*
+ /**
* Invert a normal text field, based on annotations.
*/
void invertField(uint32_t docId, const document::FieldValue::UP &val);
- /*
+ /**
* Setup remove of word in old version of document.
*/
virtual void remove(const vespalib::stringref word, uint32_t docId) override;
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
index 0b74e05c619..f390edd0718 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
+++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.h
@@ -20,9 +20,20 @@ class DocumentInverter;
class FieldIndexCollection;
/**
- * Lock-free implementation of a memory-based index
- * using the document inverter and dictionary classes from searchlib.
- **/
+ * Memory index for a set of text and uri fields that uses lock-free B-Trees in underlying components.
+ *
+ * Each field is handled separately by a FieldIndex that contains posting lists for all unique words in that field.
+ *
+ * Documents are inserted and removed from the underlying field indexes in a two-step process:
+ * 1) Call the async functions insertDocument() / removeDocument().
+ * This adds tasks to invert / remove the fields in the documents to the 'invert threads' executor.
+ * 2) Call the async function commit().
+ * This adds tasks to push the changes into the field indexes to the 'push threads' executor.
+ * When commit is completed a completion callback is signaled.
+ *
+ * Use createBlueprint() to search the memory index for a given term in a given field.
+ *
+ */
class MemoryIndex : public queryeval::Searchable {
private:
index::Schema _schema;
@@ -70,71 +81,59 @@ public:
/**
* Create a new memory index based on the given schema.
*
- * @param schema the index schema to use
- **/
+ * @param schema the schema specifying which text and uri fields to keep in the index.
+ * @param invertThreads the executor with threads for doing document inverting.
+ * @param pushThreads the executor with threads for doing pushing of changes (inverted documents)
+ * to corresponding field indexes.
+ */
MemoryIndex(const index::Schema &schema,
ISequencedTaskExecutor &invertThreads,
ISequencedTaskExecutor &pushThreads);
- /**
- * Class destructor. Clean up washlist.
- */
~MemoryIndex();
- /**
- * Obtain the schema used by this index.
- *
- * @return schema used by this index
- **/
const index::Schema &getSchema() const { return _schema; }
- /**
- * Check if this index is frozen.
- *
- * @return true if this index is frozen
- **/
bool isFrozen() const { return _frozen; }
/**
- * Insert a document into the index. If the document is already in
- * the index, the old version will be removed first.
+ * Insert a document into the underlying field indexes.
*
- * @param docId local document id.
- * @param doc the document to insert.
- **/
+ * If the document is already in the index, the old version will be removed first.
+ * This function is async. commit() must be called for changes to take effect.
+ */
void insertDocument(uint32_t docId, const document::Document &doc);
/**
- * Remove a document from the index.
+ * Remove a document from the underlying field indexes.
*
- * @param docId local document id.
- **/
+ * This function is async. commit() must be called for changes to take effect.
+ */
void removeDocument(uint32_t docId);
/**
- * Commits the inserts and removes since the last commit, making
- * them searchable. When commit is completed, onWriteDone goes out
- * of scope, scheduling completion callback.
+ * Commits the inserts and removes since the last commit, making them searchable.
+ *
+ * When commit is completed, 'onWriteDone' goes out of scope, scheduling completion callback.
*
* Callers can call pushThreads.sync() to wait for push completion.
- **/
+ */
void commit(const std::shared_ptr<IDestructorCallback> &onWriteDone);
/**
- * Freeze this index. Further index updates will be
- * discarded. Extra information kept to wash the posting lists
- * will be discarded.
- **/
+ * Freeze this index.
+ *
+ * Further index updates will be discarded.
+ * Extra information kept to wash the posting lists will be discarded.
+ */
void freeze();
/**
* Dump the contents of this index into the given index builder.
- *
- * @param indexBuilder the builder to dump into
- **/
+ */
void dump(index::IndexBuilder &indexBuilder);
- // implements Searchable
+ // Implements Searchable
queryeval::Blueprint::UP createBlueprint(const queryeval::IRequestContext & requestContext,
const queryeval::FieldSpec &field,
const query::Node &term) override;
@@ -162,9 +161,7 @@ public:
/**
* Gets an approximation of how much memory the index uses.
- *
- * @return approximately how much memory is used by the index.
- **/
+ */
MemoryUsage getMemoryUsage() const;
uint64_t getStaticMemoryFootprint() const { return _staticMemoryFootprint; }