Add APIs to support setting a tensor and updating the nearest neighbor index as two-phase operations.

This will enable using multiple threads to update the nearest neighbor index.
author: Geir Storli <geirst@verizonmedia.com> 2020-06-09 14:53:30 +0000
committer: Geir Storli <geirst@verizonmedia.com> 2020-06-10 07:28:41 +0000
commit: 87e669fde4007de7675295687882d20b73e12aed (patch)
tree: cd941c9d7051ae5b5b076913d00529babc013160 /searchlib/src
parent: 66c66aa167c2ba431943ad7287da3f20b11a05ab (diff)
4 files changed, 85 insertions, 1 deletions
diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
index c2d37f2d59a..725ac453ed8 100644
--- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
@@ -3,6 +3,7 @@
 #pragma once
 
 #include "distance_function.h"
+#include "prepare_result.h"
 #include <vespa/eval/tensor/dense/typed_cells.h>
 #include <vespa/vespalib/util/generationhandler.h>
 #include <vespa/vespalib/util/memoryusage.h>
@@ -36,6 +37,38 @@ public:
     };
     virtual ~NearestNeighborIndex() {}
     virtual void add_document(uint32_t docid) = 0;
+
+    /**
+     * Performs the prepare step of a two-phase operation that adds a document to the index.
+     *
+     * This function can be called by any thread.
+     * The document to add is represented by the given vector, as it is _not_ yet stored in the enclosing tensor attribute.
+     * Implementations should return the result of the costly and non-modifying part of this operation,
+     * and must keep the given read guard in that result. This default implementation returns a null result.
+     */
+    virtual std::unique_ptr<PrepareResult> prepare_add_document(uint32_t docid,
+                                                                vespalib::tensor::TypedCells vector,
+                                                                vespalib::GenerationHandler::Guard read_guard) const {
+        // TODO: Make it pure virtual after more wiring is complete. Until then this is a stub that ignores its arguments.
+        (void) docid;
+        (void) vector;
+        (void) read_guard; // Not kept by this stub, since no result object is created.
+        return std::unique_ptr<PrepareResult>(); // Null pointer: no prepare result.
+    }
+
+    /**
+     * Performs the complete step of a two-phase operation that adds a document to the index.
+     *
+     * This function is only called by the attribute writer thread.
+     * Implementations use the result from the prepare step to do the modifying changes; this default does nothing.
+     */
+    virtual void complete_add_document(uint32_t docid,
+                                       std::unique_ptr<PrepareResult> prepare_result) {
+        // TODO: Make it pure virtual after more wiring is complete. Until then this is a stub that ignores its arguments.
+        (void) docid;
+        (void) prepare_result; // Ownership was taken by value, so the result is released when this function returns.
+    }
+
     virtual void remove_document(uint32_t docid) = 0;
     virtual void transfer_hold_lists(generation_t current_gen) = 0;
     virtual void trim_hold_lists(generation_t first_used_gen) = 0;
diff --git a/searchlib/src/vespa/searchlib/tensor/prepare_result.h b/searchlib/src/vespa/searchlib/tensor/prepare_result.h
new file mode 100644
index 00000000000..05300684497
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/prepare_result.h
@@ -0,0 +1,15 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search::tensor {
+
+/**
+ * Base interface for objects that keep the result of the prepare step of a two-phase operation.
+ */
+class PrepareResult {
+public:
+    virtual ~PrepareResult() {} // Virtual, so a derived result can be destroyed through a PrepareResult pointer.
+};
+
+} // namespace search::tensor
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
index 95af9f0471b..979eedec58a 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
@@ -253,6 +253,23 @@ TensorAttribute::getRefCopy() const
     return RefCopyVector(&_refVector[0], &_refVector[0] + size);
 }
 
+std::unique_ptr<PrepareResult> // Base-class stub for the prepare step: ignores its arguments and returns a null result.
+TensorAttribute::prepare_set_tensor(DocId docid, const Tensor& tensor) const
+{
+    (void) docid;  // Unused by this stub.
+    (void) tensor; // Unused by this stub.
+    return std::unique_ptr<PrepareResult>(); // Null pointer: nothing was prepared.
+}
+
+void // Base-class stub for the complete step: ignores its arguments and makes no changes.
+TensorAttribute::complete_set_tensor(DocId docid, const Tensor& tensor,
+                                     std::future<std::unique_ptr<PrepareResult>> prepare_result)
+{
+    (void) docid;  // Unused by this stub.
+    (void) tensor; // Unused by this stub.
+    (void) prepare_result; // NOTE(review): the future is discarded here without being waited on or read.
+}
+
 IMPLEMENT_IDENTIFIABLE_ABSTRACT(TensorAttribute, AttributeVector);
 
 }
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h
index e8efd2170c9..f752b9f7f2e 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h
@@ -3,9 +3,11 @@
 #pragma once
 
 #include "i_tensor_attribute.h"
-#include <vespa/searchlib/attribute/not_implemented_attribute.h>
+#include "prepare_result.h"
 #include "tensor_store.h"
+#include <vespa/searchlib/attribute/not_implemented_attribute.h>
 #include <vespa/vespalib/util/rcuvector.h>
+#include <future>
 
 namespace search::tensor {
 
@@ -51,6 +53,23 @@ public:
     uint32_t getVersion() const override;
     RefCopyVector getRefCopy() const;
     virtual void setTensor(DocId docId, const Tensor &tensor) = 0;
+
+    /**
+     * Performs the prepare step of a two-phase operation that sets a tensor for a document.
+     *
+     * This function can be called by any thread.
+     * It should return the result of the costly and non-modifying part of this operation.
+     */
+    virtual std::unique_ptr<PrepareResult> prepare_set_tensor(DocId docid, const Tensor& tensor) const;
+
+    /**
+     * Performs the complete step of a two-phase operation that sets a tensor for a document.
+     *
+     * This function is only called by the attribute writer thread.
+     * It must wait for the result of the prepare step (via the given future) before making any modifying changes.
+     */
+    virtual void complete_set_tensor(DocId docid, const Tensor& tensor, std::future<std::unique_ptr<PrepareResult>> prepare_result);
+
     virtual void compactWorst() = 0;
 };
author	Geir Storli <geirst@verizonmedia.com>	2020-06-09 14:53:30 +0000
committer	Geir Storli <geirst@verizonmedia.com>	2020-06-10 07:28:41 +0000
commit	87e669fde4007de7675295687882d20b73e12aed (patch)
tree	cd941c9d7051ae5b5b076913d00529babc013160 /searchlib/src
parent	66c66aa167c2ba431943ad7287da3f20b11a05ab (diff)