summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Arne H Juul <arnej27959@users.noreply.github.com> 2020-03-25 19:18:47 +0100
committer GitHub <noreply@github.com> 2020-03-25 19:18:47 +0100
commit 3db1991636a31acaffd2826f671c52960adcc938 (patch)
tree af01b0af174630f70a059d5e6b16f2ca4923ce9d /searchlib
parent abafea0685106e657598ded66391c28733d36808 (diff)
parent a5ab1bb6a73aab4d25cecfb90ac60361b5ce58ec (diff)
Merge pull request #12713 from vespa-engine/geirst/nearest-neighbor-index-saver-skeleton
Add skeleton for nearest neighbor index saver.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp 8
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp 13
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/hnsw_index.h 3
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h 20
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index_saver.h 27
5 files changed, 67 insertions, 4 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index d9a4431f89b..b6bdee4f94d 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -5,8 +5,8 @@
#include <vespa/eval/tensor/dense/dense_tensor.h>
#include <vespa/eval/tensor/tensor.h>
#include <vespa/fastos/file.h>
-#include <vespa/searchlib/attribute/attributeguard.h>
#include <vespa/searchlib/attribute/attribute_read_guard.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
#include <vespa/searchlib/tensor/default_nearest_neighbor_index_factory.h>
#include <vespa/searchlib/tensor/dense_tensor_attribute.h>
#include <vespa/searchlib/tensor/doc_vector_access.h>
@@ -14,6 +14,7 @@
#include <vespa/searchlib/tensor/hnsw_index.h>
#include <vespa/searchlib/tensor/nearest_neighbor_index.h>
#include <vespa/searchlib/tensor/nearest_neighbor_index_factory.h>
+#include <vespa/searchlib/tensor/nearest_neighbor_index_saver.h>
#include <vespa/searchlib/tensor/tensor_attribute.h>
#include <vespa/vespalib/data/fileheader.h>
#include <vespa/vespalib/io/fileutil.h>
@@ -35,6 +36,7 @@ using search::tensor::GenericTensorAttribute;
using search::tensor::HnswIndex;
using search::tensor::NearestNeighborIndex;
using search::tensor::NearestNeighborIndexFactory;
+using search::tensor::NearestNeighborIndexSaver;
using search::tensor::TensorAttribute;
using vespalib::eval::TensorSpec;
using vespalib::eval::ValueType;
@@ -143,6 +145,10 @@ public:
return vespalib::MemoryUsage();
}
void get_state(const vespalib::slime::Inserter&) const override {}
+    std::unique_ptr<NearestNeighborIndexSaver> make_saver() const override {
+        return nullptr; // no saver is created here; the caller receives an empty pointer
+    }
+    void load(const search::fileutil::LoadedBuffer& /* buf */) override { /* no-op: the buffer is ignored */ }
std::vector<Neighbor> find_top_k(uint32_t k, vespalib::tensor::TypedCells vector, uint32_t explore_k) const override {
(void) k;
(void) vector;
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index 988264c0455..19b02d18893 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -2,6 +2,7 @@
#include "distance_function.h"
#include "hnsw_index.h"
+#include "nearest_neighbor_index_saver.h"
#include "random_level_generator.h"
#include <vespa/searchlib/util/state_explorer_utils.h>
#include <vespa/eval/tensor/dense/typed_cells.h>
@@ -435,6 +436,18 @@ HnswIndex::get_state(const vespalib::slime::Inserter& inserter) const
StateExplorerUtils::memory_usage_to_slime(memory_usage(), object.setObject("memory_usage"));
}
+std::unique_ptr<NearestNeighborIndexSaver>
+HnswIndex::make_saver() const
+{
+    return nullptr; // skeleton: no saver is implemented yet, so an empty pointer is returned
+}
+
+void
+HnswIndex::load([[maybe_unused]] const fileutil::LoadedBuffer& buf)
+{
+    // Skeleton: the loaded buffer is accepted but not read yet.
+}
+
struct NeighborsByDocId {
bool operator() (const NearestNeighborIndex::Neighbor &lhs,
const NearestNeighborIndex::Neighbor &rhs)
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index 130c012effe..1185acd9624 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -155,6 +155,9 @@ public:
vespalib::MemoryUsage memory_usage() const override;
void get_state(const vespalib::slime::Inserter& inserter) const override;
+ std::unique_ptr<NearestNeighborIndexSaver> make_saver() const override;
+ void load(const fileutil::LoadedBuffer& buf) override;
+
std::vector<Neighbor> find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k) const override;
const DistanceFunction *distance_function() const override { return _distance_func.get(); }
diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
index e7302028996..bb6ef012a56 100644
--- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
@@ -2,17 +2,22 @@
#pragma once
-#include <cstdint>
-#include <vector>
+#include "distance_function.h"
#include <vespa/eval/tensor/dense/typed_cells.h>
#include <vespa/vespalib/util/generationhandler.h>
#include <vespa/vespalib/util/memoryusage.h>
-#include "distance_function.h"
+#include <cstdint>
+#include <memory>
+#include <vector>
namespace vespalib::slime { struct Inserter; }
+namespace search::fileutil { class LoadedBuffer; }
+
namespace search::tensor {
+class NearestNeighborIndexSaver;
+
/**
* Interface for an index that is used for (approximate) nearest neighbor search.
*/
@@ -35,6 +40,15 @@ public:
virtual vespalib::MemoryUsage memory_usage() const = 0;
virtual void get_state(const vespalib::slime::Inserter& inserter) const = 0;
+ /**
+ * Creates a saver that is used to save the index to binary form.
+ *
+ * This function is always called by the attribute write thread,
+ * and the caller ensures that an attribute read guard is held during the lifetime of the saver.
+ */
+ virtual std::unique_ptr<NearestNeighborIndexSaver> make_saver() const = 0;
+ virtual void load(const fileutil::LoadedBuffer& buf) = 0;
+
virtual std::vector<Neighbor> find_top_k(uint32_t k,
vespalib::tensor::TypedCells vector,
uint32_t explore_k) const = 0;
diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index_saver.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index_saver.h
new file mode 100644
index 00000000000..7d599ac31c8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index_saver.h
@@ -0,0 +1,27 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search { class BufferWriter; }
+
+namespace search::tensor {
+
+/**
+ * Interface that is used to save a nearest neighbor index to binary form.
+ *
+ * An instance of this interface must hold a snapshot of the index from the
+ * point in time the instance was created, and then save this to binary form in the save() function.
+ *
+ * The instance is always created by the attribute write thread,
+ * and the caller ensures that an attribute read guard is held during the lifetime of the saver.
+ * Data that might change later must be copied in the constructor.
+ *
+ * A flush thread calls save() at a later point in time; implementations must override save().
+ */
+class NearestNeighborIndexSaver {
+public:
+    virtual ~NearestNeighborIndexSaver() = default;
+    virtual void save(BufferWriter& writer) const = 0; // pure virtual: this class is an interface only
+};
+
+}