diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2020-03-25 19:18:47 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-03-25 19:18:47 +0100 |
commit | 3db1991636a31acaffd2826f671c52960adcc938 (patch) | |
tree | af01b0af174630f70a059d5e6b16f2ca4923ce9d /searchlib | |
parent | abafea0685106e657598ded66391c28733d36808 (diff) | |
parent | a5ab1bb6a73aab4d25cecfb90ac60361b5ce58ec (diff) |
Merge pull request #12713 from vespa-engine/geirst/nearest-neighbor-index-saver-skeleton
Add skeleton for nearest neighbor index saver.
Diffstat (limited to 'searchlib')
5 files changed, 67 insertions, 4 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index d9a4431f89b..b6bdee4f94d 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -5,8 +5,8 @@ #include <vespa/eval/tensor/dense/dense_tensor.h> #include <vespa/eval/tensor/tensor.h> #include <vespa/fastos/file.h> -#include <vespa/searchlib/attribute/attributeguard.h> #include <vespa/searchlib/attribute/attribute_read_guard.h> +#include <vespa/searchlib/attribute/attributeguard.h> #include <vespa/searchlib/tensor/default_nearest_neighbor_index_factory.h> #include <vespa/searchlib/tensor/dense_tensor_attribute.h> #include <vespa/searchlib/tensor/doc_vector_access.h> @@ -14,6 +14,7 @@ #include <vespa/searchlib/tensor/hnsw_index.h> #include <vespa/searchlib/tensor/nearest_neighbor_index.h> #include <vespa/searchlib/tensor/nearest_neighbor_index_factory.h> +#include <vespa/searchlib/tensor/nearest_neighbor_index_saver.h> #include <vespa/searchlib/tensor/tensor_attribute.h> #include <vespa/vespalib/data/fileheader.h> #include <vespa/vespalib/io/fileutil.h> @@ -35,6 +36,7 @@ using search::tensor::GenericTensorAttribute; using search::tensor::HnswIndex; using search::tensor::NearestNeighborIndex; using search::tensor::NearestNeighborIndexFactory; +using search::tensor::NearestNeighborIndexSaver; using search::tensor::TensorAttribute; using vespalib::eval::TensorSpec; using vespalib::eval::ValueType; @@ -143,6 +145,10 @@ public: return vespalib::MemoryUsage(); } void get_state(const vespalib::slime::Inserter&) const override {} + std::unique_ptr<NearestNeighborIndexSaver> make_saver() const override { + return std::unique_ptr<NearestNeighborIndexSaver>(); + } + void load(const search::fileutil::LoadedBuffer&) override {} std::vector<Neighbor> find_top_k(uint32_t k, vespalib::tensor::TypedCells vector, uint32_t explore_k) const override { (void) k; (void) vector; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index 988264c0455..19b02d18893 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -2,6 +2,7 @@ #include "distance_function.h" #include "hnsw_index.h" +#include "nearest_neighbor_index_saver.h" #include "random_level_generator.h" #include <vespa/searchlib/util/state_explorer_utils.h> #include <vespa/eval/tensor/dense/typed_cells.h> @@ -435,6 +436,18 @@ HnswIndex::get_state(const vespalib::slime::Inserter& inserter) const StateExplorerUtils::memory_usage_to_slime(memory_usage(), object.setObject("memory_usage")); } +std::unique_ptr<NearestNeighborIndexSaver> +HnswIndex::make_saver() const +{ + return std::unique_ptr<NearestNeighborIndexSaver>(); +} + +void +HnswIndex::load(const fileutil::LoadedBuffer& buf) +{ + (void) buf; +} + struct NeighborsByDocId { bool operator() (const NearestNeighborIndex::Neighbor &lhs, const NearestNeighborIndex::Neighbor &rhs) diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index 130c012effe..1185acd9624 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -155,6 +155,9 @@ public: vespalib::MemoryUsage memory_usage() const override; void get_state(const vespalib::slime::Inserter& inserter) const override; + std::unique_ptr<NearestNeighborIndexSaver> make_saver() const override; + void load(const fileutil::LoadedBuffer& buf) override; + std::vector<Neighbor> find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k) const override; const DistanceFunction *distance_function() const override { return _distance_func.get(); } diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h index e7302028996..bb6ef012a56 100644 --- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h +++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h @@ -2,17 +2,22 @@ #pragma once -#include <cstdint> -#include <vector> +#include "distance_function.h" #include <vespa/eval/tensor/dense/typed_cells.h> #include <vespa/vespalib/util/generationhandler.h> #include <vespa/vespalib/util/memoryusage.h> -#include "distance_function.h" +#include <cstdint> +#include <memory> +#include <vector> namespace vespalib::slime { struct Inserter; } +namespace search::fileutil { class LoadedBuffer; } + namespace search::tensor { +class NearestNeighborIndexSaver; + /** * Interface for an index that is used for (approximate) nearest neighbor search. */ @@ -35,6 +40,15 @@ public: virtual vespalib::MemoryUsage memory_usage() const = 0; virtual void get_state(const vespalib::slime::Inserter& inserter) const = 0; + /** + * Creates a saver that is used to save the index to binary form. + * + * This function is always called by the attribute write thread, + * and the caller ensures that an attribute read guard is held during the lifetime of the saver. + */ + virtual std::unique_ptr<NearestNeighborIndexSaver> make_saver() const = 0; + virtual void load(const fileutil::LoadedBuffer& buf) = 0; + virtual std::vector<Neighbor> find_top_k(uint32_t k, vespalib::tensor::TypedCells vector, uint32_t explore_k) const = 0; diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index_saver.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index_saver.h new file mode 100644 index 00000000000..7d599ac31c8 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index_saver.h @@ -0,0 +1,27 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace search { class BufferWriter; } + +namespace search::tensor { + +/** + * Interface that is used to save a nearest neighbor index to binary form. + * + * An instance of this interface must hold a snapshot of the index from the + * point in time the instance was created, and then save this to binary form in the save() function. + * + * The instance is always created by the attribute write thread, + * and the caller ensures that an attribute read guard is held during the lifetime of the saver. + * Data that might change later must be copied in the constructor. + * + * A flush thread is calling save() at a later point in time. + */ +class NearestNeighborIndexSaver { +public: + virtual ~NearestNeighborIndexSaver() {} + virtual void save(BufferWriter& writer) const; +}; + +} |