diff options
author | Tor Egge <Tor.Egge@online.no> | 2022-11-04 10:34:18 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2022-11-04 10:34:18 +0100 |
commit | 3543686836eb2c949e2400412d4c06ec490c80ab (patch) | |
tree | dac31a8ee0bf70a6bd089567d232447a7552c417 | |
parent | e810ec76d6e029f2a5993947d57adfded0abd438 (diff) |
Pass VectorBundle to NearestNeighborIndex::prepare_add_document member function.
8 files changed, 29 insertions, 17 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 791ce80f62a..6fe5998a347 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -57,6 +57,7 @@ using search::tensor::NearestNeighborIndexSaver; using search::tensor::PrepareResult; using search::tensor::SerializedFastValueAttribute; using search::tensor::TensorAttribute; +using search::tensor::VectorBundle; using vespalib::datastore::CompactionStrategy; using vespalib::eval::CellType; using vespalib::eval::SimpleValue; @@ -202,10 +203,11 @@ public: _adds.emplace_back(docid, DoubleVector(vector.begin(), vector.end())); } std::unique_ptr<PrepareResult> prepare_add_document(uint32_t docid, - vespalib::eval::TypedCells vector, + VectorBundle vectors, vespalib::GenerationHandler::Guard guard) const override { (void) guard; - auto d_vector = vector.typify<double>(); + assert(vectors.subspaces() == 1); + auto d_vector = vectors.cells(0).typify<double>(); _prepare_adds.emplace_back(docid, DoubleVector(d_vector.begin(), d_vector.end())); return std::make_unique<MockPrepareResult>(docid); } diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index 1738ee510c8..9da2d8a4d5f 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -719,8 +719,8 @@ public: UP prepare_add(uint32_t docid, uint32_t max_level = 0) { level_generator->level = max_level; vespalib::GenerationHandler::Guard dummy; - auto vector = vectors.get_vector(docid, 0); - return index->prepare_add_document(docid, vector, dummy); + auto vectors_to_add = vectors.get_vectors(docid); + return index->prepare_add_document(docid, vectors_to_add, dummy); } void complete_add(uint32_t docid, UP up) { index->complete_add_document(docid, std::move(up)); diff --git a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp index 4cc24657a00..9a5f3a1627d 100644 --- a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp @@ -184,7 +184,7 @@ public: return result_promise.get_future(); } void run() override { - auto v = vespalib::eval::TypedCells(vec); + VectorBundle v(vec.data(), CellType::FLOAT, 1, vec.size() * sizeof(float), vec.size()); auto up = parent.index->prepare_add_document(docid, v, read_guard); result_promise.set_value(std::move(up)); } diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp index fd94c4eb60c..7ad4caade03 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp @@ -52,7 +52,12 @@ DenseTensorAttribute::prepare_set_tensor(DocId docid, const Value& tensor) const // With this optimization we avoid doing unnecessary costly work, first removing the vector point, then inserting the same point. return {}; } - return _index->prepare_add_document(docid, tensor.cells(), getGenerationHandler().takeGuard()); + auto cells = tensor.cells(); + auto& type = tensor.type(); + auto cell_type = type.cell_type(); + auto subspace_size = type.dense_subspace_size(); + VectorBundle vectors(cells.data, cell_type, tensor.index().size(), vespalib::eval::CellTypeUtils::mem_size(cell_type, subspace_size), subspace_size); + return _index->prepare_add_document(docid, vectors, getGenerationHandler().takeGuard()); } return {}; } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index 89b4f62146c..e9ce77cc0d6 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -7,6 +7,7 @@ #include "hnsw_index_loader.hpp" #include "hnsw_index_saver.h" #include "random_level_generator.h" +#include "vector_bundle.h" #include <vespa/searchlib/attribute/address_space_components.h> #include <vespa/searchlib/attribute/address_space_usage.h> #include <vespa/searchlib/queryeval/global_filter.h> @@ -347,8 +348,10 @@ HnswIndex::add_document(uint32_t docid) } HnswIndex::PreparedAddDoc -HnswIndex::internal_prepare_add(uint32_t docid, TypedCells input_vector, vespalib::GenerationHandler::Guard read_guard) const +HnswIndex::internal_prepare_add(uint32_t docid, VectorBundle input_vectors, vespalib::GenerationHandler::Guard read_guard) const { + assert(input_vectors.subspaces() == 1); + auto input_vector = input_vectors.cells(0); // TODO: Add capping on num_levels int level = _level_generator->max_level(); PreparedAddDoc op(docid, level, std::move(read_guard)); @@ -424,8 +427,8 @@ HnswIndex::internal_complete_add(uint32_t docid, PreparedAddDoc &op) std::unique_ptr<PrepareResult> HnswIndex::prepare_add_document(uint32_t docid, - TypedCells vector, - vespalib::GenerationHandler::Guard read_guard) const + VectorBundle vectors, + vespalib::GenerationHandler::Guard read_guard) const { uint32_t max_nodes = _graph.node_refs_size.load(std::memory_order_acquire); if (max_nodes < _cfg.min_size_before_two_phase()) { @@ -433,7 +436,7 @@ HnswIndex::prepare_add_document(uint32_t docid, // to ensure they are linked together: return std::make_unique<PreparedFirstAddDoc>(); } - PreparedAddDoc op = internal_prepare_add(docid, vector, std::move(read_guard)); + PreparedAddDoc op = internal_prepare_add(docid, vectors, std::move(read_guard)); return std::make_unique<PreparedAddDoc>(std::move(op)); } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index 9a05fe223c5..2714464073e 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -9,6 +9,7 @@ #include "nearest_neighbor_index.h" #include "random_level_generator.h" #include "hnsw_graph.h" +#include "vector_bundle.h" #include <vespa/eval/eval/typed_cells.h> #include <vespa/searchlib/common/bitvector.h> #include <vespa/vespalib/datastore/array_store.h> @@ -134,8 +135,8 @@ protected: uint32_t docid = get_docid(nodeid); return _vectors.get_vector(docid, 0); } - inline TypedCells get_vector_by_docid(uint32_t docid) const { - return _vectors.get_vector(docid, 0); + inline VectorBundle get_vector_by_docid(uint32_t docid) const { + return _vectors.get_vectors(docid); } double calc_distance(uint32_t lhs_nodeid, uint32_t rhs_nodeid) const; @@ -174,7 +175,7 @@ protected: ~PreparedAddDoc() = default; PreparedAddDoc(PreparedAddDoc&& other) = default; }; - PreparedAddDoc internal_prepare_add(uint32_t docid, TypedCells input_vector, + PreparedAddDoc internal_prepare_add(uint32_t docid, VectorBundle input_vectors, vespalib::GenerationHandler::Guard read_guard) const; LinkArray filter_valid_nodeids(uint32_t level, const PreparedAddDoc::Links &neighbors, uint32_t self_nodeid); void internal_complete_add(uint32_t docid, PreparedAddDoc &op); @@ -188,8 +189,8 @@ public: // Implements NearestNeighborIndex void add_document(uint32_t docid) override; std::unique_ptr<PrepareResult> prepare_add_document(uint32_t docid, - TypedCells vector, - vespalib::GenerationHandler::Guard read_guard) const override; + VectorBundle vectors, + vespalib::GenerationHandler::Guard read_guard) const override; void complete_add_document(uint32_t docid, std::unique_ptr<PrepareResult> prepare_result) override; void remove_node(uint32_t nodeid); void remove_document(uint32_t docid) override; diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h index d40803dcafd..de1ea26d7bf 100644 --- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h +++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h @@ -4,6 +4,7 @@ #include "distance_function.h" #include "prepare_result.h" +#include "vector_bundle.h" #include <vespa/vespalib/util/generationhandler.h> #include <vespa/vespalib/util/memoryusage.h> #include <cstdint> @@ -57,7 +58,7 @@ public: * The given read guard must be kept in the result. */ virtual std::unique_ptr<PrepareResult> prepare_add_document(uint32_t docid, - vespalib::eval::TypedCells vector, + VectorBundle vectors, vespalib::GenerationHandler::Guard read_guard) const = 0; /** * Performs the complete step in a two-phase operation to add a document to the index. diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp index 1d43afe505e..922c2fefa28 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp @@ -159,7 +159,7 @@ ThreadedIndexBuilder::add(uint32_t lid, EntryRef ref) { ++_pending; auto dense_store = _store.as_dense(); auto task = vespalib::makeLambdaTask([this, ref, lid, dense_store]() { - auto prepared = _index.prepare_add_document(lid, dense_store->get_typed_cells(ref), + auto prepared = _index.prepare_add_document(lid, dense_store->get_vectors(ref), _generation_handler.takeGuard()); std::unique_lock guard(_mutex); _queue.push(std::make_pair(lid, std::move(prepared))); |