diff options
author | Geir Storli <geirst@yahooinc.com> | 2022-11-04 16:46:37 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-04 16:46:37 +0100 |
commit | 7eb6f8bcec2a2f13f368ebd115c9c8d2f559d95c (patch) | |
tree | 193bc7910e012198821c21c95b635aeeed6279a5 | |
parent | 6bf399dfe0fb19e8e02bf018fe358f3716a2fce9 (diff) | |
parent | 375cbe205fdf706e8e05cc72159ea96bba2ebc29 (diff) |
Merge pull request #24744 from vespa-engine/toregge/pass-vector-bundle-to-prepare-add-document-method
Pass VectorBundle to NearestNeighborIndex::prepare_add_document membe…
20 files changed, 128 insertions, 58 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 791ce80f62a..6fe5998a347 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -57,6 +57,7 @@ using search::tensor::NearestNeighborIndexSaver; using search::tensor::PrepareResult; using search::tensor::SerializedFastValueAttribute; using search::tensor::TensorAttribute; +using search::tensor::VectorBundle; using vespalib::datastore::CompactionStrategy; using vespalib::eval::CellType; using vespalib::eval::SimpleValue; @@ -202,10 +203,11 @@ public: _adds.emplace_back(docid, DoubleVector(vector.begin(), vector.end())); } std::unique_ptr<PrepareResult> prepare_add_document(uint32_t docid, - vespalib::eval::TypedCells vector, + VectorBundle vectors, vespalib::GenerationHandler::Guard guard) const override { (void) guard; - auto d_vector = vector.typify<double>(); + assert(vectors.subspaces() == 1); + auto d_vector = vectors.cells(0).typify<double>(); _prepare_adds.emplace_back(docid, DoubleVector(d_vector.begin(), d_vector.end())); return std::make_unique<MockPrepareResult>(docid); } diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index 1738ee510c8..7a32511ff26 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -1,11 +1,13 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/eval/eval/value_type.h> #include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/tensor/distance_functions.h> #include <vespa/searchlib/tensor/doc_vector_access.h> #include <vespa/searchlib/tensor/hnsw_index.h> #include <vespa/searchlib/tensor/random_level_generator.h> #include <vespa/searchlib/tensor/inv_log_level_generator.h> +#include <vespa/searchlib/tensor/subspace_type.h> #include <vespa/searchlib/tensor/vector_bundle.h> #include <vespa/searchlib/queryeval/global_filter.h> #include <vespa/vespalib/datastore/compaction_spec.h> @@ -25,6 +27,7 @@ using namespace vespalib::slime; using vespalib::Slime; using search::BitVector; using vespalib::eval::get_cell_type; +using vespalib::eval::ValueType; using vespalib::datastore::CompactionSpec; using vespalib::datastore::CompactionStrategy; using search::queryeval::GlobalFilter; @@ -35,9 +38,14 @@ private: using Vector = std::vector<FloatType>; using ArrayRef = vespalib::ConstArrayRef<FloatType>; std::vector<Vector> _vectors; + SubspaceType _subspace_type; public: - MyDocVectorAccess() : _vectors() {} + MyDocVectorAccess() + : _vectors(), + _subspace_type(ValueType::make_type(get_cell_type<FloatType>(), {{"dims", 2}})) + { + } MyDocVectorAccess& set(uint32_t docid, const Vector& vec) { if (docid >= _vectors.size()) { _vectors.resize(docid + 1); @@ -52,7 +60,8 @@ public: } VectorBundle get_vectors(uint32_t docid) const override { ArrayRef ref(_vectors[docid]); - return VectorBundle(ref.data(), get_cell_type<FloatType>(), 1, ref.size() * sizeof(FloatType), ref.size()); + assert(_subspace_type.size() == ref.size()); + return VectorBundle(ref.data(), 1, _subspace_type); } void clear() { _vectors.clear(); } @@ -719,8 +728,8 @@ public: UP prepare_add(uint32_t docid, uint32_t max_level = 0) { level_generator->level = max_level; vespalib::GenerationHandler::Guard dummy; - auto vector = vectors.get_vector(docid, 0); - return index->prepare_add_document(docid, vector, dummy); + auto vectors_to_add 
= vectors.get_vectors(docid); + return index->prepare_add_document(docid, vectors_to_add, dummy); } void complete_add(uint32_t docid, UP up) { index->complete_add_document(docid, std::move(up)); diff --git a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp index 4cc24657a00..2a3f0b4af27 100644 --- a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp @@ -9,6 +9,7 @@ #include <vector> #include <vespa/eval/eval/typed_cells.h> +#include <vespa/eval/eval/value_type.h> #include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/tensor/distance_functions.h> #include <vespa/searchlib/tensor/doc_vector_access.h> @@ -33,6 +34,7 @@ using namespace search::tensor; using namespace vespalib::slime; using search::BitVector; using vespalib::eval::CellType; +using vespalib::eval::ValueType; using vespalib::GenerationHandler; using vespalib::MemoryUsage; using vespalib::Slime; @@ -42,6 +44,12 @@ using vespalib::Slime; #define NUM_POSSIBLE_DOCS 30000 #define NUM_OPS 1000000 +namespace { + +SubspaceType subspace_type(ValueType::make_type(CellType::FLOAT, {{"dims", NUM_DIMS }})); + +} + class RndGen { private: std::mt19937_64 urng; @@ -121,7 +129,8 @@ public: VectorBundle get_vectors(uint32_t docid) const override { assert(docid < NUM_POSSIBLE_DOCS); ConstVectorRef ref(_vectors[docid]); - return VectorBundle(ref.data(), CellType::FLOAT, 1, sizeof(float) * NUM_DIMS, NUM_DIMS); + assert(subspace_type.size() == ref.size()); + return VectorBundle(ref.data(), 1, subspace_type); } }; @@ -184,7 +193,8 @@ public: return result_promise.get_future(); } void run() override { - auto v = vespalib::eval::TypedCells(vec); + assert(subspace_type.size() == vec.size()); + VectorBundle v(vec.data(), 1, subspace_type); auto up = parent.index->prepare_add_document(docid, v, read_guard); result_promise.set_value(std::move(up)); } diff --git 
a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt index 2e8b619b2c4..bb2df40c368 100644 --- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt @@ -28,6 +28,7 @@ vespa_add_library(searchlib_tensor OBJECT nearest_neighbor_index_saver.cpp serialized_fast_value_attribute.cpp small_subspaces_buffer_type.cpp + subspace_type.cpp tensor_attribute.cpp tensor_attribute_loader.cpp tensor_attribute_saver.cpp diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp index fd94c4eb60c..5cd5455bcac 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp @@ -52,7 +52,8 @@ DenseTensorAttribute::prepare_set_tensor(DocId docid, const Value& tensor) const // With this optimization we avoid doing unnecessary costly work, first removing the vector point, then inserting the same point. return {}; } - return _index->prepare_add_document(docid, tensor.cells(), getGenerationHandler().takeGuard()); + VectorBundle vectors(tensor.cells().data, tensor.index().size(), _denseTensorStore.get_subspace_type()); + return _index->prepare_add_document(docid, vectors, getGenerationHandler().takeGuard()); } return {}; } diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp index c7327422f81..c373f6bdcd0 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "dense_tensor_store.h" +#include "subspace_type.h" #include <vespa/eval/eval/value.h> #include <vespa/vespalib/datastore/compacting_buffers.h> #include <vespa/vespalib/datastore/compaction_context.h> @@ -79,7 +80,8 @@ DenseTensorStore::DenseTensorStore(const ValueType &type, std::shared_ptr<vespal _tensorSizeCalc(type), _bufferType(_tensorSizeCalc, std::move(allocator)), _type(type), - _empty(type) + _subspace_type(type), + _empty(_subspace_type) { _store.addType(&_bufferType); _store.init_primary_buffers(); diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h index 7b133977073..9e326e0ab1e 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h @@ -52,6 +52,7 @@ private: TensorSizeCalc _tensorSizeCalc; BufferType _bufferType; ValueType _type; // type of dense tensor + SubspaceType _subspace_type; EmptySubspace _empty; public: DenseTensorStore(const ValueType &type, std::shared_ptr<vespalib::alloc::MemoryAllocator> allocator); @@ -86,8 +87,9 @@ public: if (!ref.valid()) { return VectorBundle(); } - return VectorBundle(getRawBuffer(ref), _type.cell_type(), 1, getBufSize(), getNumCells()); + return VectorBundle(getRawBuffer(ref), 1, _subspace_type); } + const SubspaceType& get_subspace_type() const noexcept { return _subspace_type; } // The following method is meant to be used only for unit tests. 
uint32_t getArraySize() const { return _bufferType.getArraySize(); } }; diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.cpp b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.cpp index cfc70cddb5c..fa13ab6303c 100644 --- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.cpp +++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.cpp @@ -53,7 +53,8 @@ DirectTensorStore::add_entry(TensorSP tensor) DirectTensorStore::DirectTensorStore(const vespalib::eval::ValueType& tensor_type) : TensorStore(_tensor_store), _tensor_store(std::make_unique<TensorBufferType>()), - _empty(tensor_type) + _subspace_type(tensor_type), + _empty(_subspace_type) { _tensor_store.enableFreeLists(); } diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h index a84c321c13a..01084e89776 100644 --- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h +++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h @@ -4,6 +4,7 @@ #include "tensor_store.h" #include "empty_subspace.h" +#include "subspace_type.h" #include "vector_bundle.h" #include <vespa/eval/eval/value.h> #include <vespa/vespalib/datastore/datastore.h> @@ -35,6 +36,7 @@ private: }; TensorStoreType _tensor_store; + SubspaceType _subspace_type; EmptySubspace _empty; EntryRef add_entry(TensorSP tensor); @@ -68,9 +70,7 @@ public: if (tensor == nullptr) { return VectorBundle(); } - auto type = tensor->type(); - auto subspace_size = type.dense_subspace_size(); - return VectorBundle(tensor->cells().data, type.cell_type(), tensor->index().size(), vespalib::eval::CellTypeUtils::mem_size(type.cell_type(), subspace_size), subspace_size); + return VectorBundle(tensor->cells().data, tensor->index().size(), _subspace_type); } }; diff --git a/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp b/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp index c225a6082f5..f46531e4fbb 100644 --- 
a/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp +++ b/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp @@ -1,18 +1,16 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "empty_subspace.h" -#include <vespa/eval/eval/value_type.h> +#include "subspace_type.h" namespace search::tensor { -EmptySubspace::EmptySubspace(const vespalib::eval::ValueType& type) +EmptySubspace::EmptySubspace(const SubspaceType& type) : _empty_space(), _cells() { - auto dense_subspace_size = type.dense_subspace_size(); - auto cell_type = type.cell_type(); - _empty_space.resize(vespalib::eval::CellTypeUtils::mem_size(cell_type, dense_subspace_size), 0); - _cells = vespalib::eval::TypedCells(&_empty_space[0], cell_type, dense_subspace_size); + _empty_space.resize(type.mem_size()); + _cells = vespalib::eval::TypedCells(&_empty_space[0], type.cell_type(), type.size()); } EmptySubspace::~EmptySubspace() = default; diff --git a/searchlib/src/vespa/searchlib/tensor/empty_subspace.h b/searchlib/src/vespa/searchlib/tensor/empty_subspace.h index 8e58e35712f..017486bc643 100644 --- a/searchlib/src/vespa/searchlib/tensor/empty_subspace.h +++ b/searchlib/src/vespa/searchlib/tensor/empty_subspace.h @@ -5,10 +5,10 @@ #include <vespa/eval/eval/typed_cells.h> #include <vector> -namespace vespalib::eval { class ValueType; } - namespace search::tensor { +class SubspaceType; + /* * Class containg an empty subspace, used as a bad fallback when we cannot * get a real subspace. 
@@ -18,7 +18,7 @@ class EmptySubspace std::vector<char> _empty_space; vespalib::eval::TypedCells _cells; public: - EmptySubspace(const vespalib::eval::ValueType& type); + explicit EmptySubspace(const SubspaceType& type); ~EmptySubspace(); const vespalib::eval::TypedCells& cells() const noexcept { return _cells; } }; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index 89b4f62146c..e9ce77cc0d6 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -7,6 +7,7 @@ #include "hnsw_index_loader.hpp" #include "hnsw_index_saver.h" #include "random_level_generator.h" +#include "vector_bundle.h" #include <vespa/searchlib/attribute/address_space_components.h> #include <vespa/searchlib/attribute/address_space_usage.h> #include <vespa/searchlib/queryeval/global_filter.h> @@ -347,8 +348,10 @@ HnswIndex::add_document(uint32_t docid) } HnswIndex::PreparedAddDoc -HnswIndex::internal_prepare_add(uint32_t docid, TypedCells input_vector, vespalib::GenerationHandler::Guard read_guard) const +HnswIndex::internal_prepare_add(uint32_t docid, VectorBundle input_vectors, vespalib::GenerationHandler::Guard read_guard) const { + assert(input_vectors.subspaces() == 1); + auto input_vector = input_vectors.cells(0); // TODO: Add capping on num_levels int level = _level_generator->max_level(); PreparedAddDoc op(docid, level, std::move(read_guard)); @@ -424,8 +427,8 @@ HnswIndex::internal_complete_add(uint32_t docid, PreparedAddDoc &op) std::unique_ptr<PrepareResult> HnswIndex::prepare_add_document(uint32_t docid, - TypedCells vector, - vespalib::GenerationHandler::Guard read_guard) const + VectorBundle vectors, + vespalib::GenerationHandler::Guard read_guard) const { uint32_t max_nodes = _graph.node_refs_size.load(std::memory_order_acquire); if (max_nodes < _cfg.min_size_before_two_phase()) { @@ -433,7 +436,7 @@ HnswIndex::prepare_add_document(uint32_t 
docid, // to ensure they are linked together: return std::make_unique<PreparedFirstAddDoc>(); } - PreparedAddDoc op = internal_prepare_add(docid, vector, std::move(read_guard)); + PreparedAddDoc op = internal_prepare_add(docid, vectors, std::move(read_guard)); return std::make_unique<PreparedAddDoc>(std::move(op)); } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index 9a05fe223c5..2714464073e 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -9,6 +9,7 @@ #include "nearest_neighbor_index.h" #include "random_level_generator.h" #include "hnsw_graph.h" +#include "vector_bundle.h" #include <vespa/eval/eval/typed_cells.h> #include <vespa/searchlib/common/bitvector.h> #include <vespa/vespalib/datastore/array_store.h> @@ -134,8 +135,8 @@ protected: uint32_t docid = get_docid(nodeid); return _vectors.get_vector(docid, 0); } - inline TypedCells get_vector_by_docid(uint32_t docid) const { - return _vectors.get_vector(docid, 0); + inline VectorBundle get_vector_by_docid(uint32_t docid) const { + return _vectors.get_vectors(docid); } double calc_distance(uint32_t lhs_nodeid, uint32_t rhs_nodeid) const; @@ -174,7 +175,7 @@ protected: ~PreparedAddDoc() = default; PreparedAddDoc(PreparedAddDoc&& other) = default; }; - PreparedAddDoc internal_prepare_add(uint32_t docid, TypedCells input_vector, + PreparedAddDoc internal_prepare_add(uint32_t docid, VectorBundle input_vectors, vespalib::GenerationHandler::Guard read_guard) const; LinkArray filter_valid_nodeids(uint32_t level, const PreparedAddDoc::Links &neighbors, uint32_t self_nodeid); void internal_complete_add(uint32_t docid, PreparedAddDoc &op); @@ -188,8 +189,8 @@ public: // Implements NearestNeighborIndex void add_document(uint32_t docid) override; std::unique_ptr<PrepareResult> prepare_add_document(uint32_t docid, - TypedCells vector, - vespalib::GenerationHandler::Guard read_guard) 
const override; + VectorBundle vectors, + vespalib::GenerationHandler::Guard read_guard) const override; void complete_add_document(uint32_t docid, std::unique_ptr<PrepareResult> prepare_result) override; void remove_node(uint32_t nodeid); void remove_document(uint32_t docid) override; diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h index d40803dcafd..de1ea26d7bf 100644 --- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h +++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h @@ -4,6 +4,7 @@ #include "distance_function.h" #include "prepare_result.h" +#include "vector_bundle.h" #include <vespa/vespalib/util/generationhandler.h> #include <vespa/vespalib/util/memoryusage.h> #include <cstdint> @@ -57,7 +58,7 @@ public: * The given read guard must be kept in the result. */ virtual std::unique_ptr<PrepareResult> prepare_add_document(uint32_t docid, - vespalib::eval::TypedCells vector, + VectorBundle vectors, vespalib::GenerationHandler::Guard read_guard) const = 0; /** * Performs the complete step in a two-phase operation to add a document to the index. diff --git a/searchlib/src/vespa/searchlib/tensor/subspace_type.cpp b/searchlib/src/vespa/searchlib/tensor/subspace_type.cpp new file mode 100644 index 00000000000..187af7531af --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/subspace_type.cpp @@ -0,0 +1,15 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "subspace_type.h" +#include <vespa/eval/eval/value_type.h> + +namespace search::tensor { + +SubspaceType::SubspaceType(const vespalib::eval::ValueType& type) + : _cell_type(type.cell_type()), + _size(type.dense_subspace_size()), + _mem_size(vespalib::eval::CellTypeUtils::mem_size(_cell_type, _size)) +{ +} + +} diff --git a/searchlib/src/vespa/searchlib/tensor/subspace_type.h b/searchlib/src/vespa/searchlib/tensor/subspace_type.h new file mode 100644 index 00000000000..88520723155 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/subspace_type.h @@ -0,0 +1,26 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/cell_type.h> + +namespace vespalib::eval { class ValueType; } + +namespace search::tensor { + +/* + * Class describing the type of a dense subspace in a tensor. + */ +class SubspaceType +{ + vespalib::eval::CellType _cell_type; + size_t _size; // # cells + size_t _mem_size; // # bytes +public: + explicit SubspaceType(const vespalib::eval::ValueType& type); + vespalib::eval::CellType cell_type() const noexcept { return _cell_type; } + size_t size() const noexcept { return _size; } + size_t mem_size() const noexcept { return _mem_size; } +}; + +} diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp index 1d43afe505e..922c2fefa28 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp @@ -159,7 +159,7 @@ ThreadedIndexBuilder::add(uint32_t lid, EntryRef ref) { ++_pending; auto dense_store = _store.as_dense(); auto task = vespalib::makeLambdaTask([this, ref, lid, dense_store]() { - auto prepared = _index.prepare_add_document(lid, dense_store->get_typed_cells(ref), + auto prepared = _index.prepare_add_document(lid, dense_store->get_vectors(ref), 
_generation_handler.takeGuard()); std::unique_lock guard(_mutex); _queue.push(std::make_pair(lid, std::move(prepared))); diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp index 3a861552d80..4f0db54ac2a 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp @@ -68,14 +68,12 @@ FastValueView::FastValueView(const ValueType& type, ConstArrayRef<string_id> lab } TensorBufferOperations::TensorBufferOperations(const vespalib::eval::ValueType& tensor_type) - : _num_mapped_dimensions(tensor_type.count_mapped_dimensions()), - _cell_mem_size(vespalib::eval::CellTypeUtils::mem_size(tensor_type.cell_type(), 1u)), - _min_alignment(adjust_min_alignment(vespalib::eval::CellTypeUtils::alignment(tensor_type.cell_type()))), - _dense_subspace_size(tensor_type.dense_subspace_size()), - _cell_type(tensor_type.cell_type()), + : _subspace_type(tensor_type), + _num_mapped_dimensions(tensor_type.count_mapped_dimensions()), + _min_alignment(adjust_min_alignment(vespalib::eval::CellTypeUtils::alignment(_subspace_type.cell_type()))), _addr(_num_mapped_dimensions), _addr_refs(), - _empty(tensor_type) + _empty(_subspace_type) { _addr_refs.reserve(_addr.size()); for (auto& label : _addr) { @@ -106,8 +104,8 @@ TensorBufferOperations::store_tensor(ArrayRef<char> buf, const vespalib::eval::V uint32_t num_subspaces = tensor.index().size(); assert(num_subspaces <= num_subspaces_mask); auto labels_end_offset = get_labels_offset() + get_labels_mem_size(num_subspaces); - auto cells_size = num_subspaces * _dense_subspace_size; - auto cells_mem_size = cells_size * _cell_mem_size; // Size measured in bytes + auto cells_size = num_subspaces * _subspace_type.size(); + auto cells_mem_size = num_subspaces * _subspace_type.mem_size(); // Size measured in bytes auto aligner = select_aligner(cells_mem_size); auto 
cells_start_offset = aligner.align(labels_end_offset); auto cells_end_offset = cells_start_offset + cells_mem_size; @@ -148,11 +146,11 @@ TensorBufferOperations::make_fast_view(ConstArrayRef<char> buf, const vespalib:: auto num_subspaces = get_num_subspaces(buf); assert(buf.size() >= get_array_size(num_subspaces)); ConstArrayRef<string_id> labels(reinterpret_cast<const string_id*>(buf.data() + get_labels_offset()), num_subspaces * _num_mapped_dimensions); - auto cells_size = num_subspaces * _dense_subspace_size; - auto cells_mem_size = cells_size * _cell_mem_size; // Size measured in bytes + auto cells_size = num_subspaces * _subspace_type.size(); + auto cells_mem_size = num_subspaces * _subspace_type.mem_size(); // Size measured in bytes auto aligner = select_aligner(cells_mem_size); auto cells_start_offset = get_cells_offset(num_subspaces, aligner); - TypedCells cells(buf.data() + cells_start_offset, _cell_type, cells_size); + TypedCells cells(buf.data() + cells_start_offset, _subspace_type.cell_type(), cells_size); assert(cells_start_offset + cells_mem_size <= buf.size()); return std::make_unique<FastValueView>(tensor_type, labels, cells, _num_mapped_dimensions, num_subspaces); } @@ -187,11 +185,11 @@ TensorBufferOperations::encode_stored_tensor(ConstArrayRef<char> buf, const vesp auto num_subspaces = get_num_subspaces(buf); assert(buf.size() >= get_array_size(num_subspaces)); ConstArrayRef<string_id> labels(reinterpret_cast<const string_id*>(buf.data() + get_labels_offset()), num_subspaces * _num_mapped_dimensions); - auto cells_size = num_subspaces * _dense_subspace_size; - auto cells_mem_size = cells_size * _cell_mem_size; // Size measured in bytes + auto cells_size = num_subspaces * _subspace_type.size(); + auto cells_mem_size = num_subspaces * _subspace_type.mem_size(); // Size measured in bytes auto aligner = select_aligner(cells_mem_size); auto cells_start_offset = get_cells_offset(num_subspaces, aligner); - TypedCells cells(buf.data() + 
cells_start_offset, _cell_type, cells_size); + TypedCells cells(buf.data() + cells_start_offset, _subspace_type.cell_type(), cells_size); assert(cells_start_offset + cells_mem_size <= buf.size()); StringIdVector labels_copy(labels.begin(), labels.end()); StreamedValueView streamed_value_view(tensor_type, _num_mapped_dimensions, cells, num_subspaces, labels_copy); diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h index 43463e50ff1..26cf9a429a1 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h @@ -3,6 +3,7 @@ #pragma once #include "empty_subspace.h" +#include "subspace_type.h" #include "vector_bundle.h" #include <vespa/vespalib/datastore/aligner.h> #include <vespa/vespalib/util/arrayref.h> @@ -39,11 +40,9 @@ namespace search::tensor { */ class TensorBufferOperations { + SubspaceType _subspace_type; uint32_t _num_mapped_dimensions; - uint32_t _cell_mem_size; uint32_t _min_alignment; - size_t _dense_subspace_size; - vespalib::eval::CellType _cell_type; std::vector<vespalib::string_id> _addr; std::vector<vespalib::string_id*> _addr_refs; EmptySubspace _empty; @@ -58,7 +57,7 @@ class TensorBufferOperations static constexpr size_t get_num_subspaces_size() noexcept { return sizeof(uint32_t); } static constexpr size_t get_labels_offset() noexcept { return get_num_subspaces_size(); } size_t get_cells_mem_size(uint32_t num_subspaces) const noexcept { - return _dense_subspace_size * _cell_mem_size * num_subspaces; + return _subspace_type.mem_size() * num_subspaces; } auto select_aligner(size_t cells_mem_size) const noexcept { return Aligner((cells_mem_size < CELLS_ALIGNMENT_MEM_SIZE_MIN) ? 
_min_alignment : CELLS_ALIGNMENT); @@ -108,7 +107,7 @@ public: auto num_subspaces = get_num_subspaces(buf); auto cells_mem_size = get_cells_mem_size(num_subspaces); auto aligner = select_aligner(cells_mem_size); - return VectorBundle(buf.data() + get_cells_offset(num_subspaces, aligner), _cell_type, num_subspaces, _dense_subspace_size * _cell_mem_size, _dense_subspace_size); + return VectorBundle(buf.data() + get_cells_offset(num_subspaces, aligner), num_subspaces, _subspace_type); } }; diff --git a/searchlib/src/vespa/searchlib/tensor/vector_bundle.h b/searchlib/src/vespa/searchlib/tensor/vector_bundle.h index cb1d3f99cfb..09d0b514954 100644 --- a/searchlib/src/vespa/searchlib/tensor/vector_bundle.h +++ b/searchlib/src/vespa/searchlib/tensor/vector_bundle.h @@ -2,6 +2,7 @@ #pragma once +#include "subspace_type.h" #include <vespa/eval/eval/typed_cells.h> #include <cassert> @@ -27,12 +28,12 @@ public: _subspace_size(0) { } - VectorBundle(const void *data, vespalib::eval::CellType cell_type, uint32_t subspaces, size_t subspace_mem_size, size_t subspace_size) + VectorBundle(const void *data, uint32_t subspaces, const SubspaceType& subspace_type) : _data(data), - _cell_type(cell_type), + _cell_type(subspace_type.cell_type()), _subspaces(subspaces), - _subspace_mem_size(subspace_mem_size), - _subspace_size(subspace_size) + _subspace_mem_size(subspace_type.mem_size()), + _subspace_size(subspace_type.size()) { } ~VectorBundle() = default; |