From b87395750cb79933a01549cb7841a5f39d833a81 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Fri, 4 Nov 2022 15:29:36 +0100 Subject: Add search::tensor::SubspaceType. --- .../tests/tensor/hnsw_index/hnsw_index_test.cpp | 13 +++++++++-- .../src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp | 14 ++++++++++-- .../src/vespa/searchlib/tensor/CMakeLists.txt | 1 + .../searchlib/tensor/dense_tensor_attribute.cpp | 6 +---- .../vespa/searchlib/tensor/dense_tensor_store.cpp | 4 +++- .../vespa/searchlib/tensor/dense_tensor_store.h | 4 +++- .../vespa/searchlib/tensor/direct_tensor_store.cpp | 3 ++- .../vespa/searchlib/tensor/direct_tensor_store.h | 6 ++--- .../src/vespa/searchlib/tensor/empty_subspace.cpp | 10 ++++----- .../src/vespa/searchlib/tensor/empty_subspace.h | 6 ++--- .../src/vespa/searchlib/tensor/subspace_type.cpp | 15 +++++++++++++ .../src/vespa/searchlib/tensor/subspace_type.h | 26 ++++++++++++++++++++++ .../searchlib/tensor/tensor_buffer_operations.cpp | 26 ++++++++++------------ .../searchlib/tensor/tensor_buffer_operations.h | 9 ++++---- .../src/vespa/searchlib/tensor/vector_bundle.h | 9 ++++---- 15 files changed, 105 insertions(+), 47 deletions(-) create mode 100644 searchlib/src/vespa/searchlib/tensor/subspace_type.cpp create mode 100644 searchlib/src/vespa/searchlib/tensor/subspace_type.h diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index 9da2d8a4d5f..7a32511ff26 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -1,11 +1,13 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include #include #include #include #include #include #include +#include #include #include #include @@ -25,6 +27,7 @@ using namespace vespalib::slime; using vespalib::Slime; using search::BitVector; using vespalib::eval::get_cell_type; +using vespalib::eval::ValueType; using vespalib::datastore::CompactionSpec; using vespalib::datastore::CompactionStrategy; using search::queryeval::GlobalFilter; @@ -35,9 +38,14 @@ private: using Vector = std::vector; using ArrayRef = vespalib::ConstArrayRef; std::vector _vectors; + SubspaceType _subspace_type; public: - MyDocVectorAccess() : _vectors() {} + MyDocVectorAccess() + : _vectors(), + _subspace_type(ValueType::make_type(get_cell_type(), {{"dims", 2}})) + { + } MyDocVectorAccess& set(uint32_t docid, const Vector& vec) { if (docid >= _vectors.size()) { _vectors.resize(docid + 1); @@ -52,7 +60,8 @@ public: } VectorBundle get_vectors(uint32_t docid) const override { ArrayRef ref(_vectors[docid]); - return VectorBundle(ref.data(), get_cell_type(), 1, ref.size() * sizeof(FloatType), ref.size()); + assert(_subspace_type.size() == ref.size()); + return VectorBundle(ref.data(), 1, _subspace_type); } void clear() { _vectors.clear(); } diff --git a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp index 9a5f3a1627d..2a3f0b4af27 100644 --- a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -33,6 +34,7 @@ using namespace search::tensor; using namespace vespalib::slime; using search::BitVector; using vespalib::eval::CellType; +using vespalib::eval::ValueType; using vespalib::GenerationHandler; using vespalib::MemoryUsage; using vespalib::Slime; @@ -42,6 +44,12 @@ using vespalib::Slime; #define NUM_POSSIBLE_DOCS 30000 #define NUM_OPS 1000000 +namespace { + +SubspaceType subspace_type(ValueType::make_type(CellType::FLOAT, 
{{"dims", NUM_DIMS }})); + +} + class RndGen { private: std::mt19937_64 urng; @@ -121,7 +129,8 @@ public: VectorBundle get_vectors(uint32_t docid) const override { assert(docid < NUM_POSSIBLE_DOCS); ConstVectorRef ref(_vectors[docid]); - return VectorBundle(ref.data(), CellType::FLOAT, 1, sizeof(float) * NUM_DIMS, NUM_DIMS); + assert(subspace_type.size() == ref.size()); + return VectorBundle(ref.data(), 1, subspace_type); } }; @@ -184,7 +193,8 @@ public: return result_promise.get_future(); } void run() override { - VectorBundle v(vec.data(), CellType::FLOAT, 1, vec.size() * sizeof(float), vec.size()); + assert(subspace_type.size() == vec.size()); + VectorBundle v(vec.data(), 1, subspace_type); auto up = parent.index->prepare_add_document(docid, v, read_guard); result_promise.set_value(std::move(up)); } diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt index 2e8b619b2c4..bb2df40c368 100644 --- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt @@ -28,6 +28,7 @@ vespa_add_library(searchlib_tensor OBJECT nearest_neighbor_index_saver.cpp serialized_fast_value_attribute.cpp small_subspaces_buffer_type.cpp + subspace_type.cpp tensor_attribute.cpp tensor_attribute_loader.cpp tensor_attribute_saver.cpp diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp index 7ad4caade03..5cd5455bcac 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp @@ -52,11 +52,7 @@ DenseTensorAttribute::prepare_set_tensor(DocId docid, const Value& tensor) const // With this optimization we avoid doing unnecessary costly work, first removing the vector point, then inserting the same point. 
return {}; } - auto cells = tensor.cells(); - auto& type = tensor.type(); - auto cell_type = type.cell_type(); - auto subspace_size = type.dense_subspace_size(); - VectorBundle vectors(cells.data, cell_type, tensor.index().size(), vespalib::eval::CellTypeUtils::mem_size(cell_type, subspace_size), subspace_size); + VectorBundle vectors(tensor.cells().data, tensor.index().size(), _denseTensorStore.get_subspace_type()); return _index->prepare_add_document(docid, vectors, getGenerationHandler().takeGuard()); } return {}; diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp index c7327422f81..c373f6bdcd0 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "dense_tensor_store.h" +#include "subspace_type.h" #include #include #include @@ -79,7 +80,8 @@ DenseTensorStore::DenseTensorStore(const ValueType &type, std::shared_ptr allocator); @@ -86,8 +87,9 @@ public: if (!ref.valid()) { return VectorBundle(); } - return VectorBundle(getRawBuffer(ref), _type.cell_type(), 1, getBufSize(), getNumCells()); + return VectorBundle(getRawBuffer(ref), 1, _subspace_type); } + const SubspaceType& get_subspace_type() const noexcept { return _subspace_type; } // The following method is meant to be used only for unit tests. 
uint32_t getArraySize() const { return _bufferType.getArraySize(); } }; diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.cpp b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.cpp index cfc70cddb5c..fa13ab6303c 100644 --- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.cpp +++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.cpp @@ -53,7 +53,8 @@ DirectTensorStore::add_entry(TensorSP tensor) DirectTensorStore::DirectTensorStore(const vespalib::eval::ValueType& tensor_type) : TensorStore(_tensor_store), _tensor_store(std::make_unique()), - _empty(tensor_type) + _subspace_type(tensor_type), + _empty(_subspace_type) { _tensor_store.enableFreeLists(); } diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h index a84c321c13a..01084e89776 100644 --- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h +++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h @@ -4,6 +4,7 @@ #include "tensor_store.h" #include "empty_subspace.h" +#include "subspace_type.h" #include "vector_bundle.h" #include #include @@ -35,6 +36,7 @@ private: }; TensorStoreType _tensor_store; + SubspaceType _subspace_type; EmptySubspace _empty; EntryRef add_entry(TensorSP tensor); @@ -68,9 +70,7 @@ public: if (tensor == nullptr) { return VectorBundle(); } - auto type = tensor->type(); - auto subspace_size = type.dense_subspace_size(); - return VectorBundle(tensor->cells().data, type.cell_type(), tensor->index().size(), vespalib::eval::CellTypeUtils::mem_size(type.cell_type(), subspace_size), subspace_size); + return VectorBundle(tensor->cells().data, tensor->index().size(), _subspace_type); } }; diff --git a/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp b/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp index c225a6082f5..38ed3bd3bdf 100644 --- a/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp +++ 
b/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp @@ -1,18 +1,16 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "empty_subspace.h" -#include +#include "subspace_type.h" namespace search::tensor { -EmptySubspace::EmptySubspace(const vespalib::eval::ValueType& type) +EmptySubspace::EmptySubspace(const SubspaceType& type) : _empty_space(), _cells() { - auto dense_subspace_size = type.dense_subspace_size(); - auto cell_type = type.cell_type(); - _empty_space.resize(vespalib::eval::CellTypeUtils::mem_size(cell_type, dense_subspace_size), 0); - _cells = vespalib::eval::TypedCells(&_empty_space[0], cell_type, dense_subspace_size); + _empty_space.resize(type.mem_size()); + _cells = vespalib::eval::TypedCells(&_empty_space[0], type.cell_type(), type.size()); } EmptySubspace::~EmptySubspace() = default; diff --git a/searchlib/src/vespa/searchlib/tensor/empty_subspace.h b/searchlib/src/vespa/searchlib/tensor/empty_subspace.h index 8e58e35712f..017486bc643 100644 --- a/searchlib/src/vespa/searchlib/tensor/empty_subspace.h +++ b/searchlib/src/vespa/searchlib/tensor/empty_subspace.h @@ -5,10 +5,10 @@ #include #include -namespace vespalib::eval { class ValueType; } - namespace search::tensor { +class SubspaceType; + /* * Class containg an empty subspace, used as a bad fallback when we cannot * get a real subspace. 
@@ -18,7 +18,7 @@ class EmptySubspace std::vector _empty_space; vespalib::eval::TypedCells _cells; public: - EmptySubspace(const vespalib::eval::ValueType& type); + explicit EmptySubspace(const SubspaceType& type); ~EmptySubspace(); const vespalib::eval::TypedCells& cells() const noexcept { return _cells; } }; diff --git a/searchlib/src/vespa/searchlib/tensor/subspace_type.cpp b/searchlib/src/vespa/searchlib/tensor/subspace_type.cpp new file mode 100644 index 00000000000..187af7531af --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/subspace_type.cpp @@ -0,0 +1,15 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "subspace_type.h" +#include + +namespace search::tensor { + +SubspaceType::SubspaceType(const vespalib::eval::ValueType& type) + : _cell_type(type.cell_type()), + _size(type.dense_subspace_size()), + _mem_size(vespalib::eval::CellTypeUtils::mem_size(_cell_type, _size)) +{ +} + +} diff --git a/searchlib/src/vespa/searchlib/tensor/subspace_type.h b/searchlib/src/vespa/searchlib/tensor/subspace_type.h new file mode 100644 index 00000000000..88520723155 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/subspace_type.h @@ -0,0 +1,26 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace vespalib::eval { class ValueType; } + +namespace search::tensor { + +/* + * Class describing the type of a dense subspace in a tensor. 
+ */ +class SubspaceType +{ + vespalib::eval::CellType _cell_type; + size_t _size; // # cells + size_t _mem_size; // # bytes +public: + explicit SubspaceType(const vespalib::eval::ValueType& type); + vespalib::eval::CellType cell_type() const noexcept { return _cell_type; } + size_t size() const noexcept { return _size; } + size_t mem_size() const noexcept { return _mem_size; } +}; + +} diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp index 3a861552d80..4f0db54ac2a 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp @@ -68,14 +68,12 @@ FastValueView::FastValueView(const ValueType& type, ConstArrayRef lab } TensorBufferOperations::TensorBufferOperations(const vespalib::eval::ValueType& tensor_type) - : _num_mapped_dimensions(tensor_type.count_mapped_dimensions()), - _cell_mem_size(vespalib::eval::CellTypeUtils::mem_size(tensor_type.cell_type(), 1u)), - _min_alignment(adjust_min_alignment(vespalib::eval::CellTypeUtils::alignment(tensor_type.cell_type()))), - _dense_subspace_size(tensor_type.dense_subspace_size()), - _cell_type(tensor_type.cell_type()), + : _subspace_type(tensor_type), + _num_mapped_dimensions(tensor_type.count_mapped_dimensions()), + _min_alignment(adjust_min_alignment(vespalib::eval::CellTypeUtils::alignment(_subspace_type.cell_type()))), _addr(_num_mapped_dimensions), _addr_refs(), - _empty(tensor_type) + _empty(_subspace_type) { _addr_refs.reserve(_addr.size()); for (auto& label : _addr) { @@ -106,8 +104,8 @@ TensorBufferOperations::store_tensor(ArrayRef buf, const vespalib::eval::V uint32_t num_subspaces = tensor.index().size(); assert(num_subspaces <= num_subspaces_mask); auto labels_end_offset = get_labels_offset() + get_labels_mem_size(num_subspaces); - auto cells_size = num_subspaces * _dense_subspace_size; - auto cells_mem_size = cells_size * 
_cell_mem_size; // Size measured in bytes + auto cells_size = num_subspaces * _subspace_type.size(); + auto cells_mem_size = num_subspaces * _subspace_type.mem_size(); // Size measured in bytes auto aligner = select_aligner(cells_mem_size); auto cells_start_offset = aligner.align(labels_end_offset); auto cells_end_offset = cells_start_offset + cells_mem_size; @@ -148,11 +146,11 @@ TensorBufferOperations::make_fast_view(ConstArrayRef buf, const vespalib:: auto num_subspaces = get_num_subspaces(buf); assert(buf.size() >= get_array_size(num_subspaces)); ConstArrayRef labels(reinterpret_cast(buf.data() + get_labels_offset()), num_subspaces * _num_mapped_dimensions); - auto cells_size = num_subspaces * _dense_subspace_size; - auto cells_mem_size = cells_size * _cell_mem_size; // Size measured in bytes + auto cells_size = num_subspaces * _subspace_type.size(); + auto cells_mem_size = num_subspaces * _subspace_type.mem_size(); // Size measured in bytes auto aligner = select_aligner(cells_mem_size); auto cells_start_offset = get_cells_offset(num_subspaces, aligner); - TypedCells cells(buf.data() + cells_start_offset, _cell_type, cells_size); + TypedCells cells(buf.data() + cells_start_offset, _subspace_type.cell_type(), cells_size); assert(cells_start_offset + cells_mem_size <= buf.size()); return std::make_unique(tensor_type, labels, cells, _num_mapped_dimensions, num_subspaces); } @@ -187,11 +185,11 @@ TensorBufferOperations::encode_stored_tensor(ConstArrayRef buf, const vesp auto num_subspaces = get_num_subspaces(buf); assert(buf.size() >= get_array_size(num_subspaces)); ConstArrayRef labels(reinterpret_cast(buf.data() + get_labels_offset()), num_subspaces * _num_mapped_dimensions); - auto cells_size = num_subspaces * _dense_subspace_size; - auto cells_mem_size = cells_size * _cell_mem_size; // Size measured in bytes + auto cells_size = num_subspaces * _subspace_type.size(); + auto cells_mem_size = num_subspaces * _subspace_type.mem_size(); // Size measured in bytes 
auto aligner = select_aligner(cells_mem_size); auto cells_start_offset = get_cells_offset(num_subspaces, aligner); - TypedCells cells(buf.data() + cells_start_offset, _cell_type, cells_size); + TypedCells cells(buf.data() + cells_start_offset, _subspace_type.cell_type(), cells_size); assert(cells_start_offset + cells_mem_size <= buf.size()); StringIdVector labels_copy(labels.begin(), labels.end()); StreamedValueView streamed_value_view(tensor_type, _num_mapped_dimensions, cells, num_subspaces, labels_copy); diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h index 43463e50ff1..26cf9a429a1 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h @@ -3,6 +3,7 @@ #pragma once #include "empty_subspace.h" +#include "subspace_type.h" #include "vector_bundle.h" #include #include @@ -39,11 +40,9 @@ namespace search::tensor { */ class TensorBufferOperations { + SubspaceType _subspace_type; uint32_t _num_mapped_dimensions; - uint32_t _cell_mem_size; uint32_t _min_alignment; - size_t _dense_subspace_size; - vespalib::eval::CellType _cell_type; std::vector _addr; std::vector _addr_refs; EmptySubspace _empty; @@ -58,7 +57,7 @@ class TensorBufferOperations static constexpr size_t get_num_subspaces_size() noexcept { return sizeof(uint32_t); } static constexpr size_t get_labels_offset() noexcept { return get_num_subspaces_size(); } size_t get_cells_mem_size(uint32_t num_subspaces) const noexcept { - return _dense_subspace_size * _cell_mem_size * num_subspaces; + return _subspace_type.mem_size() * num_subspaces; } auto select_aligner(size_t cells_mem_size) const noexcept { return Aligner((cells_mem_size < CELLS_ALIGNMENT_MEM_SIZE_MIN) ? 
_min_alignment : CELLS_ALIGNMENT); @@ -108,7 +107,7 @@ public: auto num_subspaces = get_num_subspaces(buf); auto cells_mem_size = get_cells_mem_size(num_subspaces); auto aligner = select_aligner(cells_mem_size); - return VectorBundle(buf.data() + get_cells_offset(num_subspaces, aligner), _cell_type, num_subspaces, _dense_subspace_size * _cell_mem_size, _dense_subspace_size); + return VectorBundle(buf.data() + get_cells_offset(num_subspaces, aligner), num_subspaces, _subspace_type); } }; diff --git a/searchlib/src/vespa/searchlib/tensor/vector_bundle.h b/searchlib/src/vespa/searchlib/tensor/vector_bundle.h index cb1d3f99cfb..09d0b514954 100644 --- a/searchlib/src/vespa/searchlib/tensor/vector_bundle.h +++ b/searchlib/src/vespa/searchlib/tensor/vector_bundle.h @@ -2,6 +2,7 @@ #pragma once +#include "subspace_type.h" #include #include @@ -27,12 +28,12 @@ public: _subspace_size(0) { } - VectorBundle(const void *data, vespalib::eval::CellType cell_type, uint32_t subspaces, size_t subspace_mem_size, size_t subspace_size) + VectorBundle(const void *data, uint32_t subspaces, const SubspaceType& subspace_type) : _data(data), - _cell_type(cell_type), + _cell_type(subspace_type.cell_type()), _subspaces(subspaces), - _subspace_mem_size(subspace_mem_size), - _subspace_size(subspace_size) + _subspace_mem_size(subspace_type.mem_size()), + _subspace_size(subspace_type.size()) { } ~VectorBundle() = default; -- cgit v1.2.3