summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahooinc.com>2022-11-04 16:46:37 +0100
committerGitHub <noreply@github.com>2022-11-04 16:46:37 +0100
commit7eb6f8bcec2a2f13f368ebd115c9c8d2f559d95c (patch)
tree193bc7910e012198821c21c95b635aeeed6279a5
parent6bf399dfe0fb19e8e02bf018fe358f3716a2fce9 (diff)
parent375cbe205fdf706e8e05cc72159ea96bba2ebc29 (diff)
Merge pull request #24744 from vespa-engine/toregge/pass-vector-bundle-to-prepare-add-document-method
Pass VectorBundle to NearestNeighborIndex::prepare_add_document membe…
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp6
-rw-r--r--searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp17
-rw-r--r--searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp14
-rw-r--r--searchlib/src/vespa/searchlib/tensor/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp3
-rw-r--r--searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/direct_tensor_store.cpp3
-rw-r--r--searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h6
-rw-r--r--searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp10
-rw-r--r--searchlib/src/vespa/searchlib/tensor/empty_subspace.h6
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.h11
-rw-r--r--searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h3
-rw-r--r--searchlib/src/vespa/searchlib/tensor/subspace_type.cpp15
-rw-r--r--searchlib/src/vespa/searchlib/tensor/subspace_type.h26
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp26
-rw-r--r--searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h9
-rw-r--r--searchlib/src/vespa/searchlib/tensor/vector_bundle.h9
20 files changed, 128 insertions, 58 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index 791ce80f62a..6fe5998a347 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -57,6 +57,7 @@ using search::tensor::NearestNeighborIndexSaver;
using search::tensor::PrepareResult;
using search::tensor::SerializedFastValueAttribute;
using search::tensor::TensorAttribute;
+using search::tensor::VectorBundle;
using vespalib::datastore::CompactionStrategy;
using vespalib::eval::CellType;
using vespalib::eval::SimpleValue;
@@ -202,10 +203,11 @@ public:
_adds.emplace_back(docid, DoubleVector(vector.begin(), vector.end()));
}
std::unique_ptr<PrepareResult> prepare_add_document(uint32_t docid,
- vespalib::eval::TypedCells vector,
+ VectorBundle vectors,
vespalib::GenerationHandler::Guard guard) const override {
(void) guard;
- auto d_vector = vector.typify<double>();
+ assert(vectors.subspaces() == 1);
+ auto d_vector = vectors.cells(0).typify<double>();
_prepare_adds.emplace_back(docid, DoubleVector(d_vector.begin(), d_vector.end()));
return std::make_unique<MockPrepareResult>(docid);
}
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
index 1738ee510c8..7a32511ff26 100644
--- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
@@ -1,11 +1,13 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/eval/eval/value_type.h>
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/tensor/distance_functions.h>
#include <vespa/searchlib/tensor/doc_vector_access.h>
#include <vespa/searchlib/tensor/hnsw_index.h>
#include <vespa/searchlib/tensor/random_level_generator.h>
#include <vespa/searchlib/tensor/inv_log_level_generator.h>
+#include <vespa/searchlib/tensor/subspace_type.h>
#include <vespa/searchlib/tensor/vector_bundle.h>
#include <vespa/searchlib/queryeval/global_filter.h>
#include <vespa/vespalib/datastore/compaction_spec.h>
@@ -25,6 +27,7 @@ using namespace vespalib::slime;
using vespalib::Slime;
using search::BitVector;
using vespalib::eval::get_cell_type;
+using vespalib::eval::ValueType;
using vespalib::datastore::CompactionSpec;
using vespalib::datastore::CompactionStrategy;
using search::queryeval::GlobalFilter;
@@ -35,9 +38,14 @@ private:
using Vector = std::vector<FloatType>;
using ArrayRef = vespalib::ConstArrayRef<FloatType>;
std::vector<Vector> _vectors;
+ SubspaceType _subspace_type;
public:
- MyDocVectorAccess() : _vectors() {}
+ MyDocVectorAccess()
+ : _vectors(),
+ _subspace_type(ValueType::make_type(get_cell_type<FloatType>(), {{"dims", 2}}))
+ {
+ }
MyDocVectorAccess& set(uint32_t docid, const Vector& vec) {
if (docid >= _vectors.size()) {
_vectors.resize(docid + 1);
@@ -52,7 +60,8 @@ public:
}
VectorBundle get_vectors(uint32_t docid) const override {
ArrayRef ref(_vectors[docid]);
- return VectorBundle(ref.data(), get_cell_type<FloatType>(), 1, ref.size() * sizeof(FloatType), ref.size());
+ assert(_subspace_type.size() == ref.size());
+ return VectorBundle(ref.data(), 1, _subspace_type);
}
void clear() { _vectors.clear(); }
@@ -719,8 +728,8 @@ public:
UP prepare_add(uint32_t docid, uint32_t max_level = 0) {
level_generator->level = max_level;
vespalib::GenerationHandler::Guard dummy;
- auto vector = vectors.get_vector(docid, 0);
- return index->prepare_add_document(docid, vector, dummy);
+ auto vectors_to_add = vectors.get_vectors(docid);
+ return index->prepare_add_document(docid, vectors_to_add, dummy);
}
void complete_add(uint32_t docid, UP up) {
index->complete_add_document(docid, std::move(up));
diff --git a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
index 4cc24657a00..2a3f0b4af27 100644
--- a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
@@ -9,6 +9,7 @@
#include <vector>
#include <vespa/eval/eval/typed_cells.h>
+#include <vespa/eval/eval/value_type.h>
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/tensor/distance_functions.h>
#include <vespa/searchlib/tensor/doc_vector_access.h>
@@ -33,6 +34,7 @@ using namespace search::tensor;
using namespace vespalib::slime;
using search::BitVector;
using vespalib::eval::CellType;
+using vespalib::eval::ValueType;
using vespalib::GenerationHandler;
using vespalib::MemoryUsage;
using vespalib::Slime;
@@ -42,6 +44,12 @@ using vespalib::Slime;
#define NUM_POSSIBLE_DOCS 30000
#define NUM_OPS 1000000
+namespace {
+
+SubspaceType subspace_type(ValueType::make_type(CellType::FLOAT, {{"dims", NUM_DIMS }}));
+
+}
+
class RndGen {
private:
std::mt19937_64 urng;
@@ -121,7 +129,8 @@ public:
VectorBundle get_vectors(uint32_t docid) const override {
assert(docid < NUM_POSSIBLE_DOCS);
ConstVectorRef ref(_vectors[docid]);
- return VectorBundle(ref.data(), CellType::FLOAT, 1, sizeof(float) * NUM_DIMS, NUM_DIMS);
+ assert(subspace_type.size() == ref.size());
+ return VectorBundle(ref.data(), 1, subspace_type);
}
};
@@ -184,7 +193,8 @@ public:
return result_promise.get_future();
}
void run() override {
- auto v = vespalib::eval::TypedCells(vec);
+ assert(subspace_type.size() == vec.size());
+ VectorBundle v(vec.data(), 1, subspace_type);
auto up = parent.index->prepare_add_document(docid, v, read_guard);
result_promise.set_value(std::move(up));
}
diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
index 2e8b619b2c4..bb2df40c368 100644
--- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
@@ -28,6 +28,7 @@ vespa_add_library(searchlib_tensor OBJECT
nearest_neighbor_index_saver.cpp
serialized_fast_value_attribute.cpp
small_subspaces_buffer_type.cpp
+ subspace_type.cpp
tensor_attribute.cpp
tensor_attribute_loader.cpp
tensor_attribute_saver.cpp
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
index fd94c4eb60c..5cd5455bcac 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
@@ -52,7 +52,8 @@ DenseTensorAttribute::prepare_set_tensor(DocId docid, const Value& tensor) const
// With this optimization we avoid doing unnecessary costly work, first removing the vector point, then inserting the same point.
return {};
}
- return _index->prepare_add_document(docid, tensor.cells(), getGenerationHandler().takeGuard());
+ VectorBundle vectors(tensor.cells().data, tensor.index().size(), _denseTensorStore.get_subspace_type());
+ return _index->prepare_add_document(docid, vectors, getGenerationHandler().takeGuard());
}
return {};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp
index c7327422f81..c373f6bdcd0 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.cpp
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "dense_tensor_store.h"
+#include "subspace_type.h"
#include <vespa/eval/eval/value.h>
#include <vespa/vespalib/datastore/compacting_buffers.h>
#include <vespa/vespalib/datastore/compaction_context.h>
@@ -79,7 +80,8 @@ DenseTensorStore::DenseTensorStore(const ValueType &type, std::shared_ptr<vespal
_tensorSizeCalc(type),
_bufferType(_tensorSizeCalc, std::move(allocator)),
_type(type),
- _empty(type)
+ _subspace_type(type),
+ _empty(_subspace_type)
{
_store.addType(&_bufferType);
_store.init_primary_buffers();
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h
index 7b133977073..9e326e0ab1e 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h
@@ -52,6 +52,7 @@ private:
TensorSizeCalc _tensorSizeCalc;
BufferType _bufferType;
ValueType _type; // type of dense tensor
+ SubspaceType _subspace_type;
EmptySubspace _empty;
public:
DenseTensorStore(const ValueType &type, std::shared_ptr<vespalib::alloc::MemoryAllocator> allocator);
@@ -86,8 +87,9 @@ public:
if (!ref.valid()) {
return VectorBundle();
}
- return VectorBundle(getRawBuffer(ref), _type.cell_type(), 1, getBufSize(), getNumCells());
+ return VectorBundle(getRawBuffer(ref), 1, _subspace_type);
}
+ const SubspaceType& get_subspace_type() const noexcept { return _subspace_type; }
// The following method is meant to be used only for unit tests.
uint32_t getArraySize() const { return _bufferType.getArraySize(); }
};
diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.cpp b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.cpp
index cfc70cddb5c..fa13ab6303c 100644
--- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.cpp
@@ -53,7 +53,8 @@ DirectTensorStore::add_entry(TensorSP tensor)
DirectTensorStore::DirectTensorStore(const vespalib::eval::ValueType& tensor_type)
: TensorStore(_tensor_store),
_tensor_store(std::make_unique<TensorBufferType>()),
- _empty(tensor_type)
+ _subspace_type(tensor_type),
+ _empty(_subspace_type)
{
_tensor_store.enableFreeLists();
}
diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h
index a84c321c13a..01084e89776 100644
--- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h
+++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h
@@ -4,6 +4,7 @@
#include "tensor_store.h"
#include "empty_subspace.h"
+#include "subspace_type.h"
#include "vector_bundle.h"
#include <vespa/eval/eval/value.h>
#include <vespa/vespalib/datastore/datastore.h>
@@ -35,6 +36,7 @@ private:
};
TensorStoreType _tensor_store;
+ SubspaceType _subspace_type;
EmptySubspace _empty;
EntryRef add_entry(TensorSP tensor);
@@ -68,9 +70,7 @@ public:
if (tensor == nullptr) {
return VectorBundle();
}
- auto type = tensor->type();
- auto subspace_size = type.dense_subspace_size();
- return VectorBundle(tensor->cells().data, type.cell_type(), tensor->index().size(), vespalib::eval::CellTypeUtils::mem_size(type.cell_type(), subspace_size), subspace_size);
+ return VectorBundle(tensor->cells().data, tensor->index().size(), _subspace_type);
}
};
diff --git a/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp b/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp
index c225a6082f5..f46531e4fbb 100644
--- a/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp
@@ -1,18 +1,16 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "empty_subspace.h"
-#include <vespa/eval/eval/value_type.h>
+#include "subspace_type.h"
namespace search::tensor {
-EmptySubspace::EmptySubspace(const vespalib::eval::ValueType& type)
+EmptySubspace::EmptySubspace(const SubspaceType& type)
: _empty_space(),
_cells()
{
- auto dense_subspace_size = type.dense_subspace_size();
- auto cell_type = type.cell_type();
- _empty_space.resize(vespalib::eval::CellTypeUtils::mem_size(cell_type, dense_subspace_size), 0);
- _cells = vespalib::eval::TypedCells(&_empty_space[0], cell_type, dense_subspace_size);
+ _empty_space.resize(type.mem_size());
+ _cells = vespalib::eval::TypedCells(&_empty_space[0], type.cell_type(), type.size());
}
EmptySubspace::~EmptySubspace() = default;
diff --git a/searchlib/src/vespa/searchlib/tensor/empty_subspace.h b/searchlib/src/vespa/searchlib/tensor/empty_subspace.h
index 8e58e35712f..017486bc643 100644
--- a/searchlib/src/vespa/searchlib/tensor/empty_subspace.h
+++ b/searchlib/src/vespa/searchlib/tensor/empty_subspace.h
@@ -5,10 +5,10 @@
#include <vespa/eval/eval/typed_cells.h>
#include <vector>
-namespace vespalib::eval { class ValueType; }
-
namespace search::tensor {
+class SubspaceType;
+
/*
* Class containg an empty subspace, used as a bad fallback when we cannot
* get a real subspace.
@@ -18,7 +18,7 @@ class EmptySubspace
std::vector<char> _empty_space;
vespalib::eval::TypedCells _cells;
public:
- EmptySubspace(const vespalib::eval::ValueType& type);
+ explicit EmptySubspace(const SubspaceType& type);
~EmptySubspace();
const vespalib::eval::TypedCells& cells() const noexcept { return _cells; }
};
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index 89b4f62146c..e9ce77cc0d6 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -7,6 +7,7 @@
#include "hnsw_index_loader.hpp"
#include "hnsw_index_saver.h"
#include "random_level_generator.h"
+#include "vector_bundle.h"
#include <vespa/searchlib/attribute/address_space_components.h>
#include <vespa/searchlib/attribute/address_space_usage.h>
#include <vespa/searchlib/queryeval/global_filter.h>
@@ -347,8 +348,10 @@ HnswIndex::add_document(uint32_t docid)
}
HnswIndex::PreparedAddDoc
-HnswIndex::internal_prepare_add(uint32_t docid, TypedCells input_vector, vespalib::GenerationHandler::Guard read_guard) const
+HnswIndex::internal_prepare_add(uint32_t docid, VectorBundle input_vectors, vespalib::GenerationHandler::Guard read_guard) const
{
+ assert(input_vectors.subspaces() == 1);
+ auto input_vector = input_vectors.cells(0);
// TODO: Add capping on num_levels
int level = _level_generator->max_level();
PreparedAddDoc op(docid, level, std::move(read_guard));
@@ -424,8 +427,8 @@ HnswIndex::internal_complete_add(uint32_t docid, PreparedAddDoc &op)
std::unique_ptr<PrepareResult>
HnswIndex::prepare_add_document(uint32_t docid,
- TypedCells vector,
- vespalib::GenerationHandler::Guard read_guard) const
+ VectorBundle vectors,
+ vespalib::GenerationHandler::Guard read_guard) const
{
uint32_t max_nodes = _graph.node_refs_size.load(std::memory_order_acquire);
if (max_nodes < _cfg.min_size_before_two_phase()) {
@@ -433,7 +436,7 @@ HnswIndex::prepare_add_document(uint32_t docid,
// to ensure they are linked together:
return std::make_unique<PreparedFirstAddDoc>();
}
- PreparedAddDoc op = internal_prepare_add(docid, vector, std::move(read_guard));
+ PreparedAddDoc op = internal_prepare_add(docid, vectors, std::move(read_guard));
return std::make_unique<PreparedAddDoc>(std::move(op));
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index 9a05fe223c5..2714464073e 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -9,6 +9,7 @@
#include "nearest_neighbor_index.h"
#include "random_level_generator.h"
#include "hnsw_graph.h"
+#include "vector_bundle.h"
#include <vespa/eval/eval/typed_cells.h>
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/vespalib/datastore/array_store.h>
@@ -134,8 +135,8 @@ protected:
uint32_t docid = get_docid(nodeid);
return _vectors.get_vector(docid, 0);
}
- inline TypedCells get_vector_by_docid(uint32_t docid) const {
- return _vectors.get_vector(docid, 0);
+ inline VectorBundle get_vector_by_docid(uint32_t docid) const {
+ return _vectors.get_vectors(docid);
}
double calc_distance(uint32_t lhs_nodeid, uint32_t rhs_nodeid) const;
@@ -174,7 +175,7 @@ protected:
~PreparedAddDoc() = default;
PreparedAddDoc(PreparedAddDoc&& other) = default;
};
- PreparedAddDoc internal_prepare_add(uint32_t docid, TypedCells input_vector,
+ PreparedAddDoc internal_prepare_add(uint32_t docid, VectorBundle input_vectors,
vespalib::GenerationHandler::Guard read_guard) const;
LinkArray filter_valid_nodeids(uint32_t level, const PreparedAddDoc::Links &neighbors, uint32_t self_nodeid);
void internal_complete_add(uint32_t docid, PreparedAddDoc &op);
@@ -188,8 +189,8 @@ public:
// Implements NearestNeighborIndex
void add_document(uint32_t docid) override;
std::unique_ptr<PrepareResult> prepare_add_document(uint32_t docid,
- TypedCells vector,
- vespalib::GenerationHandler::Guard read_guard) const override;
+ VectorBundle vectors,
+ vespalib::GenerationHandler::Guard read_guard) const override;
void complete_add_document(uint32_t docid, std::unique_ptr<PrepareResult> prepare_result) override;
void remove_node(uint32_t nodeid);
void remove_document(uint32_t docid) override;
diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
index d40803dcafd..de1ea26d7bf 100644
--- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
@@ -4,6 +4,7 @@
#include "distance_function.h"
#include "prepare_result.h"
+#include "vector_bundle.h"
#include <vespa/vespalib/util/generationhandler.h>
#include <vespa/vespalib/util/memoryusage.h>
#include <cstdint>
@@ -57,7 +58,7 @@ public:
* The given read guard must be kept in the result.
*/
virtual std::unique_ptr<PrepareResult> prepare_add_document(uint32_t docid,
- vespalib::eval::TypedCells vector,
+ VectorBundle vectors,
vespalib::GenerationHandler::Guard read_guard) const = 0;
/**
* Performs the complete step in a two-phase operation to add a document to the index.
diff --git a/searchlib/src/vespa/searchlib/tensor/subspace_type.cpp b/searchlib/src/vespa/searchlib/tensor/subspace_type.cpp
new file mode 100644
index 00000000000..187af7531af
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/subspace_type.cpp
@@ -0,0 +1,15 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "subspace_type.h"
+#include <vespa/eval/eval/value_type.h>
+
+namespace search::tensor {
+
+SubspaceType::SubspaceType(const vespalib::eval::ValueType& type)
+ : _cell_type(type.cell_type()),
+ _size(type.dense_subspace_size()),
+ _mem_size(vespalib::eval::CellTypeUtils::mem_size(_cell_type, _size))
+{
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/tensor/subspace_type.h b/searchlib/src/vespa/searchlib/tensor/subspace_type.h
new file mode 100644
index 00000000000..88520723155
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/subspace_type.h
@@ -0,0 +1,26 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/cell_type.h>
+
+namespace vespalib::eval { class ValueType; }
+
+namespace search::tensor {
+
+/*
+ * Class describing the type of a dense subspace in a tensor.
+ */
+class SubspaceType
+{
+ vespalib::eval::CellType _cell_type;
+ size_t _size; // # cells
+ size_t _mem_size; // # bytes
+public:
+ explicit SubspaceType(const vespalib::eval::ValueType& type);
+ vespalib::eval::CellType cell_type() const noexcept { return _cell_type; }
+ size_t size() const noexcept { return _size; }
+ size_t mem_size() const noexcept { return _mem_size; }
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp
index 1d43afe505e..922c2fefa28 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute_loader.cpp
@@ -159,7 +159,7 @@ ThreadedIndexBuilder::add(uint32_t lid, EntryRef ref) {
++_pending;
auto dense_store = _store.as_dense();
auto task = vespalib::makeLambdaTask([this, ref, lid, dense_store]() {
- auto prepared = _index.prepare_add_document(lid, dense_store->get_typed_cells(ref),
+ auto prepared = _index.prepare_add_document(lid, dense_store->get_vectors(ref),
_generation_handler.takeGuard());
std::unique_lock guard(_mutex);
_queue.push(std::make_pair(lid, std::move(prepared)));
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp
index 3a861552d80..4f0db54ac2a 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.cpp
@@ -68,14 +68,12 @@ FastValueView::FastValueView(const ValueType& type, ConstArrayRef<string_id> lab
}
TensorBufferOperations::TensorBufferOperations(const vespalib::eval::ValueType& tensor_type)
- : _num_mapped_dimensions(tensor_type.count_mapped_dimensions()),
- _cell_mem_size(vespalib::eval::CellTypeUtils::mem_size(tensor_type.cell_type(), 1u)),
- _min_alignment(adjust_min_alignment(vespalib::eval::CellTypeUtils::alignment(tensor_type.cell_type()))),
- _dense_subspace_size(tensor_type.dense_subspace_size()),
- _cell_type(tensor_type.cell_type()),
+ : _subspace_type(tensor_type),
+ _num_mapped_dimensions(tensor_type.count_mapped_dimensions()),
+ _min_alignment(adjust_min_alignment(vespalib::eval::CellTypeUtils::alignment(_subspace_type.cell_type()))),
_addr(_num_mapped_dimensions),
_addr_refs(),
- _empty(tensor_type)
+ _empty(_subspace_type)
{
_addr_refs.reserve(_addr.size());
for (auto& label : _addr) {
@@ -106,8 +104,8 @@ TensorBufferOperations::store_tensor(ArrayRef<char> buf, const vespalib::eval::V
uint32_t num_subspaces = tensor.index().size();
assert(num_subspaces <= num_subspaces_mask);
auto labels_end_offset = get_labels_offset() + get_labels_mem_size(num_subspaces);
- auto cells_size = num_subspaces * _dense_subspace_size;
- auto cells_mem_size = cells_size * _cell_mem_size; // Size measured in bytes
+ auto cells_size = num_subspaces * _subspace_type.size();
+ auto cells_mem_size = num_subspaces * _subspace_type.mem_size(); // Size measured in bytes
auto aligner = select_aligner(cells_mem_size);
auto cells_start_offset = aligner.align(labels_end_offset);
auto cells_end_offset = cells_start_offset + cells_mem_size;
@@ -148,11 +146,11 @@ TensorBufferOperations::make_fast_view(ConstArrayRef<char> buf, const vespalib::
auto num_subspaces = get_num_subspaces(buf);
assert(buf.size() >= get_array_size(num_subspaces));
ConstArrayRef<string_id> labels(reinterpret_cast<const string_id*>(buf.data() + get_labels_offset()), num_subspaces * _num_mapped_dimensions);
- auto cells_size = num_subspaces * _dense_subspace_size;
- auto cells_mem_size = cells_size * _cell_mem_size; // Size measured in bytes
+ auto cells_size = num_subspaces * _subspace_type.size();
+ auto cells_mem_size = num_subspaces * _subspace_type.mem_size(); // Size measured in bytes
auto aligner = select_aligner(cells_mem_size);
auto cells_start_offset = get_cells_offset(num_subspaces, aligner);
- TypedCells cells(buf.data() + cells_start_offset, _cell_type, cells_size);
+ TypedCells cells(buf.data() + cells_start_offset, _subspace_type.cell_type(), cells_size);
assert(cells_start_offset + cells_mem_size <= buf.size());
return std::make_unique<FastValueView>(tensor_type, labels, cells, _num_mapped_dimensions, num_subspaces);
}
@@ -187,11 +185,11 @@ TensorBufferOperations::encode_stored_tensor(ConstArrayRef<char> buf, const vesp
auto num_subspaces = get_num_subspaces(buf);
assert(buf.size() >= get_array_size(num_subspaces));
ConstArrayRef<string_id> labels(reinterpret_cast<const string_id*>(buf.data() + get_labels_offset()), num_subspaces * _num_mapped_dimensions);
- auto cells_size = num_subspaces * _dense_subspace_size;
- auto cells_mem_size = cells_size * _cell_mem_size; // Size measured in bytes
+ auto cells_size = num_subspaces * _subspace_type.size();
+ auto cells_mem_size = num_subspaces * _subspace_type.mem_size(); // Size measured in bytes
auto aligner = select_aligner(cells_mem_size);
auto cells_start_offset = get_cells_offset(num_subspaces, aligner);
- TypedCells cells(buf.data() + cells_start_offset, _cell_type, cells_size);
+ TypedCells cells(buf.data() + cells_start_offset, _subspace_type.cell_type(), cells_size);
assert(cells_start_offset + cells_mem_size <= buf.size());
StringIdVector labels_copy(labels.begin(), labels.end());
StreamedValueView streamed_value_view(tensor_type, _num_mapped_dimensions, cells, num_subspaces, labels_copy);
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h
index 43463e50ff1..26cf9a429a1 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h
@@ -3,6 +3,7 @@
#pragma once
#include "empty_subspace.h"
+#include "subspace_type.h"
#include "vector_bundle.h"
#include <vespa/vespalib/datastore/aligner.h>
#include <vespa/vespalib/util/arrayref.h>
@@ -39,11 +40,9 @@ namespace search::tensor {
*/
class TensorBufferOperations
{
+ SubspaceType _subspace_type;
uint32_t _num_mapped_dimensions;
- uint32_t _cell_mem_size;
uint32_t _min_alignment;
- size_t _dense_subspace_size;
- vespalib::eval::CellType _cell_type;
std::vector<vespalib::string_id> _addr;
std::vector<vespalib::string_id*> _addr_refs;
EmptySubspace _empty;
@@ -58,7 +57,7 @@ class TensorBufferOperations
static constexpr size_t get_num_subspaces_size() noexcept { return sizeof(uint32_t); }
static constexpr size_t get_labels_offset() noexcept { return get_num_subspaces_size(); }
size_t get_cells_mem_size(uint32_t num_subspaces) const noexcept {
- return _dense_subspace_size * _cell_mem_size * num_subspaces;
+ return _subspace_type.mem_size() * num_subspaces;
}
auto select_aligner(size_t cells_mem_size) const noexcept {
return Aligner((cells_mem_size < CELLS_ALIGNMENT_MEM_SIZE_MIN) ? _min_alignment : CELLS_ALIGNMENT);
@@ -108,7 +107,7 @@ public:
auto num_subspaces = get_num_subspaces(buf);
auto cells_mem_size = get_cells_mem_size(num_subspaces);
auto aligner = select_aligner(cells_mem_size);
- return VectorBundle(buf.data() + get_cells_offset(num_subspaces, aligner), _cell_type, num_subspaces, _dense_subspace_size * _cell_mem_size, _dense_subspace_size);
+ return VectorBundle(buf.data() + get_cells_offset(num_subspaces, aligner), num_subspaces, _subspace_type);
}
};
diff --git a/searchlib/src/vespa/searchlib/tensor/vector_bundle.h b/searchlib/src/vespa/searchlib/tensor/vector_bundle.h
index cb1d3f99cfb..09d0b514954 100644
--- a/searchlib/src/vespa/searchlib/tensor/vector_bundle.h
+++ b/searchlib/src/vespa/searchlib/tensor/vector_bundle.h
@@ -2,6 +2,7 @@
#pragma once
+#include "subspace_type.h"
#include <vespa/eval/eval/typed_cells.h>
#include <cassert>
@@ -27,12 +28,12 @@ public:
_subspace_size(0)
{
}
- VectorBundle(const void *data, vespalib::eval::CellType cell_type, uint32_t subspaces, size_t subspace_mem_size, size_t subspace_size)
+ VectorBundle(const void *data, uint32_t subspaces, const SubspaceType& subspace_type)
: _data(data),
- _cell_type(cell_type),
+ _cell_type(subspace_type.cell_type()),
_subspaces(subspaces),
- _subspace_mem_size(subspace_mem_size),
- _subspace_size(subspace_size)
+ _subspace_mem_size(subspace_type.mem_size()),
+ _subspace_size(subspace_type.size())
{
}
~VectorBundle() = default;