diff options
Diffstat (limited to 'searchlib/src')
16 files changed, 125 insertions, 33 deletions
diff --git a/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp b/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp index cf0656fc919..bd62e8a7f3c 100644 --- a/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp +++ b/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp @@ -125,24 +125,21 @@ TEST_F(DirectTensorStoreTest, move_on_compact_allocates_new_entry_and_leaves_old EXPECT_GT(mem_2.usedBytes(), mem_1.usedBytes() + tensor_mem_usage.allocatedBytes()); } -TEST_F(DirectTensorStoreTest, get_typed_cells) +TEST_F(DirectTensorStoreTest, get_vectors) { auto tensor_spec = TensorSpec(tensor_type_spec).add({{"x", "a"}}, 4.5).add({{"x", "b"}}, 5.5).add({{"x", "c"}}, 6.5).add({{"x", "d"}}, 7.5); auto tensor = value_from_spec(tensor_spec, FastValueBuilderFactory::get()); auto ref = store.store_tensor(std::move(tensor)); std::vector<double> values; + auto vectors = store.get_vectors(ref); + EXPECT_EQ(4, vectors.subspaces()); for (uint32_t subspace = 0; subspace < 4; ++subspace) { - auto cells = store.get_typed_cells(ref, subspace).typify<double>(); + auto cells = vectors.cells(subspace).typify<double>(); EXPECT_EQ(1, cells.size()); values.emplace_back(cells[0]); } EXPECT_EQ((std::vector<double>{4.5, 5.5, 6.5, 7.5}), values); - for (auto tref : { ref, EntryRef() }) { - auto subspace = tref.valid() ? 4 : 0; - auto cells = store.get_typed_cells(tref, subspace).typify<double>(); - EXPECT_EQ(1, cells.size()); - EXPECT_EQ(0.0, cells[0]); - } + EXPECT_EQ(0, store.get_vectors(EntryRef()).subspaces()); } GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index 8d3d389090b..1738ee510c8 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -6,6 +6,7 @@ #include <vespa/searchlib/tensor/hnsw_index.h> #include <vespa/searchlib/tensor/random_level_generator.h> #include <vespa/searchlib/tensor/inv_log_level_generator.h> +#include <vespa/searchlib/tensor/vector_bundle.h> #include <vespa/searchlib/queryeval/global_filter.h> #include <vespa/vespalib/datastore/compaction_spec.h> #include <vespa/vespalib/datastore/compaction_strategy.h> @@ -23,6 +24,7 @@ using namespace search::tensor; using namespace vespalib::slime; using vespalib::Slime; using search::BitVector; +using vespalib::eval::get_cell_type; using vespalib::datastore::CompactionSpec; using vespalib::datastore::CompactionStrategy; using search::queryeval::GlobalFilter; @@ -48,6 +50,10 @@ public: ArrayRef ref(_vectors[docid]); return vespalib::eval::TypedCells(ref); } + VectorBundle get_vectors(uint32_t docid) const override { + ArrayRef ref(_vectors[docid]); + return VectorBundle(ref.data(), get_cell_type<FloatType>(), 1, ref.size() * sizeof(FloatType), ref.size()); + } void clear() { _vectors.clear(); } }; diff --git a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp index c5c88d2eeff..4cc24657a00 100644 --- a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp @@ -15,6 +15,7 @@ #include <vespa/searchlib/tensor/hnsw_index.h> #include <vespa/searchlib/tensor/inv_log_level_generator.h> #include <vespa/searchlib/tensor/random_level_generator.h> +#include <vespa/searchlib/tensor/vector_bundle.h> #include <vespa/vespalib/data/input.h> #include <vespa/vespalib/data/memory_input.h> #include <vespa/vespalib/data/slime/slime.h> @@ -31,6 +32,7 @@ LOG_SETUP("stress_hnsw_mt"); using namespace search::tensor; using namespace vespalib::slime; using search::BitVector; +using vespalib::eval::CellType; using vespalib::GenerationHandler; using vespalib::MemoryUsage; using vespalib::Slime; @@ -116,6 +118,11 @@ public: ConstVectorRef ref(_vectors[docid]); return vespalib::eval::TypedCells(ref); } + VectorBundle get_vectors(uint32_t docid) const override { + assert(docid < NUM_POSSIBLE_DOCS); + ConstVectorRef ref(_vectors[docid]); + return VectorBundle(ref.data(), CellType::FLOAT, 1, sizeof(float) * NUM_DIMS, NUM_DIMS); + } }; using FloatSqEuclideanDistance = SquaredEuclideanDistanceHW<float>; diff --git a/searchlib/src/tests/tensor/tensor_buffer_store/tensor_buffer_store_test.cpp b/searchlib/src/tests/tensor/tensor_buffer_store/tensor_buffer_store_test.cpp index 05e40200167..ec7fc2334c4 100644 --- a/searchlib/src/tests/tensor/tensor_buffer_store/tensor_buffer_store_test.cpp +++ b/searchlib/src/tests/tensor/tensor_buffer_store/tensor_buffer_store_test.cpp @@ -162,22 +162,19 @@ TEST_F(TensorBufferStoreTest, stored_tensor_can_be_encoded_and_stored_as_encoded } } -TEST_F(TensorBufferStoreTest, get_typed_cells) +TEST_F(TensorBufferStoreTest, get_vectors) { auto ref = store_tensor(tensor_specs.back()); std::vector<double> values; + auto vectors = _store.get_vectors(ref); + EXPECT_EQ(4, vectors.subspaces()); for (uint32_t subspace = 0; subspace < 4; ++subspace) { - auto cells = _store.get_typed_cells(ref, subspace).typify<double>(); + auto cells = vectors.cells(subspace).typify<double>(); EXPECT_EQ(1, cells.size()); values.emplace_back(cells[0]); } EXPECT_EQ((std::vector<double>{4.5, 5.5, 6.5, 7.5}), values); - for (auto tref : { ref, EntryRef() }) { - auto subspace = tref.valid() ? 4 : 0; - auto cells = _store.get_typed_cells(tref, subspace).typify<double>(); - EXPECT_EQ(1, cells.size()); - EXPECT_EQ(0.0, cells[0]); - } + EXPECT_EQ(0, _store.get_vectors(EntryRef()).subspaces()); } GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp index e69a6d925e9..fd94c4eb60c 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp @@ -99,4 +99,11 @@ DenseTensorAttribute::get_vector(uint32_t docid, uint32_t subspace) const return _denseTensorStore.get_typed_cells(ref); } +VectorBundle +DenseTensorAttribute::get_vectors(uint32_t docid) const +{ + EntryRef ref = acquire_entry_ref(docid); + return _denseTensorStore.get_vectors(ref); +} + } diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h index dc398b7ec6f..2db7bb332a9 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h @@ -35,6 +35,7 @@ public: // Implements DocVectorAccess vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override; + VectorBundle get_vectors(uint32_t docid) const override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h index 9d0ad6536b0..7b133977073 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h @@ -4,6 +4,7 @@ #include "tensor_store.h" #include "empty_subspace.h" +#include "vector_bundle.h" #include <vespa/eval/eval/value_type.h> #include <vespa/eval/eval/typed_cells.h> #include <vespa/vespalib/datastore/datastore.h> @@ -81,6 +82,12 @@ public: return vespalib::eval::TypedCells(getRawBuffer(ref), _type.cell_type(), getNumCells()); } + VectorBundle get_vectors(EntryRef ref) const { + if (!ref.valid()) { + return VectorBundle(); + } + return VectorBundle(getRawBuffer(ref), _type.cell_type(), 1, getBufSize(), getNumCells()); + } // The following method is meant to be used only for unit tests. uint32_t getArraySize() const { return _bufferType.getArraySize(); } }; diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp index f1dd7238805..c2f0ff36c3a 100644 --- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp @@ -77,7 +77,15 @@ vespalib::eval::TypedCells DirectTensorAttribute::get_vector(uint32_t docid, uint32_t subspace) const { EntryRef ref = acquire_entry_ref(docid); - return _direct_store.get_typed_cells(ref, subspace); + auto vectors = _direct_store.get_vectors(ref); + return (subspace < vectors.subspaces()) ? vectors.cells(subspace) : _direct_store.get_empty_subspace(); +} + +VectorBundle +DirectTensorAttribute::get_vectors(uint32_t docid) const +{ + EntryRef ref = acquire_entry_ref(docid); + return _direct_store.get_vectors(ref); } } // namespace diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.h index 98c34ec1060..ed48ea20e0f 100644 --- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.h @@ -26,6 +26,7 @@ public: // Implements DocVectorAccess vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override; + VectorBundle get_vectors(uint32_t docid) const override; }; } // namespace search::tensor diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h index e11c215d44c..a84c321c13a 100644 --- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h +++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h @@ -4,6 +4,7 @@ #include "tensor_store.h" #include "empty_subspace.h" +#include "vector_bundle.h" #include <vespa/eval/eval/value.h> #include <vespa/vespalib/datastore/datastore.h> @@ -59,16 +60,17 @@ public: EntryRef store_encoded_tensor(vespalib::nbostream& encoded) override; std::unique_ptr<vespalib::eval::Value> get_tensor(EntryRef ref) const override; bool encode_stored_tensor(EntryRef ref, vespalib::nbostream& target) const override; - vespalib::eval::TypedCells get_typed_cells(EntryRef ref, uint32_t subspace) const { + vespalib::eval::TypedCells get_empty_subspace() const noexcept { + return _empty.cells(); + } + VectorBundle get_vectors(EntryRef ref) const { auto tensor = get_tensor_ptr(ref); - if (tensor == nullptr || subspace >= tensor->index().size()) { - return _empty.cells(); + if (tensor == nullptr) { + return VectorBundle(); } - auto cells = tensor->cells(); auto type = tensor->type(); - auto data = static_cast<const char *>(cells.data); - auto dense_subspace_size = type.dense_subspace_size(); - return vespalib::eval::TypedCells(data + vespalib::eval::CellTypeUtils::mem_size(type.cell_type(), subspace * dense_subspace_size), cells.type, dense_subspace_size); + auto subspace_size = type.dense_subspace_size(); + return VectorBundle(tensor->cells().data, type.cell_type(), tensor->index().size(), vespalib::eval::CellTypeUtils::mem_size(type.cell_type(), subspace_size), subspace_size); } }; diff --git a/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h b/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h index 7a64c1d28ae..ab1d8d331d9 100644 --- a/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h +++ b/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h @@ -7,6 +7,8 @@ namespace search::tensor { +class VectorBundle; + /** * Interface that provides access to the vector that is associated with the the given document id. * @@ -16,6 +18,7 @@ class DocVectorAccess { public: virtual ~DocVectorAccess() {} virtual vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const = 0; + virtual VectorBundle get_vectors(uint32_t docid) const = 0; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp index 3ae592e6e6f..4fd8da5ac9d 100644 --- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp @@ -30,7 +30,15 @@ vespalib::eval::TypedCells SerializedFastValueAttribute::get_vector(uint32_t docid, uint32_t subspace) const { EntryRef ref = acquire_entry_ref(docid); - return _tensorBufferStore.get_typed_cells(ref, subspace); + auto vectors = _tensorBufferStore.get_vectors(ref); + return (subspace < vectors.subspaces()) ? vectors.cells(subspace) : _tensorBufferStore.get_empty_subspace(); +} + +VectorBundle +SerializedFastValueAttribute::get_vectors(uint32_t docid) const +{ + EntryRef ref = acquire_entry_ref(docid); + return _tensorBufferStore.get_vectors(ref); } } diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h index 6215dbbc461..31a7f136d23 100644 --- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h @@ -24,6 +24,7 @@ public: // Implements DocVectorAccess vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override; + VectorBundle get_vectors(uint32_t docid) const override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h index 963e9b99920..43463e50ff1 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h @@ -3,6 +3,7 @@ #pragma once #include "empty_subspace.h" +#include "vector_bundle.h" #include <vespa/vespalib/datastore/aligner.h> #include <vespa/vespalib/util/arrayref.h> #include <vespa/vespalib/util/string_id.h> @@ -103,14 +104,11 @@ public: vespalib::eval::TypedCells get_empty_subspace() const noexcept { return _empty.cells(); } - vespalib::eval::TypedCells get_typed_cells(vespalib::ConstArrayRef<char> buf, uint32_t subspace) const { + VectorBundle get_vectors(vespalib::ConstArrayRef<char> buf) const { auto num_subspaces = get_num_subspaces(buf); - if (subspace >= num_subspaces) { - return _empty.cells(); - } auto cells_mem_size = get_cells_mem_size(num_subspaces); auto aligner = select_aligner(cells_mem_size); - return vespalib::eval::TypedCells(buf.data() + get_cells_offset(num_subspaces, aligner) + get_cells_mem_size(subspace), _cell_type, _dense_subspace_size); + return VectorBundle(buf.data() + get_cells_offset(num_subspaces, aligner), _cell_type, num_subspaces, _dense_subspace_size * _cell_mem_size, _dense_subspace_size); } }; diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h index 071e238d5cc..ce00977c298 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h @@ -34,12 +34,15 @@ public: EntryRef store_encoded_tensor(vespalib::nbostream& encoded) override; std::unique_ptr<vespalib::eval::Value> get_tensor(EntryRef ref) const override; bool encode_stored_tensor(EntryRef ref, vespalib::nbostream& target) const override; - vespalib::eval::TypedCells get_typed_cells(EntryRef ref, uint32_t subspace) const { + vespalib::eval::TypedCells get_empty_subspace() const noexcept { + return _ops.get_empty_subspace(); + } + VectorBundle get_vectors(EntryRef ref) const { if (!ref.valid()) { - return _ops.get_empty_subspace(); + return VectorBundle(); } auto buf = _array_store.get(ref); - return _ops.get_typed_cells(buf, subspace); + return _ops.get_vectors(buf); } }; diff --git a/searchlib/src/vespa/searchlib/tensor/vector_bundle.h b/searchlib/src/vespa/searchlib/tensor/vector_bundle.h new file mode 100644 index 00000000000..cb1d3f99cfb --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/vector_bundle.h @@ -0,0 +1,46 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/typed_cells.h> +#include <cassert> + +namespace search::tensor { + +/* + * Class referencing the cells owned by a tensor in a form suitable to extract tensor cells for + * a subspace. + */ +class VectorBundle +{ + const void* _data; + vespalib::eval::CellType _cell_type; + uint32_t _subspaces; + size_t _subspace_mem_size; + size_t _subspace_size; +public: + VectorBundle() + : _data(nullptr), + _cell_type(vespalib::eval::CellType::DOUBLE), + _subspaces(0), + _subspace_mem_size(0), + _subspace_size(0) + { + } + VectorBundle(const void *data, vespalib::eval::CellType cell_type, uint32_t subspaces, size_t subspace_mem_size, size_t subspace_size) + : _data(data), + _cell_type(cell_type), + _subspaces(subspaces), + _subspace_mem_size(subspace_mem_size), + _subspace_size(subspace_size) + { + } + ~VectorBundle() = default; + uint32_t subspaces() const noexcept { return _subspaces; } + const vespalib::eval::TypedCells cells(uint32_t subspace) const noexcept { + assert(subspace < _subspaces); + return vespalib::eval::TypedCells(static_cast<const char*>(_data) + _subspace_mem_size * subspace, _cell_type, _subspace_size); + } +}; + +} |