diff options
author | Tor Egge <Tor.Egge@online.no> | 2023-02-22 15:33:10 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2023-02-22 15:33:10 +0100 |
commit | d3225d03eee4ac67d1d23e076f77d9f25444c990 (patch) | |
tree | e2884b00f772669db26bbf69d4d824279933e908 /searchlib | |
parent | a55889ca8ba1f12b60e3e03813823483f8673c5f (diff) |
Add SerializedTensorRef.
Diffstat (limited to 'searchlib')
13 files changed, 177 insertions, 1 deletion
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 2f51459ebfa..28c50891225 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -26,9 +26,11 @@ #include <vespa/searchlib/util/bufferwriter.h> #include <vespa/vespalib/util/threadstackexecutor.h> #include <vespa/document/base/exceptions.h> +#include <vespa/eval/eval/fast_value.h> #include <vespa/eval/eval/simple_value.h> #include <vespa/eval/eval/tensor_spec.h> #include <vespa/eval/eval/value.h> +#include <vespa/eval/eval/value_codec.h> #include <vespa/eval/eval/test/value_compare.h> #include <vespa/fastos/file.h> #include <filesystem> @@ -60,7 +62,9 @@ using search::tensor::PrepareResult; using search::tensor::SerializedFastValueAttribute; using search::tensor::TensorAttribute; using search::tensor::VectorBundle; +using vespalib::SharedStringRepo; using vespalib::datastore::CompactionStrategy; +using vespalib::eval::FastValueBuilderFactory; using vespalib::eval::CellType; using vespalib::eval::SimpleValue; using vespalib::eval::TensorSpec; @@ -76,7 +80,17 @@ vespalib::string vec_2d_spec("tensor(x[2])"); vespalib::string vec_mixed_2d_spec("tensor(a{},x[2])"); Value::UP createTensor(const TensorSpec &spec) { - return SimpleValue::from_spec(spec); + return value_from_spec(spec, FastValueBuilderFactory::get()); +} + +std::vector<vespalib::string> +to_string_labels(vespalib::ConstArrayRef<vespalib::string_id> labels) +{ + std::vector<vespalib::string> result; + for (auto& label : labels) { + result.emplace_back(SharedStringRepo::Handle::string_from_id(label)); + } + return result; } TensorSpec @@ -569,6 +583,7 @@ struct Fixture { void testCompaction(); void testTensorTypeFileHeaderTag(); void testEmptyTensor(); + void testSerializedTensorRef(); void testOnHoldAccounting(); void 
test_populate_address_space_usage(); void test_mmap_file_allocator(); @@ -776,6 +791,44 @@ Fixture::testEmptyTensor() } void +Fixture::testSerializedTensorRef() +{ + const TensorAttribute &tensorAttr = *_tensorAttr; + if (_traits.use_dense_tensor_attribute || _traits.use_direct_tensor_attribute) { + EXPECT_FALSE(tensorAttr.supports_get_serialized_tensor_ref()); + return; + } + EXPECT_TRUE(tensorAttr.supports_get_serialized_tensor_ref()); + if (_denseTensors) { + set_tensor(3, expDenseTensor3()); + } else { + set_tensor(3, TensorSpec(sparseSpec) + .add({{"x", "one"}, {"y", "two"}}, 11) + .add({{"x", "three"}, {"y", "four"}}, 17)); + } + auto ref = tensorAttr.get_serialized_tensor_ref(3); + auto vectors = ref.get_vectors(); + if (_denseTensors) { + EXPECT_EQUAL(1u, vectors.subspaces()); + auto cells = vectors.cells(0).typify<double>(); + auto labels = ref.get_labels(0); + EXPECT_EQUAL(0u, labels.size()); + EXPECT_EQUAL((std::vector<double>{0.0, 11.0, 0.0, 0.0, 0.0, 0.0}), (std::vector<double>{ cells.begin(), cells.end() })); + } else { + EXPECT_EQUAL(2u, vectors.subspaces()); + auto cells = vectors.cells(0).typify<double>(); + auto labels = ref.get_labels(0); + EXPECT_EQUAL((std::vector<vespalib::string>{"one", "two"}), to_string_labels(labels)); + EXPECT_EQUAL((std::vector<double>{11.0}), (std::vector<double>{ cells.begin(), cells.end() })); + cells = vectors.cells(1).typify<double>(); + labels = ref.get_labels(1); + EXPECT_EQUAL((std::vector<vespalib::string>{"three", "four"}), to_string_labels(labels)); + EXPECT_EQUAL((std::vector<double>{17.0}), (std::vector<double>{ cells.begin(), cells.end() })); + } + TEST_DO(clearTensor(3)); +} + +void Fixture::testOnHoldAccounting() { { @@ -829,6 +882,7 @@ void testAll(MakeFixture &&f) TEST_DO(f()->testCompaction()); TEST_DO(f()->testTensorTypeFileHeaderTag()); TEST_DO(f()->testEmptyTensor()); + TEST_DO(f()->testSerializedTensorRef()); TEST_DO(f()->testOnHoldAccounting()); TEST_DO(f()->test_populate_address_space_usage()); 
TEST_DO(f()->test_mmap_file_allocator()); diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt index a00a50f32c8..9f96bce90c9 100644 --- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt @@ -30,6 +30,7 @@ vespa_add_library(searchlib_tensor OBJECT nearest_neighbor_index.cpp nearest_neighbor_index_saver.cpp serialized_fast_value_attribute.cpp + serialized_tensor_ref.cpp small_subspaces_buffer_type.cpp subspace_type.cpp tensor_attribute.cpp diff --git a/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h index 9b5f80b2ece..ec6774c9517 100644 --- a/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h @@ -13,6 +13,7 @@ namespace vespalib::slime { struct Inserter; } namespace search::tensor { class NearestNeighborIndex; +class SerializedTensorRef; /** * Interface for tensor attribute used by feature executors to get information. 
@@ -24,8 +25,10 @@ public: virtual std::unique_ptr<vespalib::eval::Value> getEmptyTensor() const = 0; virtual vespalib::eval::TypedCells extract_cells_ref(uint32_t docid) const = 0; virtual const vespalib::eval::Value& get_tensor_ref(uint32_t docid) const = 0; + virtual SerializedTensorRef get_serialized_tensor_ref(uint32_t docid) const = 0; virtual bool supports_extract_cells_ref() const = 0; virtual bool supports_get_tensor_ref() const = 0; + virtual bool supports_get_serialized_tensor_ref() const = 0; virtual const vespalib::eval::ValueType & getTensorType() const = 0; diff --git a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp index f9459823ce4..9a7b81ae1fa 100644 --- a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp +++ b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "imported_tensor_attribute_vector_read_guard.h" +#include "serialized_tensor_ref.h" #include "vector_bundle.h" #include <vespa/searchlib/attribute/attributevector.h> #include <vespa/eval/eval/value.h> @@ -79,6 +80,18 @@ ImportedTensorAttributeVectorReadGuard::getTensorType() const return _target_tensor_attribute.getTensorType(); } +SerializedTensorRef +ImportedTensorAttributeVectorReadGuard::get_serialized_tensor_ref(uint32_t docid) const +{ + return _target_tensor_attribute.get_serialized_tensor_ref(getTargetLid(docid)); +} + +bool +ImportedTensorAttributeVectorReadGuard::supports_get_serialized_tensor_ref() const +{ + return _target_tensor_attribute.supports_get_serialized_tensor_ref(); +} + void ImportedTensorAttributeVectorReadGuard::get_state(const vespalib::slime::Inserter& inserter) const { diff --git a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h index f277d39e97d..4e1cc9efd96 100644 --- a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h +++ b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h @@ -35,9 +35,11 @@ public: std::unique_ptr<vespalib::eval::Value> getEmptyTensor() const override; vespalib::eval::TypedCells extract_cells_ref(uint32_t docid) const override; const vespalib::eval::Value& get_tensor_ref(uint32_t docid) const override; + SerializedTensorRef get_serialized_tensor_ref(uint32_t docid) const override; bool supports_extract_cells_ref() const override { return _target_tensor_attribute.supports_extract_cells_ref(); } bool supports_get_tensor_ref() const override { return _target_tensor_attribute.supports_get_tensor_ref(); } DistanceMetric distance_metric() const override { return _target_tensor_attribute.distance_metric(); } + bool supports_get_serialized_tensor_ref() const override; uint32_t get_num_docs() const override { return getNumDocs(); } 
vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override; diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp index 6612db1d27e..51ebc22c269 100644 --- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "serialized_fast_value_attribute.h" +#include "serialized_tensor_ref.h" #include <vespa/eval/eval/value.h> #include <vespa/searchcommon/attribute/config.h> @@ -26,6 +27,19 @@ SerializedFastValueAttribute::~SerializedFastValueAttribute() _tensorStore.reclaim_all_memory(); } +SerializedTensorRef +SerializedFastValueAttribute::get_serialized_tensor_ref(uint32_t docid) const +{ + EntryRef ref = acquire_entry_ref(docid); + return _tensorBufferStore.get_serialized_tensor_ref(ref); +} + +bool +SerializedFastValueAttribute::supports_get_serialized_tensor_ref() const +{ + return true; +} + vespalib::eval::TypedCells SerializedFastValueAttribute::get_vector(uint32_t docid, uint32_t subspace) const { diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h index 4cfcc3d19a2..9066766fbc4 100644 --- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h @@ -23,6 +23,9 @@ public: SerializedFastValueAttribute(vespalib::stringref baseFileName, const Config &cfg, const NearestNeighborIndexFactory& index_factory = DefaultNearestNeighborIndexFactory()); ~SerializedFastValueAttribute() override; + SerializedTensorRef get_serialized_tensor_ref(uint32_t docid) const override; + bool supports_get_serialized_tensor_ref() const 
override; + // Implements DocVectorAccess vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override; VectorBundle get_vectors(uint32_t docid) const override; diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_tensor_ref.cpp b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_ref.cpp new file mode 100644 index 00000000000..1f8ca9ed2fd --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_ref.cpp @@ -0,0 +1,30 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "serialized_tensor_ref.h" + +namespace search::tensor { + +SerializedTensorRef::SerializedTensorRef() + : _vectors(), + _num_mapped_dimensions(0), + _labels() +{ +} + +SerializedTensorRef::SerializedTensorRef(VectorBundle vectors, uint32_t num_mapped_dimensions, vespalib::ConstArrayRef<vespalib::string_id> labels) + : _vectors(vectors), + _num_mapped_dimensions(num_mapped_dimensions), + _labels(labels) +{ +} + +SerializedTensorRef::~SerializedTensorRef() = default; + +vespalib::ConstArrayRef<vespalib::string_id> +SerializedTensorRef::get_labels(uint32_t subspace) const +{ + assert(subspace < _vectors.subspaces()); + return {_labels.data() + subspace * _num_mapped_dimensions, _num_mapped_dimensions}; +} + +} diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_tensor_ref.h b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_ref.h new file mode 100644 index 00000000000..01ddaadb2ff --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_ref.h @@ -0,0 +1,26 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "vector_bundle.h" +#include <vespa/vespalib/util/string_id.h> + +namespace search::tensor { + +/* + * This class contains a reference to a tensor stored in a TensorBufferStore. 
+ */ +class SerializedTensorRef +{ + VectorBundle _vectors; + uint32_t _num_mapped_dimensions; + vespalib::ConstArrayRef<vespalib::string_id> _labels; // all subspaces +public: + SerializedTensorRef(); + SerializedTensorRef(VectorBundle vectors, uint32_t num_mapped_dimensions, vespalib::ConstArrayRef<vespalib::string_id> labels); + ~SerializedTensorRef(); + const VectorBundle& get_vectors() const noexcept { return _vectors; } + vespalib::ConstArrayRef<vespalib::string_id> get_labels(uint32_t subspace) const; +}; + +} diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp index 9ee8d9fdf46..13dad7fc1f2 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp @@ -4,6 +4,7 @@ #include "nearest_neighbor_index.h" #include "nearest_neighbor_index_factory.h" #include "nearest_neighbor_index_saver.h" +#include "serialized_tensor_ref.h" #include "tensor_attribute_constants.h" #include "tensor_attribute_loader.h" #include "tensor_attribute_saver.h" @@ -261,6 +262,18 @@ TensorAttribute::get_tensor_ref(uint32_t /*docid*/) const notImplemented(); } +SerializedTensorRef +TensorAttribute::get_serialized_tensor_ref(uint32_t) const +{ + notImplemented(); +} + +bool +TensorAttribute::supports_get_serialized_tensor_ref() const +{ + return false; +} + const vespalib::eval::ValueType & TensorAttribute::getTensorType() const { diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h index a4c30a574e5..20c8ae60107 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h @@ -63,8 +63,10 @@ public: std::unique_ptr<vespalib::eval::Value> getEmptyTensor() const override; vespalib::eval::TypedCells extract_cells_ref(uint32_t docid) const override; const vespalib::eval::Value& get_tensor_ref(uint32_t 
docid) const override; + SerializedTensorRef get_serialized_tensor_ref(uint32_t docid) const override; bool supports_extract_cells_ref() const override { return false; } bool supports_get_tensor_ref() const override { return false; } + bool supports_get_serialized_tensor_ref() const override; const vespalib::eval::ValueType & getTensorType() const override; const NearestNeighborIndex* nearest_neighbor_index() const override; void get_state(const vespalib::slime::Inserter& inserter) const override; diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h index 3928b41c2d1..72940cbd6a0 100644 --- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h @@ -3,6 +3,7 @@ #pragma once #include "empty_subspace.h" +#include "serialized_tensor_ref.h" #include "subspace_type.h" #include "vector_bundle.h" #include <vespa/vespalib/datastore/aligner.h> @@ -110,6 +111,13 @@ public: auto aligner = select_aligner(cells_mem_size); return VectorBundle(buf.data() + get_cells_offset(num_subspaces, aligner), num_subspaces, _subspace_type); } + SerializedTensorRef get_serialized_tensor_ref(vespalib::ConstArrayRef<char> buf) const { + auto num_subspaces = get_num_subspaces(buf); + auto cells_mem_size = get_cells_mem_size(num_subspaces); + auto aligner = select_aligner(cells_mem_size); + vespalib::ConstArrayRef<vespalib::string_id> labels(reinterpret_cast<const vespalib::string_id*>(buf.data() + get_labels_offset()), num_subspaces * _num_mapped_dimensions); + return SerializedTensorRef(VectorBundle(buf.data() + get_cells_offset(num_subspaces, aligner), num_subspaces, _subspace_type), _num_mapped_dimensions, labels); + } }; } diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h index f602836bd32..2e86ff5fb67 100644 --- 
a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h +++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h @@ -44,6 +44,13 @@ public: auto buf = _array_store.get(ref); return _ops.get_vectors(buf); } + SerializedTensorRef get_serialized_tensor_ref(EntryRef ref) const { + if (!ref.valid()) { + return SerializedTensorRef(); + } + auto buf = _array_store.get(ref); + return _ops.get_serialized_tensor_ref(buf); + } // Used by unit test static constexpr uint32_t get_offset_bits() noexcept { return RefType::offset_bits; } |