summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
author: Tor Egge <Tor.Egge@online.no> 2023-02-22 15:33:10 +0100
committer: Tor Egge <Tor.Egge@online.no> 2023-02-22 15:33:10 +0100
commit: d3225d03eee4ac67d1d23e076f77d9f25444c990 (patch)
tree: e2884b00f772669db26bbf69d4d824279933e908 /searchlib
parent: a55889ca8ba1f12b60e3e03813823483f8673c5f (diff)
Add SerializedTensorRef.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp 56
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h 3
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp 13
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp 14
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h 3
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/serialized_tensor_ref.cpp 30
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/serialized_tensor_ref.h 26
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp 13
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/tensor_attribute.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h 8
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h 7
13 files changed, 177 insertions, 1 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index 2f51459ebfa..28c50891225 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -26,9 +26,11 @@
#include <vespa/searchlib/util/bufferwriter.h>
#include <vespa/vespalib/util/threadstackexecutor.h>
#include <vespa/document/base/exceptions.h>
+#include <vespa/eval/eval/fast_value.h>
#include <vespa/eval/eval/simple_value.h>
#include <vespa/eval/eval/tensor_spec.h>
#include <vespa/eval/eval/value.h>
+#include <vespa/eval/eval/value_codec.h>
#include <vespa/eval/eval/test/value_compare.h>
#include <vespa/fastos/file.h>
#include <filesystem>
@@ -60,7 +62,9 @@ using search::tensor::PrepareResult;
using search::tensor::SerializedFastValueAttribute;
using search::tensor::TensorAttribute;
using search::tensor::VectorBundle;
+using vespalib::SharedStringRepo;
using vespalib::datastore::CompactionStrategy;
+using vespalib::eval::FastValueBuilderFactory;
using vespalib::eval::CellType;
using vespalib::eval::SimpleValue;
using vespalib::eval::TensorSpec;
@@ -76,7 +80,17 @@ vespalib::string vec_2d_spec("tensor(x[2])");
vespalib::string vec_mixed_2d_spec("tensor(a{},x[2])");
Value::UP createTensor(const TensorSpec &spec) {
- return SimpleValue::from_spec(spec);
+ return value_from_spec(spec, FastValueBuilderFactory::get());
+}
+
+/*
+ * Converts each string id in 'labels' to its string form using
+ * SharedStringRepo::Handle::string_from_id. The result has one entry per
+ * label, in the same order.
+ */
+std::vector<vespalib::string>
+to_string_labels(vespalib::ConstArrayRef<vespalib::string_id> labels)
+{
+    std::vector<vespalib::string> result;
+    result.reserve(labels.size()); // final size is known up front
+    for (const auto& label : labels) {
+        result.emplace_back(SharedStringRepo::Handle::string_from_id(label));
+    }
+    return result;
}
TensorSpec
@@ -569,6 +583,7 @@ struct Fixture {
void testCompaction();
void testTensorTypeFileHeaderTag();
void testEmptyTensor();
+ void testSerializedTensorRef();
void testOnHoldAccounting();
void test_populate_address_space_usage();
void test_mmap_file_allocator();
@@ -776,6 +791,44 @@ Fixture::testEmptyTensor()
}
+// Checks get_serialized_tensor_ref() for document 3: number of subspaces,
+// labels per subspace and cell values. For fixtures with
+// use_dense_tensor_attribute or use_direct_tensor_attribute set, it only
+// checks that the attribute reports the operation as unsupported.
void
+Fixture::testSerializedTensorRef()
+{
+ const TensorAttribute &tensorAttr = *_tensorAttr;
+ if (_traits.use_dense_tensor_attribute || _traits.use_direct_tensor_attribute) {
+ // These variants must not claim support; nothing more to verify.
+ EXPECT_FALSE(tensorAttr.supports_get_serialized_tensor_ref());
+ return;
+ }
+ EXPECT_TRUE(tensorAttr.supports_get_serialized_tensor_ref());
+ // Store a tensor for document 3 before taking the reference.
+ if (_denseTensors) {
+ set_tensor(3, expDenseTensor3());
+ } else {
+ set_tensor(3, TensorSpec(sparseSpec)
+ .add({{"x", "one"}, {"y", "two"}}, 11)
+ .add({{"x", "three"}, {"y", "four"}}, 17));
+ }
+ auto ref = tensorAttr.get_serialized_tensor_ref(3);
+ auto vectors = ref.get_vectors();
+ if (_denseTensors) {
+ // Dense case: a single subspace without labels. The expected cells
+ // are those of expDenseTensor3(), which is defined outside this view.
+ EXPECT_EQUAL(1u, vectors.subspaces());
+ auto cells = vectors.cells(0).typify<double>();
+ auto labels = ref.get_labels(0);
+ EXPECT_EQUAL(0u, labels.size());
+ EXPECT_EQUAL((std::vector<double>{0.0, 11.0, 0.0, 0.0, 0.0, 0.0}), (std::vector<double>{ cells.begin(), cells.end() }));
+ } else {
+ // Sparse case: one subspace per added cell; each subspace is expected
+ // to carry its x label followed by its y label, and one cell value.
+ EXPECT_EQUAL(2u, vectors.subspaces());
+ auto cells = vectors.cells(0).typify<double>();
+ auto labels = ref.get_labels(0);
+ EXPECT_EQUAL((std::vector<vespalib::string>{"one", "two"}), to_string_labels(labels));
+ EXPECT_EQUAL((std::vector<double>{11.0}), (std::vector<double>{ cells.begin(), cells.end() }));
+ cells = vectors.cells(1).typify<double>();
+ labels = ref.get_labels(1);
+ EXPECT_EQUAL((std::vector<vespalib::string>{"three", "four"}), to_string_labels(labels));
+ EXPECT_EQUAL((std::vector<double>{17.0}), (std::vector<double>{ cells.begin(), cells.end() }));
+ }
+ // Remove the tensor for document 3 again.
+ TEST_DO(clearTensor(3));
+}
+
+void
Fixture::testOnHoldAccounting()
{
{
@@ -829,6 +882,7 @@ void testAll(MakeFixture &&f)
TEST_DO(f()->testCompaction());
TEST_DO(f()->testTensorTypeFileHeaderTag());
TEST_DO(f()->testEmptyTensor());
+ TEST_DO(f()->testSerializedTensorRef());
TEST_DO(f()->testOnHoldAccounting());
TEST_DO(f()->test_populate_address_space_usage());
TEST_DO(f()->test_mmap_file_allocator());
diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
index a00a50f32c8..9f96bce90c9 100644
--- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
@@ -30,6 +30,7 @@ vespa_add_library(searchlib_tensor OBJECT
nearest_neighbor_index.cpp
nearest_neighbor_index_saver.cpp
serialized_fast_value_attribute.cpp
+ serialized_tensor_ref.cpp
small_subspaces_buffer_type.cpp
subspace_type.cpp
tensor_attribute.cpp
diff --git a/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h
index 9b5f80b2ece..ec6774c9517 100644
--- a/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/i_tensor_attribute.h
@@ -13,6 +13,7 @@ namespace vespalib::slime { struct Inserter; }
namespace search::tensor {
class NearestNeighborIndex;
+class SerializedTensorRef;
/**
* Interface for tensor attribute used by feature executors to get information.
@@ -24,8 +25,10 @@ public:
virtual std::unique_ptr<vespalib::eval::Value> getEmptyTensor() const = 0;
virtual vespalib::eval::TypedCells extract_cells_ref(uint32_t docid) const = 0;
virtual const vespalib::eval::Value& get_tensor_ref(uint32_t docid) const = 0;
+ virtual SerializedTensorRef get_serialized_tensor_ref(uint32_t docid) const = 0;
virtual bool supports_extract_cells_ref() const = 0;
virtual bool supports_get_tensor_ref() const = 0;
+ virtual bool supports_get_serialized_tensor_ref() const = 0;
virtual const vespalib::eval::ValueType & getTensorType() const = 0;
diff --git a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp
index f9459823ce4..9a7b81ae1fa 100644
--- a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.cpp
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "imported_tensor_attribute_vector_read_guard.h"
+#include "serialized_tensor_ref.h"
#include "vector_bundle.h"
#include <vespa/searchlib/attribute/attributevector.h>
#include <vespa/eval/eval/value.h>
@@ -79,6 +80,18 @@ ImportedTensorAttributeVectorReadGuard::getTensorType() const
return _target_tensor_attribute.getTensorType();
}
+/*
+ * Maps 'docid' through getTargetLid() and forwards the lookup to the
+ * target tensor attribute.
+ */
+SerializedTensorRef
+ImportedTensorAttributeVectorReadGuard::get_serialized_tensor_ref(uint32_t docid) const
+{
+    const auto target_lid = getTargetLid(docid);
+    return _target_tensor_attribute.get_serialized_tensor_ref(target_lid);
+}
+
+// Reports whatever the target tensor attribute reports for
+// supports_get_serialized_tensor_ref().
+bool
+ImportedTensorAttributeVectorReadGuard::supports_get_serialized_tensor_ref() const
+{
+ return _target_tensor_attribute.supports_get_serialized_tensor_ref();
+}
+
void
ImportedTensorAttributeVectorReadGuard::get_state(const vespalib::slime::Inserter& inserter) const
{
diff --git a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h
index f277d39e97d..4e1cc9efd96 100644
--- a/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h
+++ b/searchlib/src/vespa/searchlib/tensor/imported_tensor_attribute_vector_read_guard.h
@@ -35,9 +35,11 @@ public:
std::unique_ptr<vespalib::eval::Value> getEmptyTensor() const override;
vespalib::eval::TypedCells extract_cells_ref(uint32_t docid) const override;
const vespalib::eval::Value& get_tensor_ref(uint32_t docid) const override;
+ SerializedTensorRef get_serialized_tensor_ref(uint32_t docid) const override;
bool supports_extract_cells_ref() const override { return _target_tensor_attribute.supports_extract_cells_ref(); }
bool supports_get_tensor_ref() const override { return _target_tensor_attribute.supports_get_tensor_ref(); }
DistanceMetric distance_metric() const override { return _target_tensor_attribute.distance_metric(); }
+ bool supports_get_serialized_tensor_ref() const override;
uint32_t get_num_docs() const override { return getNumDocs(); }
vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override;
diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp
index 6612db1d27e..51ebc22c269 100644
--- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "serialized_fast_value_attribute.h"
+#include "serialized_tensor_ref.h"
#include <vespa/eval/eval/value.h>
#include <vespa/searchcommon/attribute/config.h>
@@ -26,6 +27,19 @@ SerializedFastValueAttribute::~SerializedFastValueAttribute()
_tensorStore.reclaim_all_memory();
}
+/*
+ * Obtains the entry ref for 'docid' via acquire_entry_ref() and returns the
+ * serialized tensor reference that the tensor buffer store produces for it.
+ */
+SerializedTensorRef
+SerializedFastValueAttribute::get_serialized_tensor_ref(uint32_t docid) const
+{
+    return _tensorBufferStore.get_serialized_tensor_ref(acquire_entry_ref(docid));
+}
+
+// This attribute implements get_serialized_tensor_ref(), so it always
+// reports support.
+bool
+SerializedFastValueAttribute::supports_get_serialized_tensor_ref() const
+{
+ return true;
+}
+
vespalib::eval::TypedCells
SerializedFastValueAttribute::get_vector(uint32_t docid, uint32_t subspace) const
{
diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h
index 4cfcc3d19a2..9066766fbc4 100644
--- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h
@@ -23,6 +23,9 @@ public:
SerializedFastValueAttribute(vespalib::stringref baseFileName, const Config &cfg, const NearestNeighborIndexFactory& index_factory = DefaultNearestNeighborIndexFactory());
~SerializedFastValueAttribute() override;
+ SerializedTensorRef get_serialized_tensor_ref(uint32_t docid) const override;
+ bool supports_get_serialized_tensor_ref() const override;
+
// Implements DocVectorAccess
vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override;
VectorBundle get_vectors(uint32_t docid) const override;
diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_tensor_ref.cpp b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_ref.cpp
new file mode 100644
index 00000000000..1f8ca9ed2fd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_ref.cpp
@@ -0,0 +1,30 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "serialized_tensor_ref.h"
+#include <cassert>
+#include <cstddef>
+
+namespace search::tensor {
+
+// Creates an empty reference: default-constructed vectors, zero mapped
+// dimensions and no labels.
+SerializedTensorRef::SerializedTensorRef()
+    : _vectors(),
+      _num_mapped_dimensions(0),
+      _labels()
+{
+}
+
+// Creates a reference from the vectors of a tensor, the number of labels
+// per subspace ('num_mapped_dimensions') and the labels of all subspaces.
+SerializedTensorRef::SerializedTensorRef(VectorBundle vectors, uint32_t num_mapped_dimensions, vespalib::ConstArrayRef<vespalib::string_id> labels)
+    : _vectors(vectors),
+      _num_mapped_dimensions(num_mapped_dimensions),
+      _labels(labels)
+{
+}
+
+SerializedTensorRef::~SerializedTensorRef() = default;
+
+/*
+ * Returns the labels of one subspace: the _num_mapped_dimensions entries of
+ * _labels that start at index subspace * _num_mapped_dimensions.
+ *
+ * 'subspace' must be less than _vectors.subspaces(); this is only checked
+ * by an assert.
+ */
+vespalib::ConstArrayRef<vespalib::string_id>
+SerializedTensorRef::get_labels(uint32_t subspace) const
+{
+    assert(subspace < _vectors.subspaces());
+    // Widen before multiplying so the offset is not computed modulo 2^32.
+    const size_t offset = static_cast<size_t>(subspace) * _num_mapped_dimensions;
+    return {_labels.data() + offset, _num_mapped_dimensions};
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_tensor_ref.h b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_ref.h
new file mode 100644
index 00000000000..01ddaadb2ff
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/serialized_tensor_ref.h
@@ -0,0 +1,26 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "vector_bundle.h"
+#include <vespa/vespalib/util/string_id.h>
+
+namespace search::tensor {
+
+/*
+ * This class contains a reference to a tensor stored in a TensorBufferStore.
+ *
+ * It combines the vectors of the tensor (VectorBundle) with the labels of
+ * all subspaces and the number of mapped dimensions, which is the number of
+ * labels per subspace.
+ *
+ * NOTE(review): the members look like non-owning views, so an instance is
+ * presumably only usable while the underlying buffer is kept alive - confirm.
+ */
+class SerializedTensorRef
+{
+ VectorBundle _vectors;
+ uint32_t _num_mapped_dimensions;
+ vespalib::ConstArrayRef<vespalib::string_id> _labels; // all subspaces
+public:
+ // Creates an empty reference (zero mapped dimensions, no labels).
+ SerializedTensorRef();
+ // 'labels' holds the labels of all subspaces; get_labels() slices it into
+ // runs of 'num_mapped_dimensions' entries.
+ SerializedTensorRef(VectorBundle vectors, uint32_t num_mapped_dimensions, vespalib::ConstArrayRef<vespalib::string_id> labels);
+ ~SerializedTensorRef();
+ const VectorBundle& get_vectors() const noexcept { return _vectors; }
+ // Returns the labels of the given subspace; 'subspace' must be less than
+ // get_vectors().subspaces().
+ vespalib::ConstArrayRef<vespalib::string_id> get_labels(uint32_t subspace) const;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
index 9ee8d9fdf46..13dad7fc1f2 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.cpp
@@ -4,6 +4,7 @@
#include "nearest_neighbor_index.h"
#include "nearest_neighbor_index_factory.h"
#include "nearest_neighbor_index_saver.h"
+#include "serialized_tensor_ref.h"
#include "tensor_attribute_constants.h"
#include "tensor_attribute_loader.h"
#include "tensor_attribute_saver.h"
@@ -261,6 +262,18 @@ TensorAttribute::get_tensor_ref(uint32_t /*docid*/) const
notImplemented();
}
+// Base class implementation: only calls notImplemented(). The matching
+// supports_get_serialized_tensor_ref() in this class returns false.
+// NOTE(review): there is no return statement, so this relies on
+// notImplemented() never returning - confirm.
+SerializedTensorRef
+TensorAttribute::get_serialized_tensor_ref(uint32_t) const
+{
+ notImplemented();
+}
+
+// Base class default: get_serialized_tensor_ref() is not supported.
+bool
+TensorAttribute::supports_get_serialized_tensor_ref() const
+{
+ return false;
+}
+
const vespalib::eval::ValueType &
TensorAttribute::getTensorType() const
{
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h
index a4c30a574e5..20c8ae60107 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_attribute.h
@@ -63,8 +63,10 @@ public:
std::unique_ptr<vespalib::eval::Value> getEmptyTensor() const override;
vespalib::eval::TypedCells extract_cells_ref(uint32_t docid) const override;
const vespalib::eval::Value& get_tensor_ref(uint32_t docid) const override;
+ SerializedTensorRef get_serialized_tensor_ref(uint32_t docid) const override;
bool supports_extract_cells_ref() const override { return false; }
bool supports_get_tensor_ref() const override { return false; }
+ bool supports_get_serialized_tensor_ref() const override;
const vespalib::eval::ValueType & getTensorType() const override;
const NearestNeighborIndex* nearest_neighbor_index() const override;
void get_state(const vespalib::slime::Inserter& inserter) const override;
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h
index 3928b41c2d1..72940cbd6a0 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h
@@ -3,6 +3,7 @@
#pragma once
#include "empty_subspace.h"
+#include "serialized_tensor_ref.h"
#include "subspace_type.h"
#include "vector_bundle.h"
#include <vespa/vespalib/datastore/aligner.h>
@@ -110,6 +111,13 @@ public:
auto aligner = select_aligner(cells_mem_size);
return VectorBundle(buf.data() + get_cells_offset(num_subspaces, aligner), num_subspaces, _subspace_type);
}
+ // Builds a SerializedTensorRef over 'buf': the vectors located at the cells
+ // offset, and num_subspaces * _num_mapped_dimensions labels read from the
+ // labels offset.
+ SerializedTensorRef get_serialized_tensor_ref(vespalib::ConstArrayRef<char> buf) const {
+     auto subspace_count = get_num_subspaces(buf);
+     auto aligner = select_aligner(get_cells_mem_size(subspace_count));
+     auto label_data = reinterpret_cast<const vespalib::string_id*>(buf.data() + get_labels_offset());
+     vespalib::ConstArrayRef<vespalib::string_id> all_labels(label_data, subspace_count * _num_mapped_dimensions);
+     VectorBundle all_vectors(buf.data() + get_cells_offset(subspace_count, aligner), subspace_count, _subspace_type);
+     return SerializedTensorRef(all_vectors, _num_mapped_dimensions, all_labels);
+ }
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h
index f602836bd32..2e86ff5fb67 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h
@@ -44,6 +44,13 @@ public:
auto buf = _array_store.get(ref);
return _ops.get_vectors(buf);
}
+ // Returns a default-constructed SerializedTensorRef for an invalid entry
+ // ref; otherwise the reference built from the buffer stored for 'ref'.
+ SerializedTensorRef get_serialized_tensor_ref(EntryRef ref) const {
+     if (ref.valid()) {
+         return _ops.get_serialized_tensor_ref(_array_store.get(ref));
+     }
+     return SerializedTensorRef();
+ }
// Used by unit test
static constexpr uint32_t get_offset_bits() noexcept { return RefType::offset_bits; }