summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@online.no> 2022-11-03 15:10:06 +0100
committer Tor Egge <Tor.Egge@online.no> 2022-11-03 15:10:06 +0100
commit fadba4cea932585e1c0551de42f1c75d1f517667 (patch)
tree 683e026aa6805e2de03848563ab0267af45232cb /searchlib
parent 1ec6225890971a2fce57ea006a4dd9ae94926755 (diff)
Add search::tensor::DocVectorAccess::get_vectors() member function.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp | 13
-rw-r--r-- searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp | 6
-rw-r--r-- searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp | 7
-rw-r--r-- searchlib/src/tests/tensor/tensor_buffer_store/tensor_buffer_store_test.cpp | 13
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp | 7
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h | 1
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h | 7
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp | 10
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.h | 1
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h | 16
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/doc_vector_access.h | 3
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp | 10
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h | 1
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h | 8
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h | 9
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/vector_bundle.h | 46
16 files changed, 125 insertions, 33 deletions
diff --git a/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp b/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp
index cf0656fc919..bd62e8a7f3c 100644
--- a/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp
+++ b/searchlib/src/tests/tensor/direct_tensor_store/direct_tensor_store_test.cpp
@@ -125,24 +125,21 @@ TEST_F(DirectTensorStoreTest, move_on_compact_allocates_new_entry_and_leaves_old
EXPECT_GT(mem_2.usedBytes(), mem_1.usedBytes() + tensor_mem_usage.allocatedBytes());
}
-TEST_F(DirectTensorStoreTest, get_typed_cells)
+TEST_F(DirectTensorStoreTest, get_vectors)
{
auto tensor_spec = TensorSpec(tensor_type_spec).add({{"x", "a"}}, 4.5).add({{"x", "b"}}, 5.5).add({{"x", "c"}}, 6.5).add({{"x", "d"}}, 7.5);
auto tensor = value_from_spec(tensor_spec, FastValueBuilderFactory::get());
auto ref = store.store_tensor(std::move(tensor));
std::vector<double> values;
+ auto vectors = store.get_vectors(ref);
+ EXPECT_EQ(4, vectors.subspaces());
for (uint32_t subspace = 0; subspace < 4; ++subspace) {
- auto cells = store.get_typed_cells(ref, subspace).typify<double>();
+ auto cells = vectors.cells(subspace).typify<double>();
EXPECT_EQ(1, cells.size());
values.emplace_back(cells[0]);
}
EXPECT_EQ((std::vector<double>{4.5, 5.5, 6.5, 7.5}), values);
- for (auto tref : { ref, EntryRef() }) {
- auto subspace = tref.valid() ? 4 : 0;
- auto cells = store.get_typed_cells(tref, subspace).typify<double>();
- EXPECT_EQ(1, cells.size());
- EXPECT_EQ(0.0, cells[0]);
- }
+ EXPECT_EQ(0, store.get_vectors(EntryRef()).subspaces());
}
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
index 8d3d389090b..1738ee510c8 100644
--- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
@@ -6,6 +6,7 @@
#include <vespa/searchlib/tensor/hnsw_index.h>
#include <vespa/searchlib/tensor/random_level_generator.h>
#include <vespa/searchlib/tensor/inv_log_level_generator.h>
+#include <vespa/searchlib/tensor/vector_bundle.h>
#include <vespa/searchlib/queryeval/global_filter.h>
#include <vespa/vespalib/datastore/compaction_spec.h>
#include <vespa/vespalib/datastore/compaction_strategy.h>
@@ -23,6 +24,7 @@ using namespace search::tensor;
using namespace vespalib::slime;
using vespalib::Slime;
using search::BitVector;
+using vespalib::eval::get_cell_type;
using vespalib::datastore::CompactionSpec;
using vespalib::datastore::CompactionStrategy;
using search::queryeval::GlobalFilter;
@@ -48,6 +50,10 @@ public:
ArrayRef ref(_vectors[docid]);
return vespalib::eval::TypedCells(ref);
}
+ VectorBundle get_vectors(uint32_t docid) const override { // Test double: wraps the stored vector for docid in a bundle.
+ ArrayRef ref(_vectors[docid]); // no range check on docid is visible here
+ return VectorBundle(ref.data(), get_cell_type<FloatType>(), 1, ref.size() * sizeof(FloatType), ref.size()); // 1 subspace; byte size and cell count both come from the vector length
+ }
void clear() { _vectors.clear(); }
};
diff --git a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
index c5c88d2eeff..4cc24657a00 100644
--- a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
@@ -15,6 +15,7 @@
#include <vespa/searchlib/tensor/hnsw_index.h>
#include <vespa/searchlib/tensor/inv_log_level_generator.h>
#include <vespa/searchlib/tensor/random_level_generator.h>
+#include <vespa/searchlib/tensor/vector_bundle.h>
#include <vespa/vespalib/data/input.h>
#include <vespa/vespalib/data/memory_input.h>
#include <vespa/vespalib/data/slime/slime.h>
@@ -31,6 +32,7 @@ LOG_SETUP("stress_hnsw_mt");
using namespace search::tensor;
using namespace vespalib::slime;
using search::BitVector;
+using vespalib::eval::CellType;
using vespalib::GenerationHandler;
using vespalib::MemoryUsage;
using vespalib::Slime;
@@ -116,6 +118,11 @@ public:
ConstVectorRef ref(_vectors[docid]);
return vespalib::eval::TypedCells(ref);
}
+ VectorBundle get_vectors(uint32_t docid) const override { // Test double: wraps the stored vector for docid in a bundle.
+ assert(docid < NUM_POSSIBLE_DOCS); // docid must index into _vectors
+ ConstVectorRef ref(_vectors[docid]);
+ return VectorBundle(ref.data(), CellType::FLOAT, 1, sizeof(float) * NUM_DIMS, NUM_DIMS); // 1 subspace of NUM_DIMS float cells
+ }
};
using FloatSqEuclideanDistance = SquaredEuclideanDistanceHW<float>;
diff --git a/searchlib/src/tests/tensor/tensor_buffer_store/tensor_buffer_store_test.cpp b/searchlib/src/tests/tensor/tensor_buffer_store/tensor_buffer_store_test.cpp
index 05e40200167..ec7fc2334c4 100644
--- a/searchlib/src/tests/tensor/tensor_buffer_store/tensor_buffer_store_test.cpp
+++ b/searchlib/src/tests/tensor/tensor_buffer_store/tensor_buffer_store_test.cpp
@@ -162,22 +162,19 @@ TEST_F(TensorBufferStoreTest, stored_tensor_can_be_encoded_and_stored_as_encoded
}
}
-TEST_F(TensorBufferStoreTest, get_typed_cells)
+TEST_F(TensorBufferStoreTest, get_vectors)
{
auto ref = store_tensor(tensor_specs.back());
std::vector<double> values;
+ auto vectors = _store.get_vectors(ref);
+ EXPECT_EQ(4, vectors.subspaces());
for (uint32_t subspace = 0; subspace < 4; ++subspace) {
- auto cells = _store.get_typed_cells(ref, subspace).typify<double>();
+ auto cells = vectors.cells(subspace).typify<double>();
EXPECT_EQ(1, cells.size());
values.emplace_back(cells[0]);
}
EXPECT_EQ((std::vector<double>{4.5, 5.5, 6.5, 7.5}), values);
- for (auto tref : { ref, EntryRef() }) {
- auto subspace = tref.valid() ? 4 : 0;
- auto cells = _store.get_typed_cells(tref, subspace).typify<double>();
- EXPECT_EQ(1, cells.size());
- EXPECT_EQ(0.0, cells[0]);
- }
+ EXPECT_EQ(0, _store.get_vectors(EntryRef()).subspaces());
}
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
index e69a6d925e9..fd94c4eb60c 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
@@ -99,4 +99,11 @@ DenseTensorAttribute::get_vector(uint32_t docid, uint32_t subspace) const
return _denseTensorStore.get_typed_cells(ref);
}
+VectorBundle
+DenseTensorAttribute::get_vectors(uint32_t docid) const // Implements DocVectorAccess::get_vectors() for dense tensors.
+{
+ EntryRef ref = acquire_entry_ref(docid); // entry ref currently associated with docid
+ return _denseTensorStore.get_vectors(ref); // the store builds the bundle for that ref
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
index dc398b7ec6f..2db7bb332a9 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.h
@@ -35,6 +35,7 @@ public:
// Implements DocVectorAccess
vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override;
+ VectorBundle get_vectors(uint32_t docid) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h
index 9d0ad6536b0..7b133977073 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_store.h
@@ -4,6 +4,7 @@
#include "tensor_store.h"
#include "empty_subspace.h"
+#include "vector_bundle.h"
#include <vespa/eval/eval/value_type.h>
#include <vespa/eval/eval/typed_cells.h>
#include <vespa/vespalib/datastore/datastore.h>
@@ -81,6 +82,12 @@ public:
return vespalib::eval::TypedCells(getRawBuffer(ref),
_type.cell_type(), getNumCells());
}
+ VectorBundle get_vectors(EntryRef ref) const { // Returns the cells stored for ref as a bundle.
+ if (!ref.valid()) {
+ return VectorBundle(); // invalid ref: empty bundle with 0 subspaces
+ }
+ return VectorBundle(getRawBuffer(ref), _type.cell_type(), 1, getBufSize(), getNumCells()); // always 1 subspace; getBufSize() is passed as its memory size and getNumCells() as its size
+ }
// The following method is meant to be used only for unit tests.
uint32_t getArraySize() const { return _bufferType.getArraySize(); }
};
diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp
index f1dd7238805..c2f0ff36c3a 100644
--- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.cpp
@@ -77,7 +77,15 @@ vespalib::eval::TypedCells
DirectTensorAttribute::get_vector(uint32_t docid, uint32_t subspace) const
{
EntryRef ref = acquire_entry_ref(docid);
- return _direct_store.get_typed_cells(ref, subspace);
+ auto vectors = _direct_store.get_vectors(ref);
+ return (subspace < vectors.subspaces()) ? vectors.cells(subspace) : _direct_store.get_empty_subspace();
+}
+
+VectorBundle
+DirectTensorAttribute::get_vectors(uint32_t docid) const // Implements DocVectorAccess::get_vectors() on top of the direct tensor store.
+{
+ EntryRef ref = acquire_entry_ref(docid); // entry ref currently associated with docid
+ return _direct_store.get_vectors(ref); // the store builds the bundle for that ref
}
} // namespace
diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.h b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.h
index 98c34ec1060..ed48ea20e0f 100644
--- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_attribute.h
@@ -26,6 +26,7 @@ public:
// Implements DocVectorAccess
vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override;
+ VectorBundle get_vectors(uint32_t docid) const override;
};
} // namespace search::tensor
diff --git a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h
index e11c215d44c..a84c321c13a 100644
--- a/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h
+++ b/searchlib/src/vespa/searchlib/tensor/direct_tensor_store.h
@@ -4,6 +4,7 @@
#include "tensor_store.h"
#include "empty_subspace.h"
+#include "vector_bundle.h"
#include <vespa/eval/eval/value.h>
#include <vespa/vespalib/datastore/datastore.h>
@@ -59,16 +60,17 @@ public:
EntryRef store_encoded_tensor(vespalib::nbostream& encoded) override;
std::unique_ptr<vespalib::eval::Value> get_tensor(EntryRef ref) const override;
bool encode_stored_tensor(EntryRef ref, vespalib::nbostream& target) const override;
- vespalib::eval::TypedCells get_typed_cells(EntryRef ref, uint32_t subspace) const {
+ vespalib::eval::TypedCells get_empty_subspace() const noexcept {
+ return _empty.cells();
+ }
+ VectorBundle get_vectors(EntryRef ref) const {
auto tensor = get_tensor_ptr(ref);
- if (tensor == nullptr || subspace >= tensor->index().size()) {
- return _empty.cells();
+ if (tensor == nullptr) {
+ return VectorBundle();
}
- auto cells = tensor->cells();
auto type = tensor->type();
- auto data = static_cast<const char *>(cells.data);
- auto dense_subspace_size = type.dense_subspace_size();
- return vespalib::eval::TypedCells(data + vespalib::eval::CellTypeUtils::mem_size(type.cell_type(), subspace * dense_subspace_size), cells.type, dense_subspace_size);
+ auto subspace_size = type.dense_subspace_size();
+ return VectorBundle(tensor->cells().data, type.cell_type(), tensor->index().size(), vespalib::eval::CellTypeUtils::mem_size(type.cell_type(), subspace_size), subspace_size);
}
};
diff --git a/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h b/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h
index 7a64c1d28ae..ab1d8d331d9 100644
--- a/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h
+++ b/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h
@@ -7,6 +7,8 @@
namespace search::tensor {
+class VectorBundle;
+
/**
* Interface that provides access to the vector that is associated with the given document id.
*
@@ -16,6 +18,7 @@ class DocVectorAccess {
public:
virtual ~DocVectorAccess() {}
virtual vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const = 0;
+ virtual VectorBundle get_vectors(uint32_t docid) const = 0;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp
index 3ae592e6e6f..4fd8da5ac9d 100644
--- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp
@@ -30,7 +30,15 @@ vespalib::eval::TypedCells
SerializedFastValueAttribute::get_vector(uint32_t docid, uint32_t subspace) const
{
EntryRef ref = acquire_entry_ref(docid);
- return _tensorBufferStore.get_typed_cells(ref, subspace);
+ auto vectors = _tensorBufferStore.get_vectors(ref);
+ return (subspace < vectors.subspaces()) ? vectors.cells(subspace) : _tensorBufferStore.get_empty_subspace();
+}
+
+VectorBundle
+SerializedFastValueAttribute::get_vectors(uint32_t docid) const // Implements DocVectorAccess::get_vectors() on top of the tensor buffer store.
+{
+ EntryRef ref = acquire_entry_ref(docid); // entry ref currently associated with docid
+ return _tensorBufferStore.get_vectors(ref); // the store builds the bundle for that ref
}
}
diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h
index 6215dbbc461..31a7f136d23 100644
--- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h
@@ -24,6 +24,7 @@ public:
// Implements DocVectorAccess
vespalib::eval::TypedCells get_vector(uint32_t docid, uint32_t subspace) const override;
+ VectorBundle get_vectors(uint32_t docid) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h
index 963e9b99920..43463e50ff1 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_operations.h
@@ -3,6 +3,7 @@
#pragma once
#include "empty_subspace.h"
+#include "vector_bundle.h"
#include <vespa/vespalib/datastore/aligner.h>
#include <vespa/vespalib/util/arrayref.h>
#include <vespa/vespalib/util/string_id.h>
@@ -103,14 +104,11 @@ public:
vespalib::eval::TypedCells get_empty_subspace() const noexcept {
return _empty.cells();
}
- vespalib::eval::TypedCells get_typed_cells(vespalib::ConstArrayRef<char> buf, uint32_t subspace) const {
+ VectorBundle get_vectors(vespalib::ConstArrayRef<char> buf) const {
auto num_subspaces = get_num_subspaces(buf);
- if (subspace >= num_subspaces) {
- return _empty.cells();
- }
auto cells_mem_size = get_cells_mem_size(num_subspaces);
auto aligner = select_aligner(cells_mem_size);
- return vespalib::eval::TypedCells(buf.data() + get_cells_offset(num_subspaces, aligner) + get_cells_mem_size(subspace), _cell_type, _dense_subspace_size);
+ return VectorBundle(buf.data() + get_cells_offset(num_subspaces, aligner), _cell_type, num_subspaces, _dense_subspace_size * _cell_mem_size, _dense_subspace_size);
}
};
diff --git a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h
index 071e238d5cc..ce00977c298 100644
--- a/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h
+++ b/searchlib/src/vespa/searchlib/tensor/tensor_buffer_store.h
@@ -34,12 +34,15 @@ public:
EntryRef store_encoded_tensor(vespalib::nbostream& encoded) override;
std::unique_ptr<vespalib::eval::Value> get_tensor(EntryRef ref) const override;
bool encode_stored_tensor(EntryRef ref, vespalib::nbostream& target) const override;
- vespalib::eval::TypedCells get_typed_cells(EntryRef ref, uint32_t subspace) const {
+ vespalib::eval::TypedCells get_empty_subspace() const noexcept {
+ return _ops.get_empty_subspace();
+ }
+ VectorBundle get_vectors(EntryRef ref) const {
if (!ref.valid()) {
- return _ops.get_empty_subspace();
+ return VectorBundle();
}
auto buf = _array_store.get(ref);
- return _ops.get_typed_cells(buf, subspace);
+ return _ops.get_vectors(buf);
}
};
diff --git a/searchlib/src/vespa/searchlib/tensor/vector_bundle.h b/searchlib/src/vespa/searchlib/tensor/vector_bundle.h
new file mode 100644
index 00000000000..cb1d3f99cfb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/vector_bundle.h
@@ -0,0 +1,46 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/typed_cells.h>
+#include <cassert>
+
+namespace search::tensor {
+
+/*
+ * Class referencing the cells owned by a tensor in a form suitable to extract tensor cells for a subspace. It holds only a pointer plus layout information and never frees the cells; presumably the tensor must outlive the bundle (TODO confirm against callers).
+ * Subspace i starts _subspace_mem_size * i bytes after _data and is returned by cells(i). A default constructed bundle has 0 subspaces.
+ */
+class VectorBundle
+{
+ const void* _data; // start of the cells of subspace 0; nullptr when default constructed
+ vespalib::eval::CellType _cell_type; // cell type given to every TypedCells returned by cells()
+ uint32_t _subspaces; // number of addressable subspaces
+ size_t _subspace_mem_size; // distance in bytes between the starts of consecutive subspaces
+ size_t _subspace_size; // size given to TypedCells for each subspace
+public:
+ VectorBundle() // Creates an empty bundle; cells() must not be called on it since subspaces() is 0.
+ : _data(nullptr),
+ _cell_type(vespalib::eval::CellType::DOUBLE),
+ _subspaces(0),
+ _subspace_mem_size(0),
+ _subspace_size(0)
+ {
+ }
+ VectorBundle(const void *data, vespalib::eval::CellType cell_type, uint32_t subspaces, size_t subspace_mem_size, size_t subspace_size) // Stores the arguments as given; nothing is copied or validated.
+ : _data(data),
+ _cell_type(cell_type),
+ _subspaces(subspaces),
+ _subspace_mem_size(subspace_mem_size),
+ _subspace_size(subspace_size)
+ {
+ }
+ ~VectorBundle() = default; // nothing to release: _data is never deleted by this class
+ uint32_t subspaces() const noexcept { return _subspaces; } // number of subspaces; valid arguments to cells() are [0, subspaces())
+ const vespalib::eval::TypedCells cells(uint32_t subspace) const noexcept { // Returns the cells of one subspace.
+ assert(subspace < _subspaces); // precondition, checked by assert only (no check when NDEBUG is defined)
+ return vespalib::eval::TypedCells(static_cast<const char*>(_data) + _subspace_mem_size * subspace, _cell_type, _subspace_size); // char* arithmetic, so the offset is in bytes
+ }
+};
+
+}