author    Håvard Pettersen <havardpe@oath.com>    2021-01-05 12:15:48 +0000
committer Håvard Pettersen <havardpe@oath.com>    2021-01-05 12:18:36 +0000
commit    d2ac897f41951ec86082b419b22e06369322f04c (patch)
tree      7afdf555f7832324e91f4dc58edf7448ce35da9f /searchlib
parent    74665956e649cb28682ba25d4f2089a7fae5087e (diff)
use minimal tensor entry to keep track of tensor data
use thin value views for evaluation and serialization
Diffstat (limited to 'searchlib')
-rw-r--r--  searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp |   4
-rw-r--r--  searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp            | 152
-rw-r--r--  searchlib/src/vespa/searchlib/tensor/streamed_value_store.h              |  41
3 files changed, 179 insertions(+), 18 deletions(-)
diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp
index d32d021c8ea..260ffa1a388 100644
--- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp
@@ -53,8 +53,8 @@ SerializedFastValueAttribute::getTensor(DocId docId) const
if (!ref.valid()) {
return {};
}
- if (const auto * ptr = _streamedValueStore.get_tensor(ref)) {
- return FastValueBuilderFactory::get().copy(*ptr);
+ if (const auto * ptr = _streamedValueStore.get_tensor_entry(ref)) {
+ return ptr->create_fast_value_view(_tensor_type);
}
return {};
}
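For orientation before the next file: the new read path hands the caller a thin view over data that stays inside the store, where the old path deep-copied into a fresh FastValue. A minimal caller-side sketch, assuming an attribute `attr` holding tensors with double cells and a valid `doc_id` (both names and the cell type are illustrative, not from the patch):

    // the returned Value borrows the entry's cells and label handles,
    // so it is only usable while the store keeps the TensorEntry alive
    if (auto tensor = attr.getTensor(doc_id)) {
        double sum = 0.0;
        for (double cell : tensor->cells().typify<double>()) {
            sum += cell;   // evaluation reads the stored cells in place
        }
    }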
diff --git a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp
index 755fc4f7dd0..ef4b711b86f 100644
--- a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp
@@ -3,10 +3,12 @@
#include "streamed_value_store.h"
#include <vespa/eval/eval/value.h>
#include <vespa/eval/eval/value_codec.h>
+#include <vespa/eval/eval/fast_value.hpp>
#include <vespa/eval/streamed/streamed_value_builder_factory.h>
#include <vespa/eval/streamed/streamed_value_view.h>
#include <vespa/vespalib/datastore/datastore.hpp>
#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/util/typify.h>
#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/log/log.h>
@@ -15,9 +17,142 @@ LOG_SETUP(".searchlib.tensor.streamed_value_store");
using vespalib::datastore::Handle;
using vespalib::datastore::EntryRef;
using namespace vespalib::eval;
+using vespalib::ConstArrayRef;
+using vespalib::MemoryUsage;
namespace search::tensor {
+//-----------------------------------------------------------------------------
+
+namespace {
+
+template <typename CT, typename F>
+void each_subspace(const Value &value, size_t num_mapped, size_t dense_size, F f) {
+ size_t subspace;
+ std::vector<label_t> addr(num_mapped);
+ std::vector<label_t*> refs;
+ refs.reserve(addr.size());
+ for (label_t &label: addr) {
+ refs.push_back(&label);
+ }
+ auto cells = value.cells().typify<CT>();
+ auto view = value.index().create_view({});
+ view->lookup({});
+ while (view->next_result(refs, subspace)) {
+ size_t offset = subspace * dense_size;
+ f(ConstArrayRef<label_t>(addr), ConstArrayRef<CT>(cells.begin() + offset, dense_size));
+ }
+}
+
+using TensorEntry = StreamedValueStore::TensorEntry;
+
+struct CreateTensorEntry {
+ template <typename CT>
+ static TensorEntry::SP invoke(const Value &value, size_t num_mapped, size_t dense_size) {
+ using EntryImpl = StreamedValueStore::TensorEntryImpl<CT>;
+ return std::make_shared<EntryImpl>(value, num_mapped, dense_size);
+ }
+};
+
+using HandleView = vespalib::SharedStringRepo::HandleView;
+
+struct MyFastValueView final : Value {
+ const ValueType &my_type;
+ FastValueIndex my_index;
+ TypedCells my_cells;
+ MyFastValueView(const ValueType &type_ref, HandleView handle_view, TypedCells cells, size_t num_mapped, size_t num_spaces)
+ : my_type(type_ref),
+ my_index(num_mapped, handle_view, num_spaces),
+ my_cells(cells)
+ {
+ const std::vector<label_t> &labels = handle_view.handles();
+ for (size_t i = 0; i < num_spaces; ++i) {
+ ConstArrayRef<label_t> addr(&labels[i * num_mapped], num_mapped);
+ my_index.map.add_mapping(FastAddrMap::hash_labels(addr));
+ }
+ assert(my_index.map.size() == num_spaces);
+ }
+ const ValueType &type() const override { return my_type; }
+ const Value::Index &index() const override { return my_index; }
+ TypedCells cells() const override { return my_cells; }
+ MemoryUsage get_memory_usage() const override {
+ MemoryUsage usage = self_memory_usage<MyFastValueView>();
+ usage.merge(my_index.map.estimate_extra_memory_usage());
+ return usage;
+ }
+};
+
+} // <unnamed>
+
+//-----------------------------------------------------------------------------
+
+StreamedValueStore::TensorEntry::~TensorEntry() = default;
+
+StreamedValueStore::TensorEntry::SP
+StreamedValueStore::TensorEntry::create_shared_entry(const Value &value)
+{
+ size_t num_mapped = value.type().count_mapped_dimensions();
+ size_t dense_size = value.type().dense_subspace_size();
+ return vespalib::typify_invoke<1,TypifyCellType,CreateTensorEntry>(value.type().cell_type(), value, num_mapped, dense_size);
+}
+
+template <typename CT>
+StreamedValueStore::TensorEntryImpl<CT>::TensorEntryImpl(const Value &value, size_t num_mapped, size_t dense_size)
+ : handles(num_mapped * value.index().size()),
+ cells()
+{
+ cells.reserve(dense_size * value.index().size());
+ auto store_subspace = [&](auto addr, auto data) {
+ for (label_t label: addr) {
+ handles.add(label);
+ }
+ for (CT entry: data) {
+ cells.push_back(entry);
+ }
+ };
+ each_subspace<CT>(value, num_mapped, dense_size, store_subspace);
+}
+
+template <typename CT>
+Value::UP
+StreamedValueStore::TensorEntryImpl<CT>::create_fast_value_view(const ValueType &type_ref) const
+{
+ size_t num_mapped = type_ref.count_mapped_dimensions();
+ size_t dense_size = type_ref.dense_subspace_size();
+ size_t num_spaces = cells.size() / dense_size;
+ assert(dense_size * num_spaces == cells.size());
+ assert(num_mapped * num_spaces == handles.view().handles().size());
+ return std::make_unique<MyFastValueView>(type_ref, handles.view(), TypedCells(cells), num_mapped, num_spaces);
+}
+
+template <typename CT>
+void
+StreamedValueStore::TensorEntryImpl<CT>::encode_value(const ValueType &type, vespalib::nbostream &target) const
+{
+ size_t num_mapped = type.count_mapped_dimensions();
+ size_t dense_size = type.dense_subspace_size();
+ size_t num_spaces = cells.size() / dense_size;
+ assert(dense_size * num_spaces == cells.size());
+ assert(num_mapped * num_spaces == handles.view().handles().size());
+ StreamedValueView my_value(type, num_mapped, TypedCells(cells), num_spaces, handles.view().handles());
+ ::vespalib::eval::encode_value(my_value, target);
+}
+
+template <typename CT>
+MemoryUsage
+StreamedValueStore::TensorEntryImpl<CT>::get_memory_usage() const
+{
+ MemoryUsage usage = self_memory_usage<TensorEntryImpl<CT>>();
+ usage.merge(vector_extra_memory_usage(handles.view().handles()));
+ usage.merge(vector_extra_memory_usage(cells));
+ return usage;
+}
+
+template <typename CT>
+StreamedValueStore::TensorEntryImpl<CT>::~TensorEntryImpl() = default;
+
+//-----------------------------------------------------------------------------
+
constexpr size_t MIN_BUFFER_ARRAYS = 8192;
StreamedValueStore::TensorBufferType::TensorBufferType()
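A note on the helpers introduced in the hunk above: create_shared_entry uses vespalib::typify_invoke to resolve the runtime cell type into the compile-time CT parameter of TensorEntryImpl, and the TensorEntryImpl constructor walks the source value with the each_subspace helper. A hedged usage sketch of that helper, mirroring the store_subspace lambda in the patch (the float cell type and the lambda body are illustrative assumptions; value, num_mapped and dense_size are the names already in scope there):

    // sketch: visiting every subspace of a float-celled value; 'addr' carries
    // one label per mapped dimension, 'cells' the dense cells of that subspace
    double total = 0.0;
    each_subspace<float>(value, num_mapped, dense_size,
                         [&](ConstArrayRef<label_t> addr, ConstArrayRef<float> cells) {
                             (void) addr;            // identifies the mapped address of this subspace
                             for (float cell : cells) {
                                 total += cell;
                             }
                         });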
@@ -28,7 +163,7 @@ StreamedValueStore::TensorBufferType::TensorBufferType()
void
StreamedValueStore::TensorBufferType::cleanHold(void* buffer, size_t offset, size_t num_elems, CleanContext clean_ctx)
{
- TensorSP* elem = static_cast<TensorSP*>(buffer) + offset;
+ TensorEntry::SP* elem = static_cast<TensorEntry::SP*>(buffer) + offset;
for (size_t i = 0; i < num_elems; ++i) {
clean_ctx.extraBytesCleaned((*elem)->get_memory_usage().allocatedBytes());
*elem = _emptyEntry;
@@ -47,7 +182,7 @@ StreamedValueStore::StreamedValueStore(const ValueType &tensor_type)
StreamedValueStore::~StreamedValueStore() = default;
EntryRef
-StreamedValueStore::add_entry(TensorSP tensor)
+StreamedValueStore::add_entry(TensorEntry::SP tensor)
{
auto ref = _concrete_store.addEntry(tensor);
auto& state = _concrete_store.getBufferState(RefType(ref).bufferId());
@@ -55,8 +190,8 @@ StreamedValueStore::add_entry(TensorSP tensor)
return ref;
}
-const vespalib::eval::Value *
-StreamedValueStore::get_tensor(EntryRef ref) const
+const StreamedValueStore::TensorEntry *
+StreamedValueStore::get_tensor_entry(EntryRef ref) const
{
if (!ref.valid()) {
return nullptr;
@@ -93,8 +228,8 @@ StreamedValueStore::move(EntryRef ref)
bool
StreamedValueStore::encode_tensor(EntryRef ref, vespalib::nbostream &target) const
{
- if (const auto * val = get_tensor(ref)) {
- vespalib::eval::encode_value(*val, target);
+ if (const auto * entry = get_tensor_entry(ref)) {
+ entry->encode_value(_tensor_type, target);
return true;
} else {
return false;
@@ -105,8 +240,7 @@ TensorStore::EntryRef
StreamedValueStore::store_tensor(const Value &tensor)
{
assert(tensor.type() == _tensor_type);
- auto val = StreamedValueBuilderFactory::get().copy(tensor);
- return add_entry(TensorSP(std::move(val)));
+ return add_entry(TensorEntry::create_shared_entry(tensor));
}
TensorStore::EntryRef
@@ -114,7 +248,7 @@ StreamedValueStore::store_encoded_tensor(vespalib::nbostream &encoded)
{
const auto &factory = StreamedValueBuilderFactory::get();
auto val = vespalib::eval::decode_value(encoded, factory);
- return add_entry(TensorSP(std::move(val)));
+ return store_tensor(*val);
}
}
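Taken together, both ingest paths now end in the same place: store_tensor builds a shared TensorEntry directly from any Value, and store_encoded_tensor simply decodes and delegates to it. A hedged round-trip sketch against the public surface shown in this file (the 'tensor' value and the buffer name are illustrative):

    // store a tensor, then read it back as a thin view and as serialized bytes
    StreamedValueStore store(tensor.type());
    auto ref = store.store_tensor(tensor);                          // wraps TensorEntry::create_shared_entry
    if (const auto *entry = store.get_tensor_entry(ref)) {
        auto view = entry->create_fast_value_view(tensor.type());   // borrows the entry's cells and label handles
    }
    vespalib::nbostream buf;
    bool ok = store.encode_tensor(ref, buf);                        // TensorEntry::encode_value under the hood
    assert(ok);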
diff --git a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h
index ff40c260ff9..3a9d9a0b7b4 100644
--- a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h
+++ b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h
@@ -7,7 +7,7 @@
#include <vespa/eval/eval/value.h>
#include <vespa/eval/streamed/streamed_value.h>
#include <vespa/vespalib/objects/nbostream.h>
-#include <vespa/vespalib/util/typify.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
namespace search::tensor {
@@ -15,15 +15,42 @@ namespace search::tensor {
* Class for StreamedValue tensors in memory.
*/
class StreamedValueStore : public TensorStore {
+public:
+ using Value = vespalib::eval::Value;
+ using ValueType = vespalib::eval::ValueType;
+ using Handles = vespalib::SharedStringRepo::StrongHandles;
+ using MemoryUsage = vespalib::MemoryUsage;
+
+ // interface for tensor entries
+ struct TensorEntry {
+ using SP = std::shared_ptr<TensorEntry>;
+ virtual Value::UP create_fast_value_view(const ValueType &type_ref) const = 0;
+ virtual void encode_value(const ValueType &type, vespalib::nbostream &target) const = 0;
+ virtual MemoryUsage get_memory_usage() const = 0;
+ virtual ~TensorEntry();
+ static TensorEntry::SP create_shared_entry(const Value &value);
+ };
+
+ // implementation of tensor entries
+ template <typename CT>
+ struct TensorEntryImpl : public TensorEntry {
+ Handles handles;
+ std::vector<CT> cells;
+ TensorEntryImpl(const Value &value, size_t num_mapped, size_t dense_size);
+ Value::UP create_fast_value_view(const ValueType &type_ref) const override;
+ void encode_value(const ValueType &type, vespalib::nbostream &target) const override;
+ MemoryUsage get_memory_usage() const override;
+ ~TensorEntryImpl() override;
+ };
+
private:
// Note: Must use SP (instead of UP) because of fallbackCopy() and initializeReservedElements() in BufferType,
// and implementation of move().
- using TensorSP = std::shared_ptr<vespalib::eval::Value>;
- using TensorStoreType = vespalib::datastore::DataStore<TensorSP>;
+ using TensorStoreType = vespalib::datastore::DataStore<TensorEntry::SP>;
- class TensorBufferType : public vespalib::datastore::BufferType<TensorSP> {
+ class TensorBufferType : public vespalib::datastore::BufferType<TensorEntry::SP> {
private:
- using ParentType = BufferType<TensorSP>;
+ using ParentType = BufferType<TensorEntry::SP>;
using ParentType::_emptyEntry;
using CleanContext = typename ParentType::CleanContext;
public:
@@ -32,7 +59,7 @@ private:
};
TensorStoreType _concrete_store;
const vespalib::eval::ValueType _tensor_type;
- EntryRef add_entry(TensorSP tensor);
+ EntryRef add_entry(TensorEntry::SP tensor);
public:
StreamedValueStore(const vespalib::eval::ValueType &tensor_type);
~StreamedValueStore() override;
@@ -42,7 +69,7 @@ public:
void holdTensor(EntryRef ref) override;
EntryRef move(EntryRef ref) override;
- const vespalib::eval::Value * get_tensor(EntryRef ref) const;
+ const TensorEntry * get_tensor_entry(EntryRef ref) const;
bool encode_tensor(EntryRef ref, vespalib::nbostream &target) const;
EntryRef store_tensor(const vespalib::eval::Value &tensor);