diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-11-17 20:52:33 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-11-23 11:04:18 +0000 |
commit | 359903c03748d520357067d03e7ac8eed7c5a19b (patch) | |
tree | 27abd343d5e782340f474d053f2740259c5a206a | |
parent | d64873a41e816667264a2272ba879e30fb93863d (diff) |
add StreamedValue
-rw-r--r-- | eval/CMakeLists.txt | 1 | ||||
-rw-r--r-- | eval/src/vespa/eval/CMakeLists.txt | 1 | ||||
-rw-r--r-- | eval/src/vespa/eval/streamed/CMakeLists.txt | 11 | ||||
-rw-r--r-- | eval/src/vespa/eval/streamed/streamed_value.cpp | 24 | ||||
-rw-r--r-- | eval/src/vespa/eval/streamed/streamed_value.h | 40 | ||||
-rw-r--r-- | eval/src/vespa/eval/streamed/streamed_value_builder.cpp | 13 | ||||
-rw-r--r-- | eval/src/vespa/eval/streamed/streamed_value_builder.h | 61 | ||||
-rw-r--r-- | eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp | 33 | ||||
-rw-r--r-- | eval/src/vespa/eval/streamed/streamed_value_builder_factory.h | 24 | ||||
-rw-r--r-- | eval/src/vespa/eval/streamed/streamed_value_index.cpp | 103 | ||||
-rw-r--r-- | eval/src/vespa/eval/streamed/streamed_value_index.h | 32 | ||||
-rw-r--r-- | eval/src/vespa/eval/streamed/streamed_value_utils.cpp | 9 | ||||
-rw-r--r-- | eval/src/vespa/eval/streamed/streamed_value_utils.h | 64 | ||||
-rw-r--r-- | eval/src/vespa/eval/streamed/streamed_value_view.cpp | 9 | ||||
-rw-r--r-- | eval/src/vespa/eval/streamed/streamed_value_view.h | 38 |
15 files changed, 463 insertions, 0 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 76c76d64b79..5d012e50da6 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -92,6 +92,7 @@ vespa_define_module( src/vespa/eval/eval/value_cache src/vespa/eval/gp src/vespa/eval/instruction + src/vespa/eval/streamed src/vespa/eval/tensor src/vespa/eval/tensor/dense src/vespa/eval/tensor/mixed diff --git a/eval/src/vespa/eval/CMakeLists.txt b/eval/src/vespa/eval/CMakeLists.txt index ee9a793bba0..10167c9d74a 100644 --- a/eval/src/vespa/eval/CMakeLists.txt +++ b/eval/src/vespa/eval/CMakeLists.txt @@ -7,6 +7,7 @@ vespa_add_library(vespaeval $<TARGET_OBJECTS:eval_eval_test> $<TARGET_OBJECTS:eval_eval_value_cache> $<TARGET_OBJECTS:eval_gp> + $<TARGET_OBJECTS:eval_streamed> $<TARGET_OBJECTS:eval_tensor> $<TARGET_OBJECTS:eval_tensor_dense> $<TARGET_OBJECTS:eval_tensor_mixed> diff --git a/eval/src/vespa/eval/streamed/CMakeLists.txt b/eval/src/vespa/eval/streamed/CMakeLists.txt new file mode 100644 index 00000000000..ee928d7b2c9 --- /dev/null +++ b/eval/src/vespa/eval/streamed/CMakeLists.txt @@ -0,0 +1,11 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +vespa_add_library(eval_streamed OBJECT + SOURCES + streamed_value.cpp + streamed_value_index.cpp + streamed_value_utils.cpp + streamed_value_builder.cpp + streamed_value_builder_factory.cpp + streamed_value_view.cpp +) diff --git a/eval/src/vespa/eval/streamed/streamed_value.cpp b/eval/src/vespa/eval/streamed/streamed_value.cpp new file mode 100644 index 00000000000..2826a712cc9 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value.cpp @@ -0,0 +1,24 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "streamed_value.h" +#include <vespa/log/log.h> + +LOG_SETUP(".vespalib.eval.streamed.streamed_value"); + +namespace vespalib::eval { + +template <typename T> +StreamedValue<T>::~StreamedValue() = default; + +template <typename T> +MemoryUsage +StreamedValue<T>::get_memory_usage() const +{ + return self_memory_usage<StreamedValue<T>>(); +} + +template class StreamedValue<double>; +template class StreamedValue<float>; + +} // namespace + diff --git a/eval/src/vespa/eval/streamed/streamed_value.h b/eval/src/vespa/eval/streamed/streamed_value.h new file mode 100644 index 00000000000..bb9bb1f30b8 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value.h @@ -0,0 +1,40 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/value_type.h> +#include <vespa/eval/eval/value.h> +#include "streamed_value_index.h" + +namespace vespalib::eval { + +template <typename T> +class StreamedValue : public Value +{ +private: + ValueType _type; + std::vector<T> _my_cells; + Array<char> _label_buf; + StreamedValueIndex _my_index; + +public: + StreamedValue(ValueType type, std::vector<T> cells, size_t num_ss, Array<char> && label_buf) + : _type(std::move(type)), + _my_cells(std::move(cells)), + _label_buf(std::move(label_buf)), + _my_index(_type.count_mapped_dimensions(), + num_ss, + ConstArrayRef<char>(_label_buf.begin(), _label_buf.size())) + { + if (num_ss * _type.dense_subspace_size() != _my_cells.size()) abort(); + } + + ~StreamedValue(); + const ValueType &type() const final override { return _type; } + TypedCells cells() const final override { return TypedCells(_my_cells); } + const Value::Index &index() const override { return _my_index; } + MemoryUsage get_memory_usage() const final override; + auto serialize_index() const { return _my_index.serialize(); } +}; + +} // namespace diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder.cpp b/eval/src/vespa/eval/streamed/streamed_value_builder.cpp new file mode 100644 index 00000000000..957121c42b7 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_builder.cpp @@ -0,0 +1,13 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "streamed_value_builder.h" + +namespace vespalib::eval { + +template<typename T> +StreamedValueBuilder<T>::~StreamedValueBuilder() = default; + +template class StreamedValueBuilder<double>; +template class StreamedValueBuilder<float>; + +} // namespace diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder.h b/eval/src/vespa/eval/streamed/streamed_value_builder.h new file mode 100644 index 00000000000..ca75dac840b --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_builder.h @@ -0,0 +1,61 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "streamed_value.h" +#include <vespa/vespalib/objects/nbostream.h> + +namespace vespalib::eval { + +template <typename T> +class StreamedValueBuilder : public ValueBuilder<T> +{ +private: + ValueType _type; + size_t _dsss; + std::vector<T> _cells; + size_t _num_subspaces; + nbostream _labels; +public: + StreamedValueBuilder(const ValueType &type, + size_t num_mapped_in, + size_t subspace_size_in, + size_t expected_subspaces) + : _type(type), + _dsss(subspace_size_in), + _cells(), + _num_subspaces(0), + _labels() + { + _cells.reserve(subspace_size_in * expected_subspaces); + // assume small sized label strings: + _labels.reserve(num_mapped_in * expected_subspaces * 3); + }; + + ~StreamedValueBuilder(); + + ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) override { + for (auto label : addr) { + _labels.writeSmallString(label); + } + size_t old_sz = _cells.size(); + _cells.resize(old_sz + _dsss); + _num_subspaces++; + return ArrayRef<T>(&_cells[old_sz], _dsss); + } + + std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>>) override { + if (_num_subspaces == 0 && _type.count_mapped_dimensions() == 0) { + // add required dense subspace + add_subspace({}); + } + // note: _num_subspaces * _dsss == _cells.size() + return std::make_unique<StreamedValue<T>>(std::move(_type), + std::move(_cells), + _num_subspaces, + _labels.extract_buffer()); + } + +}; + +} // namespace diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp new file mode 100644 index 00000000000..550331d1905 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp @@ -0,0 +1,33 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "streamed_value_builder_factory.h" +#include "streamed_value_builder.h" + +namespace vespalib::eval { + +struct SelectStreamedValueBuilder { + template <typename T, typename ...Args> + static std::unique_ptr<ValueBuilderBase> invoke(const ValueType &type, Args &&...args) + { + assert(check_cell_type<T>(type.cell_type())); + return std::make_unique<StreamedValueBuilder<T>>(type, std::forward<Args>(args)...); + } +}; + +std::unique_ptr<ValueBuilderBase> +StreamedValueBuilderFactory::create_value_builder_base(const ValueType &type, + size_t num_mapped_in, + size_t subspace_size_in, + size_t expected_subspaces) const +{ + return typify_invoke<1,TypifyCellType,SelectStreamedValueBuilder>( + type.cell_type(), + type, num_mapped_in, subspace_size_in, expected_subspaces); +} + +StreamedValueBuilderFactory::~StreamedValueBuilderFactory() = default; +StreamedValueBuilderFactory StreamedValueBuilderFactory::_factory; + +} // namespace + + diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h new file mode 100644 index 00000000000..3f81981f429 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h @@ -0,0 +1,24 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "streamed_value.h" + +namespace vespalib::eval { + +/** + * A factory that can generate appropriate ValueBuilder instances + */ +struct StreamedValueBuilderFactory : ValueBuilderFactory { +private: + StreamedValueBuilderFactory() {} + static StreamedValueBuilderFactory _factory; + std::unique_ptr<ValueBuilderBase> create_value_builder_base( + const ValueType &type, size_t num_mapped_in, + size_t subspace_size_in, size_t expected_subspaces) const override; +public: + static const StreamedValueBuilderFactory &get() { return _factory; } + ~StreamedValueBuilderFactory(); +}; + +} diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.cpp b/eval/src/vespa/eval/streamed/streamed_value_index.cpp new file mode 100644 index 00000000000..5e50765255e --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_index.cpp @@ -0,0 +1,103 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "streamed_value_index.h" +#include "streamed_value_utils.h" + +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/util/visit_ranges.h> +#include <vespa/log/log.h> + +LOG_SETUP(".searchlib.tensor.streamed_value_index"); + +namespace vespalib::eval { + +namespace { + +struct StreamedFilterView : Value::Index::View +{ + LabelBlockStream label_blocks; + std::vector<size_t> view_dims; + std::vector<vespalib::stringref> to_match; + + StreamedFilterView(LabelBlockStream labels, std::vector<size_t> view_dims_in) + : label_blocks(std::move(labels)), + view_dims(std::move(view_dims_in)), + to_match() + { + to_match.reserve(view_dims.size()); + } + + void lookup(ConstArrayRef<const vespalib::stringref*> addr) override { + label_blocks.reset(); + to_match.clear(); + for (auto ptr : addr) { + to_match.push_back(*ptr); + } + assert(view_dims.size() == to_match.size()); + } + + bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override { + while (const auto block = label_blocks.next_block()) { + idx_out = block.ss_idx; + bool matches = true; + size_t out_idx = 0; + size_t vdm_idx = 0; + for (size_t dim = 0; dim < block.address.size(); ++dim) { + if (vdm_idx < view_dims.size() && (view_dims[vdm_idx] == dim)) { + if (block.address[dim] != to_match[vdm_idx]) { + matches = false; + } + ++vdm_idx; + } else { + *addr_out[out_idx++] = block.address[dim]; + } + } + assert(out_idx == addr_out.size()); + assert(vdm_idx == view_dims.size()); + if (matches) return true; + } + return false; + } +}; + +struct StreamedIterationView : Value::Index::View +{ + LabelBlockStream label_blocks; + + StreamedIterationView(LabelBlockStream labels) + : label_blocks(std::move(labels)) + {} + + void lookup(ConstArrayRef<const vespalib::stringref*> addr) override { + label_blocks.reset(); + assert(addr.size() == 0); + } + + bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override { + if (auto block = label_blocks.next_block()) { + idx_out = block.ss_idx; + size_t i = 0; + for (auto ptr : addr_out) { + *ptr = block.address[i++]; + } + assert(i == block.address.size()); + return true; + } + return false; + } +}; + +} // namespace <unnamed> + +std::unique_ptr<Value::Index::View> +StreamedValueIndex::create_view(const std::vector<size_t> &dims) const +{ + LabelBlockStream label_stream(_data.num_subspaces, _data.labels_buffer, _data.num_mapped_dims); + if (dims.empty()) { + return std::make_unique<StreamedIterationView>(std::move(label_stream)); + } + return std::make_unique<StreamedFilterView>(std::move(label_stream), dims); +} + +} // namespace vespalib::eval diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.h b/eval/src/vespa/eval/streamed/streamed_value_index.h new file mode 100644 index 00000000000..f2571bd27c4 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_index.h @@ -0,0 +1,32 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/value.h> + +namespace vespalib::eval { + +class StreamedValueIndex : public Value::Index +{ +public: + struct SerializedForm { + uint32_t num_mapped_dims; + uint32_t num_subspaces; + ConstArrayRef<char> labels_buffer; + }; + StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, ConstArrayRef<char> labels_buf) + : _data{num_mapped_dims, num_subspaces, labels_buf} + {} + + // index API: + size_t size() const override { return _data.num_subspaces; } + std::unique_ptr<View> create_view(const std::vector<size_t> &dims) const override; + + SerializedForm serialize() const { return _data; } + +private: + SerializedForm _data; +}; + +} // namespace + diff --git a/eval/src/vespa/eval/streamed/streamed_value_utils.cpp b/eval/src/vespa/eval/streamed/streamed_value_utils.cpp new file mode 100644 index 00000000000..1b4a91a9080 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_utils.cpp @@ -0,0 +1,9 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "streamed_value_utils.h" + +namespace vespalib::eval { + +LabelBlockStream::~LabelBlockStream() = default; + +} // namespace diff --git a/eval/src/vespa/eval/streamed/streamed_value_utils.h b/eval/src/vespa/eval/streamed/streamed_value_utils.h new file mode 100644 index 00000000000..7610e2cacd4 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_utils.h @@ -0,0 +1,64 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/value.h> +#include <vespa/vespalib/objects/nbostream.h> + +namespace vespalib::eval { + +struct LabelStream { + nbostream_longlivedbuf source; + LabelStream(ConstArrayRef<char> data) : source(data.begin(), data.size()) {} + vespalib::stringref next_label() { + size_t str_size = source.getInt1_4Bytes(); + vespalib::stringref label(source.peek(), str_size); + source.adjustReadPos(str_size); + return label; + } + void reset() { source.rp(0); } +}; + +struct LabelBlock { + static constexpr size_t npos = -1; + size_t ss_idx; + ConstArrayRef<vespalib::stringref> address; + operator bool() const { return ss_idx != npos; } +}; + +class LabelBlockStream { +private: + size_t _num_subspaces; + LabelStream _labels; + size_t _subspace_index; + std::vector<vespalib::stringref> _current_address; +public: + LabelBlock next_block() { + if (_subspace_index < _num_subspaces) { + for (auto & label : _current_address) { + label = _labels.next_label(); + } + return LabelBlock{_subspace_index++, _current_address}; + } else { + return LabelBlock{LabelBlock::npos, {}}; + } + } + + void reset() { + _subspace_index = 0; + _labels.reset(); + } + + LabelBlockStream(uint32_t num_subspaces, + ConstArrayRef<char> label_buf, + uint32_t num_mapped_dims) + : _num_subspaces(num_subspaces), + _labels(label_buf), + _subspace_index(-1), + _current_address(num_mapped_dims) + {} + + ~LabelBlockStream(); +}; + +} // namespace diff --git a/eval/src/vespa/eval/streamed/streamed_value_view.cpp b/eval/src/vespa/eval/streamed/streamed_value_view.cpp new file mode 100644 index 00000000000..87e1e676692 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_view.cpp @@ -0,0 +1,9 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "streamed_value_view.h" + +namespace vespalib::eval { + +StreamedValueView::~StreamedValueView() = default; + +} // namespace diff --git a/eval/src/vespa/eval/streamed/streamed_value_view.h b/eval/src/vespa/eval/streamed/streamed_value_view.h new file mode 100644 index 00000000000..5dd428bee38 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_view.h @@ -0,0 +1,38 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/value_type.h> +#include <vespa/eval/eval/value.h> +#include "streamed_value_index.h" + +namespace vespalib::eval { + +class StreamedValueView : public Value +{ +private: + const ValueType &_type; + TypedCells _cells_ref; + StreamedValueIndex _my_index; + +public: + StreamedValueView(const ValueType &type, TypedCells cells, + size_t num_ss, ConstArrayRef<char> labels_buf) + : _type(type), + _cells_ref(cells), + _my_index(_type.count_mapped_dimensions(), num_ss, labels_buf) + { + if (num_ss * _type.dense_subspace_size() != _cells_ref.size) abort(); + } + + ~StreamedValueView(); + const ValueType &type() const final override { return _type; } + TypedCells cells() const final override { return _cells_ref; } + const Value::Index &index() const override { return _my_index; } + MemoryUsage get_memory_usage() const final override { + return self_memory_usage<StreamedValueView>(); + } + auto serialize_index() const { return _my_index.serialize(); } +}; + +} // namespace |