aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArne Juul <arnej@verizonmedia.com>2020-11-17 20:52:33 +0000
committerArne Juul <arnej@verizonmedia.com>2020-11-23 11:04:18 +0000
commit359903c03748d520357067d03e7ac8eed7c5a19b (patch)
tree27abd343d5e782340f474d053f2740259c5a206a
parentd64873a41e816667264a2272ba879e30fb93863d (diff)
add StreamedValue
-rw-r--r--eval/CMakeLists.txt1
-rw-r--r--eval/src/vespa/eval/CMakeLists.txt1
-rw-r--r--eval/src/vespa/eval/streamed/CMakeLists.txt11
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value.cpp24
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value.h40
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_builder.cpp13
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_builder.h61
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp33
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_builder_factory.h24
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_index.cpp103
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_index.h32
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_utils.cpp9
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_utils.h64
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_view.cpp9
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_view.h38
15 files changed, 463 insertions, 0 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt
index 76c76d64b79..5d012e50da6 100644
--- a/eval/CMakeLists.txt
+++ b/eval/CMakeLists.txt
@@ -92,6 +92,7 @@ vespa_define_module(
src/vespa/eval/eval/value_cache
src/vespa/eval/gp
src/vespa/eval/instruction
+ src/vespa/eval/streamed
src/vespa/eval/tensor
src/vespa/eval/tensor/dense
src/vespa/eval/tensor/mixed
diff --git a/eval/src/vespa/eval/CMakeLists.txt b/eval/src/vespa/eval/CMakeLists.txt
index ee9a793bba0..10167c9d74a 100644
--- a/eval/src/vespa/eval/CMakeLists.txt
+++ b/eval/src/vespa/eval/CMakeLists.txt
@@ -7,6 +7,7 @@ vespa_add_library(vespaeval
$<TARGET_OBJECTS:eval_eval_test>
$<TARGET_OBJECTS:eval_eval_value_cache>
$<TARGET_OBJECTS:eval_gp>
+ $<TARGET_OBJECTS:eval_streamed>
$<TARGET_OBJECTS:eval_tensor>
$<TARGET_OBJECTS:eval_tensor_dense>
$<TARGET_OBJECTS:eval_tensor_mixed>
diff --git a/eval/src/vespa/eval/streamed/CMakeLists.txt b/eval/src/vespa/eval/streamed/CMakeLists.txt
new file mode 100644
index 00000000000..ee928d7b2c9
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+vespa_add_library(eval_streamed OBJECT
+ SOURCES
+ streamed_value.cpp
+ streamed_value_index.cpp
+ streamed_value_utils.cpp
+ streamed_value_builder.cpp
+ streamed_value_builder_factory.cpp
+ streamed_value_view.cpp
+)
diff --git a/eval/src/vespa/eval/streamed/streamed_value.cpp b/eval/src/vespa/eval/streamed/streamed_value.cpp
new file mode 100644
index 00000000000..2826a712cc9
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value.cpp
@@ -0,0 +1,24 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "streamed_value.h"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".vespalib.eval.streamed.streamed_value");
+
+namespace vespalib::eval {
+
+template <typename T>
+StreamedValue<T>::~StreamedValue() = default;
+
+template <typename T>
+MemoryUsage
+StreamedValue<T>::get_memory_usage() const
+{
+ return self_memory_usage<StreamedValue<T>>();
+}
+
+template class StreamedValue<double>;
+template class StreamedValue<float>;
+
+} // namespace
+
diff --git a/eval/src/vespa/eval/streamed/streamed_value.h b/eval/src/vespa/eval/streamed/streamed_value.h
new file mode 100644
index 00000000000..bb9bb1f30b8
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value.h
@@ -0,0 +1,40 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/value_type.h>
+#include <vespa/eval/eval/value.h>
+#include "streamed_value_index.h"
+
+namespace vespalib::eval {
+
+template <typename T>
+class StreamedValue : public Value
+{
+private:
+ ValueType _type;
+ std::vector<T> _my_cells;
+ Array<char> _label_buf;
+ StreamedValueIndex _my_index;
+
+public:
+ StreamedValue(ValueType type, std::vector<T> cells, size_t num_ss, Array<char> && label_buf)
+ : _type(std::move(type)),
+ _my_cells(std::move(cells)),
+ _label_buf(std::move(label_buf)),
+ _my_index(_type.count_mapped_dimensions(),
+ num_ss,
+ ConstArrayRef<char>(_label_buf.begin(), _label_buf.size()))
+ {
+ if (num_ss * _type.dense_subspace_size() != _my_cells.size()) abort();
+ }
+
+ ~StreamedValue();
+ const ValueType &type() const final override { return _type; }
+ TypedCells cells() const final override { return TypedCells(_my_cells); }
+ const Value::Index &index() const override { return _my_index; }
+ MemoryUsage get_memory_usage() const final override;
+ auto serialize_index() const { return _my_index.serialize(); }
+};
+
+} // namespace
diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder.cpp b/eval/src/vespa/eval/streamed/streamed_value_builder.cpp
new file mode 100644
index 00000000000..957121c42b7
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_builder.cpp
@@ -0,0 +1,13 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "streamed_value_builder.h"
+
+namespace vespalib::eval {
+
+template<typename T>
+StreamedValueBuilder<T>::~StreamedValueBuilder() = default;
+
+template class StreamedValueBuilder<double>;
+template class StreamedValueBuilder<float>;
+
+} // namespace
diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder.h b/eval/src/vespa/eval/streamed/streamed_value_builder.h
new file mode 100644
index 00000000000..ca75dac840b
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_builder.h
@@ -0,0 +1,61 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "streamed_value.h"
+#include <vespa/vespalib/objects/nbostream.h>
+
+namespace vespalib::eval {
+
+template <typename T>
+class StreamedValueBuilder : public ValueBuilder<T>
+{
+private:
+ ValueType _type;
+ size_t _dsss;
+ std::vector<T> _cells;
+ size_t _num_subspaces;
+ nbostream _labels;
+public:
+ StreamedValueBuilder(const ValueType &type,
+ size_t num_mapped_in,
+ size_t subspace_size_in,
+ size_t expected_subspaces)
+ : _type(type),
+ _dsss(subspace_size_in),
+ _cells(),
+ _num_subspaces(0),
+ _labels()
+ {
+ _cells.reserve(subspace_size_in * expected_subspaces);
+ // assume small sized label strings:
+ _labels.reserve(num_mapped_in * expected_subspaces * 3);
+ };
+
+ ~StreamedValueBuilder();
+
+ ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) override {
+ for (auto label : addr) {
+ _labels.writeSmallString(label);
+ }
+ size_t old_sz = _cells.size();
+ _cells.resize(old_sz + _dsss);
+ _num_subspaces++;
+ return ArrayRef<T>(&_cells[old_sz], _dsss);
+ }
+
+ std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>>) override {
+ if (_num_subspaces == 0 && _type.count_mapped_dimensions() == 0) {
+ // add required dense subspace
+ add_subspace({});
+ }
+ // note: _num_subspaces * _dsss == _cells.size()
+ return std::make_unique<StreamedValue<T>>(std::move(_type),
+ std::move(_cells),
+ _num_subspaces,
+ _labels.extract_buffer());
+ }
+
+};
+
+} // namespace
diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp
new file mode 100644
index 00000000000..550331d1905
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp
@@ -0,0 +1,33 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "streamed_value_builder_factory.h"
+#include "streamed_value_builder.h"
+
+namespace vespalib::eval {
+
+struct SelectStreamedValueBuilder {
+ template <typename T, typename ...Args>
+ static std::unique_ptr<ValueBuilderBase> invoke(const ValueType &type, Args &&...args)
+ {
+ assert(check_cell_type<T>(type.cell_type()));
+ return std::make_unique<StreamedValueBuilder<T>>(type, std::forward<Args>(args)...);
+ }
+};
+
+std::unique_ptr<ValueBuilderBase>
+StreamedValueBuilderFactory::create_value_builder_base(const ValueType &type,
+ size_t num_mapped_in,
+ size_t subspace_size_in,
+ size_t expected_subspaces) const
+{
+ return typify_invoke<1,TypifyCellType,SelectStreamedValueBuilder>(
+ type.cell_type(),
+ type, num_mapped_in, subspace_size_in, expected_subspaces);
+}
+
+StreamedValueBuilderFactory::~StreamedValueBuilderFactory() = default;
+StreamedValueBuilderFactory StreamedValueBuilderFactory::_factory;
+
+} // namespace
+
+
diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h
new file mode 100644
index 00000000000..3f81981f429
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h
@@ -0,0 +1,24 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "streamed_value.h"
+
+namespace vespalib::eval {
+
+/**
+ * A factory that can generate appropriate ValueBuilder instances
+ */
+struct StreamedValueBuilderFactory : ValueBuilderFactory {
+private:
+ StreamedValueBuilderFactory() {}
+ static StreamedValueBuilderFactory _factory;
+ std::unique_ptr<ValueBuilderBase> create_value_builder_base(
+ const ValueType &type, size_t num_mapped_in,
+ size_t subspace_size_in, size_t expected_subspaces) const override;
+public:
+ static const StreamedValueBuilderFactory &get() { return _factory; }
+ ~StreamedValueBuilderFactory();
+};
+
+}
diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.cpp b/eval/src/vespa/eval/streamed/streamed_value_index.cpp
new file mode 100644
index 00000000000..5e50765255e
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_index.cpp
@@ -0,0 +1,103 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "streamed_value_index.h"
+#include "streamed_value_utils.h"
+
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/util/visit_ranges.h>
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.tensor.streamed_value_index");
+
+namespace vespalib::eval {
+
+namespace {
+
+struct StreamedFilterView : Value::Index::View
+{
+ LabelBlockStream label_blocks;
+ std::vector<size_t> view_dims;
+ std::vector<vespalib::stringref> to_match;
+
+ StreamedFilterView(LabelBlockStream labels, std::vector<size_t> view_dims_in)
+ : label_blocks(std::move(labels)),
+ view_dims(std::move(view_dims_in)),
+ to_match()
+ {
+ to_match.reserve(view_dims.size());
+ }
+
+ void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
+ label_blocks.reset();
+ to_match.clear();
+ for (auto ptr : addr) {
+ to_match.push_back(*ptr);
+ }
+ assert(view_dims.size() == to_match.size());
+ }
+
+ bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
+ while (const auto block = label_blocks.next_block()) {
+ idx_out = block.ss_idx;
+ bool matches = true;
+ size_t out_idx = 0;
+ size_t vdm_idx = 0;
+ for (size_t dim = 0; dim < block.address.size(); ++dim) {
+ if (vdm_idx < view_dims.size() && (view_dims[vdm_idx] == dim)) {
+ if (block.address[dim] != to_match[vdm_idx]) {
+ matches = false;
+ }
+ ++vdm_idx;
+ } else {
+ *addr_out[out_idx++] = block.address[dim];
+ }
+ }
+ assert(out_idx == addr_out.size());
+ assert(vdm_idx == view_dims.size());
+ if (matches) return true;
+ }
+ return false;
+ }
+};
+
+struct StreamedIterationView : Value::Index::View
+{
+ LabelBlockStream label_blocks;
+
+ StreamedIterationView(LabelBlockStream labels)
+ : label_blocks(std::move(labels))
+ {}
+
+ void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
+ label_blocks.reset();
+ assert(addr.size() == 0);
+ }
+
+ bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
+ if (auto block = label_blocks.next_block()) {
+ idx_out = block.ss_idx;
+ size_t i = 0;
+ for (auto ptr : addr_out) {
+ *ptr = block.address[i++];
+ }
+ assert(i == block.address.size());
+ return true;
+ }
+ return false;
+ }
+};
+
+} // namespace <unnamed>
+
+std::unique_ptr<Value::Index::View>
+StreamedValueIndex::create_view(const std::vector<size_t> &dims) const
+{
+ LabelBlockStream label_stream(_data.num_subspaces, _data.labels_buffer, _data.num_mapped_dims);
+ if (dims.empty()) {
+ return std::make_unique<StreamedIterationView>(std::move(label_stream));
+ }
+ return std::make_unique<StreamedFilterView>(std::move(label_stream), dims);
+}
+
+} // namespace vespalib::eval
diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.h b/eval/src/vespa/eval/streamed/streamed_value_index.h
new file mode 100644
index 00000000000..f2571bd27c4
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_index.h
@@ -0,0 +1,32 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/value.h>
+
+namespace vespalib::eval {
+
+class StreamedValueIndex : public Value::Index
+{
+public:
+ struct SerializedForm {
+ uint32_t num_mapped_dims;
+ uint32_t num_subspaces;
+ ConstArrayRef<char> labels_buffer;
+ };
+ StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, ConstArrayRef<char> labels_buf)
+ : _data{num_mapped_dims, num_subspaces, labels_buf}
+ {}
+
+ // index API:
+ size_t size() const override { return _data.num_subspaces; }
+ std::unique_ptr<View> create_view(const std::vector<size_t> &dims) const override;
+
+ SerializedForm serialize() const { return _data; }
+
+private:
+ SerializedForm _data;
+};
+
+} // namespace
+
diff --git a/eval/src/vespa/eval/streamed/streamed_value_utils.cpp b/eval/src/vespa/eval/streamed/streamed_value_utils.cpp
new file mode 100644
index 00000000000..1b4a91a9080
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_utils.cpp
@@ -0,0 +1,9 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "streamed_value_utils.h"
+
+namespace vespalib::eval {
+
+LabelBlockStream::~LabelBlockStream() = default;
+
+} // namespace
diff --git a/eval/src/vespa/eval/streamed/streamed_value_utils.h b/eval/src/vespa/eval/streamed/streamed_value_utils.h
new file mode 100644
index 00000000000..7610e2cacd4
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_utils.h
@@ -0,0 +1,64 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/value.h>
+#include <vespa/vespalib/objects/nbostream.h>
+
+namespace vespalib::eval {
+
+struct LabelStream {
+ nbostream_longlivedbuf source;
+ LabelStream(ConstArrayRef<char> data) : source(data.begin(), data.size()) {}
+ vespalib::stringref next_label() {
+ size_t str_size = source.getInt1_4Bytes();
+ vespalib::stringref label(source.peek(), str_size);
+ source.adjustReadPos(str_size);
+ return label;
+ }
+ void reset() { source.rp(0); }
+};
+
+struct LabelBlock {
+ static constexpr size_t npos = -1;
+ size_t ss_idx;
+ ConstArrayRef<vespalib::stringref> address;
+ operator bool() const { return ss_idx != npos; }
+};
+
+class LabelBlockStream {
+private:
+ size_t _num_subspaces;
+ LabelStream _labels;
+ size_t _subspace_index;
+ std::vector<vespalib::stringref> _current_address;
+public:
+ LabelBlock next_block() {
+ if (_subspace_index < _num_subspaces) {
+ for (auto & label : _current_address) {
+ label = _labels.next_label();
+ }
+ return LabelBlock{_subspace_index++, _current_address};
+ } else {
+ return LabelBlock{LabelBlock::npos, {}};
+ }
+ }
+
+ void reset() {
+ _subspace_index = 0;
+ _labels.reset();
+ }
+
+ LabelBlockStream(uint32_t num_subspaces,
+ ConstArrayRef<char> label_buf,
+ uint32_t num_mapped_dims)
+ : _num_subspaces(num_subspaces),
+ _labels(label_buf),
+ _subspace_index(-1),
+ _current_address(num_mapped_dims)
+ {}
+
+ ~LabelBlockStream();
+};
+
+} // namespace
diff --git a/eval/src/vespa/eval/streamed/streamed_value_view.cpp b/eval/src/vespa/eval/streamed/streamed_value_view.cpp
new file mode 100644
index 00000000000..87e1e676692
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_view.cpp
@@ -0,0 +1,9 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "streamed_value_view.h"
+
+namespace vespalib::eval {
+
+StreamedValueView::~StreamedValueView() = default;
+
+} // namespace
diff --git a/eval/src/vespa/eval/streamed/streamed_value_view.h b/eval/src/vespa/eval/streamed/streamed_value_view.h
new file mode 100644
index 00000000000..5dd428bee38
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_view.h
@@ -0,0 +1,38 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/value_type.h>
+#include <vespa/eval/eval/value.h>
+#include "streamed_value_index.h"
+
+namespace vespalib::eval {
+
+class StreamedValueView : public Value
+{
+private:
+ const ValueType &_type;
+ TypedCells _cells_ref;
+ StreamedValueIndex _my_index;
+
+public:
+ StreamedValueView(const ValueType &type, TypedCells cells,
+ size_t num_ss, ConstArrayRef<char> labels_buf)
+ : _type(type),
+ _cells_ref(cells),
+ _my_index(_type.count_mapped_dimensions(), num_ss, labels_buf)
+ {
+ if (num_ss * _type.dense_subspace_size() != _cells_ref.size) abort();
+ }
+
+ ~StreamedValueView();
+ const ValueType &type() const final override { return _type; }
+ TypedCells cells() const final override { return _cells_ref; }
+ const Value::Index &index() const override { return _my_index; }
+ MemoryUsage get_memory_usage() const final override {
+ return self_memory_usage<StreamedValueView>();
+ }
+ auto serialize_index() const { return _my_index.serialize(); }
+};
+
+} // namespace