summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArne H Juul <arnej27959@users.noreply.github.com>2020-11-25 16:03:08 +0100
committerGitHub <noreply@github.com>2020-11-25 16:03:08 +0100
commit95b62d332fb2a138ed3ef00ed93491a079332405 (patch)
tree02628d18634149626a754e0257761f108f786a04
parenta8e1073672127ec0e40aad860e398f831561106f (diff)
parentee4a487bef5372eab6c82dc55c981baa60641b99 (diff)
Merge pull request #15445 from vespa-engine/arnej/add-simple-streamed-value
Arnej/add simple streamed value
-rw-r--r--eval/CMakeLists.txt2
-rw-r--r--eval/src/tests/streamed/value/CMakeLists.txt9
-rw-r--r--eval/src/tests/streamed/value/streamed_value_test.cpp136
-rw-r--r--eval/src/tests/tensor/tensor_conformance/tensor_conformance_test.cpp6
-rw-r--r--eval/src/vespa/eval/CMakeLists.txt1
-rw-r--r--eval/src/vespa/eval/eval/cell_type.cpp16
-rw-r--r--eval/src/vespa/eval/eval/cell_type.h24
-rw-r--r--eval/src/vespa/eval/streamed/CMakeLists.txt11
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value.cpp28
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value.h48
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_builder.cpp13
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_builder.h66
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp36
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_builder_factory.h24
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_index.cpp100
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_index.h36
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_utils.cpp9
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_utils.h76
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_view.cpp9
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_view.h45
20 files changed, 693 insertions, 2 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt
index 021f3e39e40..ee8509fcf19 100644
--- a/eval/CMakeLists.txt
+++ b/eval/CMakeLists.txt
@@ -53,6 +53,7 @@ vespa_define_module(
src/tests/instruction/dense_tensor_peek_function
src/tests/instruction/index_lookup_table
src/tests/instruction/join_with_number
+ src/tests/streamed/value
src/tests/tensor/dense_add_dimension_optimizer
src/tests/tensor/dense_dimension_combiner
src/tests/tensor/dense_fast_rename_optimizer
@@ -90,6 +91,7 @@ vespa_define_module(
src/vespa/eval/eval/value_cache
src/vespa/eval/gp
src/vespa/eval/instruction
+ src/vespa/eval/streamed
src/vespa/eval/tensor
src/vespa/eval/tensor/dense
src/vespa/eval/tensor/serialization
diff --git a/eval/src/tests/streamed/value/CMakeLists.txt b/eval/src/tests/streamed/value/CMakeLists.txt
new file mode 100644
index 00000000000..d2ccced8c14
--- /dev/null
+++ b/eval/src/tests/streamed/value/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_streamed_value_test_app TEST
+ SOURCES
+ streamed_value_test.cpp
+ DEPENDS
+ vespaeval
+ GTest::GTest
+)
+vespa_add_test(NAME eval_streamed_value_test_app COMMAND eval_streamed_value_test_app)
diff --git a/eval/src/tests/streamed/value/streamed_value_test.cpp b/eval/src/tests/streamed/value/streamed_value_test.cpp
new file mode 100644
index 00000000000..3de6ba0fb63
--- /dev/null
+++ b/eval/src/tests/streamed/value/streamed_value_test.cpp
@@ -0,0 +1,136 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/streamed/streamed_value_builder_factory.h>
+#include <vespa/eval/eval/value_codec.h>
+#include <vespa/eval/instruction/generic_join.h>
+#include <vespa/eval/eval/interpreted_function.h>
+#include <vespa/eval/eval/test/tensor_model.hpp>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib;
+using namespace vespalib::eval;
+using namespace vespalib::eval::instruction;
+using namespace vespalib::eval::test;
+
+using vespalib::make_string_short::fmt;
+
+using PA = std::vector<vespalib::stringref *>;
+using CPA = std::vector<const vespalib::stringref *>;
+
+std::vector<Layout> layouts = {
+ {},
+ {x(3)},
+ {x(3),y(5)},
+ {x(3),y(5),z(7)},
+ float_cells({x(3),y(5),z(7)}),
+ {x({"a","b","c"})},
+ {x({"a","b","c"}),y({"foo","bar"})},
+ {x({"a","b","c"}),y({"foo","bar"}),z({"i","j","k","l"})},
+ float_cells({x({"a","b","c"}),y({"foo","bar"}),z({"i","j","k","l"})}),
+ {x(3),y({"foo", "bar"}),z(7)},
+ {x({"a","b","c"}),y(5),z({"i","j","k","l"})},
+ float_cells({x({"a","b","c"}),y(5),z({"i","j","k","l"})})
+};
+
+std::vector<Layout> join_layouts = {
+ {}, {},
+ {x(5)}, {x(5)},
+ {x(5)}, {y(5)},
+ {x(5)}, {x(5),y(5)},
+ {y(3)}, {x(2),z(3)},
+ {x(3),y(5)}, {y(5),z(7)},
+ float_cells({x(3),y(5)}), {y(5),z(7)},
+ {x(3),y(5)}, float_cells({y(5),z(7)}),
+ float_cells({x(3),y(5)}), float_cells({y(5),z(7)}),
+ {x({"a","b","c"})}, {x({"a","b","c"})},
+ {x({"a","b","c"})}, {x({"a","b"})},
+ {x({"a","b","c"})}, {y({"foo","bar","baz"})},
+ {x({"a","b","c"})}, {x({"a","b","c"}),y({"foo","bar","baz"})},
+ {x({"a","b"}),y({"foo","bar","baz"})}, {x({"a","b","c"}),y({"foo","bar"})},
+ {x({"a","b"}),y({"foo","bar","baz"})}, {y({"foo","bar"}),z({"i","j","k","l"})},
+ float_cells({x({"a","b"}),y({"foo","bar","baz"})}), {y({"foo","bar"}),z({"i","j","k","l"})},
+ {x({"a","b"}),y({"foo","bar","baz"})}, float_cells({y({"foo","bar"}),z({"i","j","k","l"})}),
+ float_cells({x({"a","b"}),y({"foo","bar","baz"})}), float_cells({y({"foo","bar"}),z({"i","j","k","l"})}),
+ {x(3),y({"foo", "bar"})}, {y({"foo", "bar"}),z(7)},
+ {x({"a","b","c"}),y(5)}, {y(5),z({"i","j","k","l"})},
+ float_cells({x({"a","b","c"}),y(5)}), {y(5),z({"i","j","k","l"})},
+ {x({"a","b","c"}),y(5)}, float_cells({y(5),z({"i","j","k","l"})}),
+ float_cells({x({"a","b","c"}),y(5)}), float_cells({y(5),z({"i","j","k","l"})})
+};
+
+TensorSpec simple_tensor_join(const TensorSpec &a, const TensorSpec &b, join_fun_t function) {
+ Stash stash;
+ const auto &engine = SimpleTensorEngine::ref();
+ auto lhs = engine.from_spec(a);
+ auto rhs = engine.from_spec(b);
+ const auto &result = engine.join(*lhs, *rhs, function, stash);
+ return engine.to_spec(result);
+}
+
+TensorSpec streamed_value_new_join(const TensorSpec &a, const TensorSpec &b, join_fun_t function) {
+ Stash stash;
+ const auto &factory = StreamedValueBuilderFactory::get();
+ auto lhs = value_from_spec(a, factory);
+ auto rhs = value_from_spec(b, factory);
+ auto my_op = GenericJoin::make_instruction(lhs->type(), rhs->type(), function, factory, stash);
+ InterpretedFunction::EvalSingle single(factory, my_op);
+ return spec_from_value(single.eval(std::vector<Value::CREF>({*lhs,*rhs})));
+}
+
+TEST(StreamedValueTest, streamed_values_can_be_converted_from_and_to_tensor_spec) {
+ for (const auto &layout: layouts) {
+ TensorSpec expect = spec(layout, N());
+ std::unique_ptr<Value> value = value_from_spec(expect, StreamedValueBuilderFactory::get());
+ TensorSpec actual = spec_from_value(*value);
+ EXPECT_EQ(actual, expect);
+ }
+}
+
+TEST(StreamedValueTest, streamed_value_can_be_built_and_inspected) {
+ ValueType type = ValueType::from_spec("tensor<float>(x{},y[2],z{})");
+ const auto &factory = StreamedValueBuilderFactory::get();
+ std::unique_ptr<ValueBuilder<float>> builder = factory.create_value_builder<float>(type);
+ float seq = 0.0;
+ for (vespalib::string x: {"a", "b", "c"}) {
+ for (vespalib::string y: {"aa", "bb"}) {
+ std::vector<vespalib::stringref> addr = {x, y};
+ auto subspace = builder->add_subspace(addr);
+ EXPECT_EQ(subspace.size(), 2);
+ subspace[0] = seq + 1.0;
+ subspace[1] = seq + 5.0;
+ seq += 10.0;
+ }
+ seq += 100.0;
+ }
+ std::unique_ptr<Value> value = builder->build(std::move(builder));
+ EXPECT_EQ(value->index().size(), 6);
+ auto view = value->index().create_view({0});
+ vespalib::stringref query = "b";
+ vespalib::stringref label;
+ size_t subspace;
+ view->lookup(CPA{&query});
+ EXPECT_TRUE(view->next_result(PA{&label}, subspace));
+ EXPECT_EQ(label, "aa");
+ EXPECT_EQ(subspace, 2);
+ EXPECT_TRUE(view->next_result(PA{&label}, subspace));
+ EXPECT_EQ(label, "bb");
+ EXPECT_EQ(subspace, 3);
+ EXPECT_FALSE(view->next_result(PA{&label}, subspace));
+}
+
+TEST(StreamedValueTest, new_generic_join_works_for_streamed_values) {
+ ASSERT_TRUE((join_layouts.size() % 2) == 0);
+ for (size_t i = 0; i < join_layouts.size(); i += 2) {
+ TensorSpec lhs = spec(join_layouts[i], Div16(N()));
+ TensorSpec rhs = spec(join_layouts[i + 1], Div16(N()));
+ for (auto fun: {operation::Add::f, operation::Sub::f, operation::Mul::f, operation::Max::f}) {
+ SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str()));
+ auto expect = simple_tensor_join(lhs, rhs, fun);
+ auto actual = streamed_value_new_join(lhs, rhs, fun);
+ EXPECT_EQ(actual, expect);
+ }
+ }
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/tests/tensor/tensor_conformance/tensor_conformance_test.cpp b/eval/src/tests/tensor/tensor_conformance/tensor_conformance_test.cpp
index 233aff0e425..6468f50a00e 100644
--- a/eval/src/tests/tensor/tensor_conformance/tensor_conformance_test.cpp
+++ b/eval/src/tests/tensor/tensor_conformance/tensor_conformance_test.cpp
@@ -3,11 +3,13 @@
#include <vespa/eval/eval/test/tensor_conformance.h>
#include <vespa/eval/eval/simple_tensor_engine.h>
#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/streamed/streamed_value_builder_factory.h>
#include <vespa/eval/eval/fast_value.h>
#include <vespa/eval/tensor/default_tensor_engine.h>
#include <vespa/vespalib/util/stringfmt.h>
using vespalib::eval::SimpleValueBuilderFactory;
+using vespalib::eval::StreamedValueBuilderFactory;
using vespalib::eval::FastValueBuilderFactory;
using vespalib::eval::SimpleTensorEngine;
using vespalib::eval::test::TensorConformance;
@@ -29,6 +31,10 @@ TEST("require that SimpleValue implementation passes all conformance tests") {
TEST_DO(TensorConformance::run_tests(module_src_path, SimpleValueBuilderFactory::get()));
}
+TEST("require that StreamedValue implementation passes all conformance tests") {
+ TEST_DO(TensorConformance::run_tests(module_src_path, StreamedValueBuilderFactory::get()));
+}
+
TEST("require that FastValue implementation passes all conformance tests") {
TEST_DO(TensorConformance::run_tests(module_src_path, FastValueBuilderFactory::get()));
}
diff --git a/eval/src/vespa/eval/CMakeLists.txt b/eval/src/vespa/eval/CMakeLists.txt
index 9173278473d..952640195b1 100644
--- a/eval/src/vespa/eval/CMakeLists.txt
+++ b/eval/src/vespa/eval/CMakeLists.txt
@@ -7,6 +7,7 @@ vespa_add_library(vespaeval
$<TARGET_OBJECTS:eval_eval_test>
$<TARGET_OBJECTS:eval_eval_value_cache>
$<TARGET_OBJECTS:eval_gp>
+ $<TARGET_OBJECTS:eval_streamed>
$<TARGET_OBJECTS:eval_tensor>
$<TARGET_OBJECTS:eval_tensor_dense>
$<TARGET_OBJECTS:eval_tensor_serialization>
diff --git a/eval/src/vespa/eval/eval/cell_type.cpp b/eval/src/vespa/eval/eval/cell_type.cpp
index e5729c547b0..365a3f59a56 100644
--- a/eval/src/vespa/eval/eval/cell_type.cpp
+++ b/eval/src/vespa/eval/eval/cell_type.cpp
@@ -1,3 +1,19 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "cell_type.h"
+#include <stdio.h>
+#include <cstdlib>
+#include <vespa/vespalib/util/exceptions.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+using vespalib::make_string_short::fmt;
+
+namespace vespalib::eval {
+
+void
+CellTypeUtils::bad_argument(uint32_t id)
+{
+ throw IllegalArgumentException(fmt("Unknown CellType id=%u", id));
+}
+
+}
diff --git a/eval/src/vespa/eval/eval/cell_type.h b/eval/src/vespa/eval/eval/cell_type.h
index 0e878f26f47..49114d04bfe 100644
--- a/eval/src/vespa/eval/eval/cell_type.h
+++ b/eval/src/vespa/eval/eval/cell_type.h
@@ -3,7 +3,7 @@
#pragma once
#include <vespa/vespalib/util/typify.h>
-#include <cstdlib>
+#include <cstdint>
namespace vespalib::eval {
@@ -25,6 +25,26 @@ template <typename CT> inline CellType get_cell_type();
template <> inline CellType get_cell_type<double>() { return CellType::DOUBLE; }
template <> inline CellType get_cell_type<float>() { return CellType::FLOAT; }
+struct CellTypeUtils {
+ static void bad_argument [[ noreturn ]] (uint32_t id);
+
+ static constexpr uint32_t alignment(CellType cell_type) {
+ switch (cell_type) {
+ case CellType::DOUBLE: return sizeof(double);
+ case CellType::FLOAT: return sizeof(float);
+ }
+ bad_argument((uint32_t)cell_type);
+ }
+
+ static constexpr size_t mem_size(CellType cell_type, size_t sz) {
+ switch (cell_type) {
+ case CellType::DOUBLE: return sz * sizeof(double);
+ case CellType::FLOAT: return sz * sizeof(float);
+ }
+ bad_argument((uint32_t)cell_type);
+ }
+};
+
struct TypifyCellType {
template <typename T> using Result = TypifyResultType<T>;
template <typename F> static decltype(auto) resolve(CellType value, F &&f) {
@@ -32,7 +52,7 @@ struct TypifyCellType {
case CellType::DOUBLE: return f(Result<double>());
case CellType::FLOAT: return f(Result<float>());
}
- abort();
+ CellTypeUtils::bad_argument((uint32_t)value);
}
};
diff --git a/eval/src/vespa/eval/streamed/CMakeLists.txt b/eval/src/vespa/eval/streamed/CMakeLists.txt
new file mode 100644
index 00000000000..ee928d7b2c9
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+vespa_add_library(eval_streamed OBJECT
+ SOURCES
+ streamed_value.cpp
+ streamed_value_index.cpp
+ streamed_value_utils.cpp
+ streamed_value_builder.cpp
+ streamed_value_builder_factory.cpp
+ streamed_value_view.cpp
+)
diff --git a/eval/src/vespa/eval/streamed/streamed_value.cpp b/eval/src/vespa/eval/streamed/streamed_value.cpp
new file mode 100644
index 00000000000..bdfe5fd4e27
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value.cpp
@@ -0,0 +1,28 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "streamed_value.h"
+#include <vespa/log/log.h>
+
+LOG_SETUP(".vespalib.eval.streamed.streamed_value");
+
+namespace vespalib::eval {
+
+template <typename T>
+StreamedValue<T>::~StreamedValue() = default;
+
+template <typename T>
+MemoryUsage
+StreamedValue<T>::get_memory_usage() const
+{
+ MemoryUsage usage = self_memory_usage<StreamedValue<T>>();
+ usage.merge(vector_extra_memory_usage(_my_cells));
+ usage.incUsedBytes(_label_buf.byteSize());
+ usage.incAllocatedBytes(_label_buf.byteCapacity());
+ return usage;
+}
+
+template class StreamedValue<double>;
+template class StreamedValue<float>;
+
+} // namespace
+
diff --git a/eval/src/vespa/eval/streamed/streamed_value.h b/eval/src/vespa/eval/streamed/streamed_value.h
new file mode 100644
index 00000000000..258802a53e8
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value.h
@@ -0,0 +1,48 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/value_type.h>
+#include <vespa/eval/eval/value.h>
+#include "streamed_value_index.h"
+#include <cassert>
+
+namespace vespalib::eval {
+
+/**
+ * A very simple Value implementation.
+ * Cheap to construct from serialized data,
+ * and cheap to serialize or iterate through.
+ * Slow for full or partial lookups.
+ **/
+template <typename T>
+class StreamedValue : public Value
+{
+private:
+ ValueType _type;
+ std::vector<T> _my_cells;
+ Array<char> _label_buf;
+ StreamedValueIndex _my_index;
+
+public:
+ StreamedValue(ValueType type, size_t num_mapped_dimensions,
+ std::vector<T> cells, size_t num_subspaces, Array<char> && label_buf)
+ : _type(std::move(type)),
+ _my_cells(std::move(cells)),
+ _label_buf(std::move(label_buf)),
+ _my_index(num_mapped_dimensions,
+ num_subspaces,
+ ConstArrayRef<char>(_label_buf.begin(), _label_buf.size()))
+ {
+ assert(num_subspaces * _type.dense_subspace_size() == _my_cells.size());
+ }
+
+ ~StreamedValue();
+ const ValueType &type() const final override { return _type; }
+ TypedCells cells() const final override { return TypedCells(_my_cells); }
+ const Value::Index &index() const final override { return _my_index; }
+ MemoryUsage get_memory_usage() const final override;
+ auto get_data_reference() const { return _my_index.get_data_reference(); }
+};
+
+} // namespace
diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder.cpp b/eval/src/vespa/eval/streamed/streamed_value_builder.cpp
new file mode 100644
index 00000000000..957121c42b7
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_builder.cpp
@@ -0,0 +1,13 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "streamed_value_builder.h"
+
+namespace vespalib::eval {
+
+template<typename T>
+StreamedValueBuilder<T>::~StreamedValueBuilder() = default;
+
+template class StreamedValueBuilder<double>;
+template class StreamedValueBuilder<float>;
+
+} // namespace
diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder.h b/eval/src/vespa/eval/streamed/streamed_value_builder.h
new file mode 100644
index 00000000000..5698c805756
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_builder.h
@@ -0,0 +1,66 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "streamed_value.h"
+#include <vespa/vespalib/objects/nbostream.h>
+
+namespace vespalib::eval {
+
+ /**
+ * Builder for StreamedValue objects.
+ **/
+template <typename T>
+class StreamedValueBuilder : public ValueBuilder<T>
+{
+private:
+ ValueType _type;
+ size_t _num_mapped_dimensions;
+ size_t _dense_subspace_size;
+ std::vector<T> _cells;
+ size_t _num_subspaces;
+ nbostream _labels;
+public:
+ StreamedValueBuilder(const ValueType &type,
+ size_t num_mapped_in,
+ size_t subspace_size_in,
+ size_t expected_subspaces)
+ : _type(type),
+ _num_mapped_dimensions(num_mapped_in),
+ _dense_subspace_size(subspace_size_in),
+ _cells(),
+ _num_subspaces(0),
+ _labels()
+ {
+ _cells.reserve(subspace_size_in * expected_subspaces);
+ // assume small sized label strings:
+ _labels.reserve(num_mapped_in * expected_subspaces * 3);
+ };
+
+ ~StreamedValueBuilder();
+
+ ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) override {
+ for (auto label : addr) {
+ _labels.writeSmallString(label);
+ }
+ size_t old_sz = _cells.size();
+ _cells.resize(old_sz + _dense_subspace_size);
+ _num_subspaces++;
+ return ArrayRef<T>(&_cells[old_sz], _dense_subspace_size);
+ }
+
+ std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>>) override {
+ if (_num_mapped_dimensions == 0) {
+ assert(_num_subspaces == 1);
+ }
+ assert(_num_subspaces * _dense_subspace_size == _cells.size());
+ return std::make_unique<StreamedValue<T>>(std::move(_type),
+ _num_mapped_dimensions,
+ std::move(_cells),
+ _num_subspaces,
+ _labels.extract_buffer());
+ }
+
+};
+
+} // namespace
diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp
new file mode 100644
index 00000000000..aa6347a2c51
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp
@@ -0,0 +1,36 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "streamed_value_builder_factory.h"
+#include "streamed_value_builder.h"
+
+namespace vespalib::eval {
+
+struct SelectStreamedValueBuilder {
+ template <typename T>
+ static std::unique_ptr<ValueBuilderBase> invoke(
+ const ValueType &type, size_t num_mapped,
+ size_t subspace_size, size_t expected_subspaces)
+ {
+ assert(check_cell_type<T>(type.cell_type()));
+ return std::make_unique<StreamedValueBuilder<T>>(
+ type, num_mapped, subspace_size, expected_subspaces);
+ }
+};
+
+std::unique_ptr<ValueBuilderBase>
+StreamedValueBuilderFactory::create_value_builder_base(const ValueType &type,
+ size_t num_mapped,
+ size_t subspace_size,
+ size_t expected_subspaces) const
+{
+ return typify_invoke<1,TypifyCellType,SelectStreamedValueBuilder>(
+ type.cell_type(),
+ type, num_mapped, subspace_size, expected_subspaces);
+}
+
+StreamedValueBuilderFactory::~StreamedValueBuilderFactory() = default;
+StreamedValueBuilderFactory StreamedValueBuilderFactory::_factory;
+
+} // namespace
+
+
diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h
new file mode 100644
index 00000000000..3f81981f429
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h
@@ -0,0 +1,24 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "streamed_value.h"
+
+namespace vespalib::eval {
+
+/**
+ * A factory that can generate appropriate ValueBuilder instances
+ */
+struct StreamedValueBuilderFactory : ValueBuilderFactory {
+private:
+ StreamedValueBuilderFactory() {}
+ static StreamedValueBuilderFactory _factory;
+ std::unique_ptr<ValueBuilderBase> create_value_builder_base(
+ const ValueType &type, size_t num_mapped_in,
+ size_t subspace_size_in, size_t expected_subspaces) const override;
+public:
+ static const StreamedValueBuilderFactory &get() { return _factory; }
+ ~StreamedValueBuilderFactory();
+};
+
+}
diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.cpp b/eval/src/vespa/eval/streamed/streamed_value_index.cpp
new file mode 100644
index 00000000000..38b57e9c660
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_index.cpp
@@ -0,0 +1,100 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "streamed_value_index.h"
+#include "streamed_value_utils.h"
+
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/util/visit_ranges.h>
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.tensor.streamed_value_index");
+
+namespace vespalib::eval {
+
+namespace {
+
+struct StreamedFilterView : Value::Index::View
+{
+ LabelBlockStream label_blocks;
+ std::vector<size_t> view_dims;
+ std::vector<vespalib::stringref> to_match;
+
+ StreamedFilterView(LabelBlockStream labels, std::vector<size_t> view_dims_in)
+ : label_blocks(std::move(labels)),
+ view_dims(std::move(view_dims_in)),
+ to_match()
+ {
+ to_match.reserve(view_dims.size());
+ }
+
+ void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
+ label_blocks.reset();
+ to_match.clear();
+ for (auto ptr : addr) {
+ to_match.push_back(*ptr);
+ }
+ assert(view_dims.size() == to_match.size());
+ }
+
+ bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
+ while (const auto block = label_blocks.next_block()) {
+ idx_out = block.ss_idx;
+ bool matches = true;
+ size_t out_idx = 0;
+ size_t vdm_idx = 0;
+ for (size_t dim = 0; dim < block.address.size(); ++dim) {
+ if (vdm_idx < view_dims.size() && (view_dims[vdm_idx] == dim)) {
+ matches &= (block.address[dim] == to_match[vdm_idx++]);
+ } else {
+ *addr_out[out_idx++] = block.address[dim];
+ }
+ }
+ assert(out_idx == addr_out.size());
+ assert(vdm_idx == view_dims.size());
+ if (matches) return true;
+ }
+ return false;
+ }
+};
+
+struct StreamedIterationView : Value::Index::View
+{
+ LabelBlockStream label_blocks;
+
+ StreamedIterationView(LabelBlockStream labels)
+ : label_blocks(std::move(labels))
+ {}
+
+ void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
+ label_blocks.reset();
+ assert(addr.size() == 0);
+ }
+
+ bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
+ if (auto block = label_blocks.next_block()) {
+ idx_out = block.ss_idx;
+ size_t i = 0;
+ assert(addr_out.size() == block.address.size());
+ for (auto ptr : addr_out) {
+ *ptr = block.address[i++];
+ }
+ return true;
+ }
+ return false;
+ }
+};
+
+} // namespace <unnamed>
+
+std::unique_ptr<Value::Index::View>
+StreamedValueIndex::create_view(const std::vector<size_t> &dims) const
+{
+ LabelBlockStream label_stream(_data.num_subspaces, _data.labels_buffer, _data.num_mapped_dims);
+ if (dims.empty()) {
+ return std::make_unique<StreamedIterationView>(std::move(label_stream));
+ }
+ return std::make_unique<StreamedFilterView>(std::move(label_stream), dims);
+}
+
+} // namespace vespalib::eval
diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.h b/eval/src/vespa/eval/streamed/streamed_value_index.h
new file mode 100644
index 00000000000..8fd561200c3
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_index.h
@@ -0,0 +1,36 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/value.h>
+
+namespace vespalib::eval {
+
+ /**
+ * Implements Value::Index by reading a stream of serialized
+ * labels.
+ **/
+class StreamedValueIndex : public Value::Index
+{
+public:
+ struct SerializedDataRef {
+ uint32_t num_mapped_dims;
+ uint32_t num_subspaces;
+ ConstArrayRef<char> labels_buffer;
+ };
+ StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, ConstArrayRef<char> labels_buf)
+ : _data{num_mapped_dims, num_subspaces, labels_buf}
+ {}
+
+ // index API:
+ size_t size() const override { return _data.num_subspaces; }
+ std::unique_ptr<View> create_view(const std::vector<size_t> &dims) const override;
+
+ SerializedDataRef get_data_reference() const { return _data; }
+
+private:
+ SerializedDataRef _data;
+};
+
+} // namespace
+
diff --git a/eval/src/vespa/eval/streamed/streamed_value_utils.cpp b/eval/src/vespa/eval/streamed/streamed_value_utils.cpp
new file mode 100644
index 00000000000..1b4a91a9080
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_utils.cpp
@@ -0,0 +1,9 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "streamed_value_utils.h"
+
+namespace vespalib::eval {
+
+LabelBlockStream::~LabelBlockStream() = default;
+
+} // namespace
diff --git a/eval/src/vespa/eval/streamed/streamed_value_utils.h b/eval/src/vespa/eval/streamed/streamed_value_utils.h
new file mode 100644
index 00000000000..3e3da82dd22
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_utils.h
@@ -0,0 +1,76 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/value.h>
+#include <vespa/vespalib/objects/nbostream.h>
+
+namespace vespalib::eval {
+
+/**
+ * Reads a stream of serialized labels.
+ * Reading more labels than available will
+ * throw an exception.
+ **/
+struct LabelStream {
+ nbostream source;
+ LabelStream(ConstArrayRef<char> data) : source(data.begin(), data.size()) {}
+ vespalib::stringref next_label() {
+ size_t str_size = source.getInt1_4Bytes();
+ vespalib::stringref label(source.peek(), str_size);
+ source.adjustReadPos(str_size);
+ return label;
+ }
+ void reset() { source.rp(0); }
+};
+
+/**
+ * Represents an address (set of labels) mapping to a subspace index
+ **/
+struct LabelBlock {
+ static constexpr size_t npos = -1;
+ size_t ss_idx;
+ ConstArrayRef<vespalib::stringref> address;
+ operator bool() const { return ss_idx != npos; }
+};
+
+/**
+ * Utility for reading a buffer with serialized labels
+ * as a stream of LabelBlock objects.
+ **/
+class LabelBlockStream {
+private:
+ size_t _num_subspaces;
+ LabelStream _labels;
+ size_t _subspace_index;
+ std::vector<vespalib::stringref> _current_address;
+public:
+ LabelBlock next_block() {
+ if (_subspace_index < _num_subspaces) {
+ for (auto & label : _current_address) {
+ label = _labels.next_label();
+ }
+ return LabelBlock{_subspace_index++, _current_address};
+ } else {
+ return LabelBlock{LabelBlock::npos, {}};
+ }
+ }
+
+ void reset() {
+ _subspace_index = 0;
+ _labels.reset();
+ }
+
+ LabelBlockStream(uint32_t num_subspaces,
+ ConstArrayRef<char> label_buf,
+ uint32_t num_mapped_dims)
+ : _num_subspaces(num_subspaces),
+ _labels(label_buf),
+ _subspace_index(num_subspaces),
+ _current_address(num_mapped_dims)
+ {}
+
+ ~LabelBlockStream();
+};
+
+} // namespace
diff --git a/eval/src/vespa/eval/streamed/streamed_value_view.cpp b/eval/src/vespa/eval/streamed/streamed_value_view.cpp
new file mode 100644
index 00000000000..87e1e676692
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_view.cpp
@@ -0,0 +1,9 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "streamed_value_view.h"
+
+namespace vespalib::eval {
+
+StreamedValueView::~StreamedValueView() = default;
+
+} // namespace
diff --git a/eval/src/vespa/eval/streamed/streamed_value_view.h b/eval/src/vespa/eval/streamed/streamed_value_view.h
new file mode 100644
index 00000000000..e37f442dd9a
--- /dev/null
+++ b/eval/src/vespa/eval/streamed/streamed_value_view.h
@@ -0,0 +1,45 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/eval/value_type.h>
+#include <vespa/eval/eval/value.h>
+#include "streamed_value_index.h"
+#include <cassert>
+
+namespace vespalib::eval {
+
+ /**
+ * Same characteristics as StreamedValue, but does not
+ * own its data - refers to type, cells and serialized
+ * labels that must be kept outside the Value.
+ **/
+class StreamedValueView : public Value
+{
+private:
+ const ValueType &_type;
+ TypedCells _cells_ref;
+ StreamedValueIndex _my_index;
+
+public:
+ StreamedValueView(const ValueType &type, size_t num_mapped_dimensions,
+ TypedCells cells, size_t num_subspaces,
+ ConstArrayRef<char> labels_buf)
+ : _type(type),
+ _cells_ref(cells),
+ _my_index(num_mapped_dimensions, num_subspaces, labels_buf)
+ {
+ assert(num_subspaces * _type.dense_subspace_size() == _cells_ref.size);
+ }
+
+ ~StreamedValueView();
+ const ValueType &type() const final override { return _type; }
+ TypedCells cells() const final override { return _cells_ref; }
+ const Value::Index &index() const final override { return _my_index; }
+ MemoryUsage get_memory_usage() const final override {
+ return self_memory_usage<StreamedValueView>();
+ }
+ auto get_data_reference() const { return _my_index.get_data_reference(); }
+};
+
+} // namespace