diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2020-11-25 16:03:08 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-11-25 16:03:08 +0100 |
commit | 95b62d332fb2a138ed3ef00ed93491a079332405 (patch) | |
tree | 02628d18634149626a754e0257761f108f786a04 | |
parent | a8e1073672127ec0e40aad860e398f831561106f (diff) | |
parent | ee4a487bef5372eab6c82dc55c981baa60641b99 (diff) |
Merge pull request #15445 from vespa-engine/arnej/add-simple-streamed-value
Arnej/add simple streamed value
20 files changed, 693 insertions, 2 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 021f3e39e40..ee8509fcf19 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -53,6 +53,7 @@ vespa_define_module( src/tests/instruction/dense_tensor_peek_function src/tests/instruction/index_lookup_table src/tests/instruction/join_with_number + src/tests/streamed/value src/tests/tensor/dense_add_dimension_optimizer src/tests/tensor/dense_dimension_combiner src/tests/tensor/dense_fast_rename_optimizer @@ -90,6 +91,7 @@ vespa_define_module( src/vespa/eval/eval/value_cache src/vespa/eval/gp src/vespa/eval/instruction + src/vespa/eval/streamed src/vespa/eval/tensor src/vespa/eval/tensor/dense src/vespa/eval/tensor/serialization diff --git a/eval/src/tests/streamed/value/CMakeLists.txt b/eval/src/tests/streamed/value/CMakeLists.txt new file mode 100644 index 00000000000..d2ccced8c14 --- /dev/null +++ b/eval/src/tests/streamed/value/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_streamed_value_test_app TEST + SOURCES + streamed_value_test.cpp + DEPENDS + vespaeval + GTest::GTest +) +vespa_add_test(NAME eval_streamed_value_test_app COMMAND eval_streamed_value_test_app) diff --git a/eval/src/tests/streamed/value/streamed_value_test.cpp b/eval/src/tests/streamed/value/streamed_value_test.cpp new file mode 100644 index 00000000000..3de6ba0fb63 --- /dev/null +++ b/eval/src/tests/streamed/value/streamed_value_test.cpp @@ -0,0 +1,136 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/eval/streamed/streamed_value_builder_factory.h> +#include <vespa/eval/eval/value_codec.h> +#include <vespa/eval/instruction/generic_join.h> +#include <vespa/eval/eval/interpreted_function.h> +#include <vespa/eval/eval/test/tensor_model.hpp> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/gtest/gtest.h> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::eval::instruction; +using namespace vespalib::eval::test; + +using vespalib::make_string_short::fmt; + +using PA = std::vector<vespalib::stringref *>; +using CPA = std::vector<const vespalib::stringref *>; + +std::vector<Layout> layouts = { + {}, + {x(3)}, + {x(3),y(5)}, + {x(3),y(5),z(7)}, + float_cells({x(3),y(5),z(7)}), + {x({"a","b","c"})}, + {x({"a","b","c"}),y({"foo","bar"})}, + {x({"a","b","c"}),y({"foo","bar"}),z({"i","j","k","l"})}, + float_cells({x({"a","b","c"}),y({"foo","bar"}),z({"i","j","k","l"})}), + {x(3),y({"foo", "bar"}),z(7)}, + {x({"a","b","c"}),y(5),z({"i","j","k","l"})}, + float_cells({x({"a","b","c"}),y(5),z({"i","j","k","l"})}) +}; + +std::vector<Layout> join_layouts = { + {}, {}, + {x(5)}, {x(5)}, + {x(5)}, {y(5)}, + {x(5)}, {x(5),y(5)}, + {y(3)}, {x(2),z(3)}, + {x(3),y(5)}, {y(5),z(7)}, + float_cells({x(3),y(5)}), {y(5),z(7)}, + {x(3),y(5)}, float_cells({y(5),z(7)}), + float_cells({x(3),y(5)}), float_cells({y(5),z(7)}), + {x({"a","b","c"})}, {x({"a","b","c"})}, + {x({"a","b","c"})}, {x({"a","b"})}, + {x({"a","b","c"})}, {y({"foo","bar","baz"})}, + {x({"a","b","c"})}, {x({"a","b","c"}),y({"foo","bar","baz"})}, + {x({"a","b"}),y({"foo","bar","baz"})}, {x({"a","b","c"}),y({"foo","bar"})}, + {x({"a","b"}),y({"foo","bar","baz"})}, {y({"foo","bar"}),z({"i","j","k","l"})}, + float_cells({x({"a","b"}),y({"foo","bar","baz"})}), {y({"foo","bar"}),z({"i","j","k","l"})}, + {x({"a","b"}),y({"foo","bar","baz"})}, float_cells({y({"foo","bar"}),z({"i","j","k","l"})}), + float_cells({x({"a","b"}),y({"foo","bar","baz"})}), float_cells({y({"foo","bar"}),z({"i","j","k","l"})}), + {x(3),y({"foo", "bar"})}, {y({"foo", "bar"}),z(7)}, + {x({"a","b","c"}),y(5)}, {y(5),z({"i","j","k","l"})}, + float_cells({x({"a","b","c"}),y(5)}), {y(5),z({"i","j","k","l"})}, + {x({"a","b","c"}),y(5)}, float_cells({y(5),z({"i","j","k","l"})}), + float_cells({x({"a","b","c"}),y(5)}), float_cells({y(5),z({"i","j","k","l"})}) +}; + +TensorSpec simple_tensor_join(const TensorSpec &a, const TensorSpec &b, join_fun_t function) { + Stash stash; + const auto &engine = SimpleTensorEngine::ref(); + auto lhs = engine.from_spec(a); + auto rhs = engine.from_spec(b); + const auto &result = engine.join(*lhs, *rhs, function, stash); + return engine.to_spec(result); +} + +TensorSpec streamed_value_new_join(const TensorSpec &a, const TensorSpec &b, join_fun_t function) { + Stash stash; + const auto &factory = StreamedValueBuilderFactory::get(); + auto lhs = value_from_spec(a, factory); + auto rhs = value_from_spec(b, factory); + auto my_op = GenericJoin::make_instruction(lhs->type(), rhs->type(), function, factory, stash); + InterpretedFunction::EvalSingle single(factory, my_op); + return spec_from_value(single.eval(std::vector<Value::CREF>({*lhs,*rhs}))); +} + +TEST(StreamedValueTest, streamed_values_can_be_converted_from_and_to_tensor_spec) { + for (const auto &layout: layouts) { + TensorSpec expect = spec(layout, N()); + std::unique_ptr<Value> value = value_from_spec(expect, StreamedValueBuilderFactory::get()); + TensorSpec actual = spec_from_value(*value); + EXPECT_EQ(actual, expect); + } +} + +TEST(StreamedValueTest, streamed_value_can_be_built_and_inspected) { + ValueType type = ValueType::from_spec("tensor<float>(x{},y[2],z{})"); + const auto &factory = StreamedValueBuilderFactory::get(); + std::unique_ptr<ValueBuilder<float>> builder = factory.create_value_builder<float>(type); + float seq = 0.0; + for (vespalib::string x: {"a", "b", "c"}) { + for (vespalib::string y: {"aa", "bb"}) { + std::vector<vespalib::stringref> addr = {x, y}; + auto subspace = builder->add_subspace(addr); + EXPECT_EQ(subspace.size(), 2); + subspace[0] = seq + 1.0; + subspace[1] = seq + 5.0; + seq += 10.0; + } + seq += 100.0; + } + std::unique_ptr<Value> value = builder->build(std::move(builder)); + EXPECT_EQ(value->index().size(), 6); + auto view = value->index().create_view({0}); + vespalib::stringref query = "b"; + vespalib::stringref label; + size_t subspace; + view->lookup(CPA{&query}); + EXPECT_TRUE(view->next_result(PA{&label}, subspace)); + EXPECT_EQ(label, "aa"); + EXPECT_EQ(subspace, 2); + EXPECT_TRUE(view->next_result(PA{&label}, subspace)); + EXPECT_EQ(label, "bb"); + EXPECT_EQ(subspace, 3); + EXPECT_FALSE(view->next_result(PA{&label}, subspace)); +} + +TEST(StreamedValueTest, new_generic_join_works_for_streamed_values) { + ASSERT_TRUE((join_layouts.size() % 2) == 0); + for (size_t i = 0; i < join_layouts.size(); i += 2) { + TensorSpec lhs = spec(join_layouts[i], Div16(N())); + TensorSpec rhs = spec(join_layouts[i + 1], Div16(N())); + for (auto fun: {operation::Add::f, operation::Sub::f, operation::Mul::f, operation::Max::f}) { + SCOPED_TRACE(fmt("\n===\nLHS: %s\nRHS: %s\n===\n", lhs.to_string().c_str(), rhs.to_string().c_str())); + auto expect = simple_tensor_join(lhs, rhs, fun); + auto actual = streamed_value_new_join(lhs, rhs, fun); + EXPECT_EQ(actual, expect); + } + } +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/tests/tensor/tensor_conformance/tensor_conformance_test.cpp b/eval/src/tests/tensor/tensor_conformance/tensor_conformance_test.cpp index 233aff0e425..6468f50a00e 100644 --- a/eval/src/tests/tensor/tensor_conformance/tensor_conformance_test.cpp +++ b/eval/src/tests/tensor/tensor_conformance/tensor_conformance_test.cpp @@ -3,11 +3,13 @@ #include <vespa/eval/eval/test/tensor_conformance.h> #include <vespa/eval/eval/simple_tensor_engine.h> #include <vespa/eval/eval/simple_value.h> +#include <vespa/eval/streamed/streamed_value_builder_factory.h> #include <vespa/eval/eval/fast_value.h> #include <vespa/eval/tensor/default_tensor_engine.h> #include <vespa/vespalib/util/stringfmt.h> using vespalib::eval::SimpleValueBuilderFactory; +using vespalib::eval::StreamedValueBuilderFactory; using vespalib::eval::FastValueBuilderFactory; using vespalib::eval::SimpleTensorEngine; using vespalib::eval::test::TensorConformance; @@ -29,6 +31,10 @@ TEST("require that SimpleValue implementation passes all conformance tests") { TEST_DO(TensorConformance::run_tests(module_src_path, SimpleValueBuilderFactory::get())); } +TEST("require that StreamedValue implementation passes all conformance tests") { + TEST_DO(TensorConformance::run_tests(module_src_path, StreamedValueBuilderFactory::get())); +} + TEST("require that FastValue implementation passes all conformance tests") { TEST_DO(TensorConformance::run_tests(module_src_path, FastValueBuilderFactory::get())); } diff --git a/eval/src/vespa/eval/CMakeLists.txt b/eval/src/vespa/eval/CMakeLists.txt index 9173278473d..952640195b1 100644 --- a/eval/src/vespa/eval/CMakeLists.txt +++ b/eval/src/vespa/eval/CMakeLists.txt @@ -7,6 +7,7 @@ vespa_add_library(vespaeval $<TARGET_OBJECTS:eval_eval_test> $<TARGET_OBJECTS:eval_eval_value_cache> $<TARGET_OBJECTS:eval_gp> + $<TARGET_OBJECTS:eval_streamed> $<TARGET_OBJECTS:eval_tensor> $<TARGET_OBJECTS:eval_tensor_dense> $<TARGET_OBJECTS:eval_tensor_serialization> diff --git a/eval/src/vespa/eval/eval/cell_type.cpp b/eval/src/vespa/eval/eval/cell_type.cpp index e5729c547b0..365a3f59a56 100644 --- a/eval/src/vespa/eval/eval/cell_type.cpp +++ b/eval/src/vespa/eval/eval/cell_type.cpp @@ -1,3 +1,19 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "cell_type.h" +#include <stdio.h> +#include <cstdlib> +#include <vespa/vespalib/util/exceptions.h> +#include <vespa/vespalib/util/stringfmt.h> + +using vespalib::make_string_short::fmt; + +namespace vespalib::eval { + +void +CellTypeUtils::bad_argument(uint32_t id) +{ + throw IllegalArgumentException(fmt("Unknown CellType id=%u", id)); +} + +} diff --git a/eval/src/vespa/eval/eval/cell_type.h b/eval/src/vespa/eval/eval/cell_type.h index 0e878f26f47..49114d04bfe 100644 --- a/eval/src/vespa/eval/eval/cell_type.h +++ b/eval/src/vespa/eval/eval/cell_type.h @@ -3,7 +3,7 @@ #pragma once #include <vespa/vespalib/util/typify.h> -#include <cstdlib> +#include <cstdint> namespace vespalib::eval { @@ -25,6 +25,26 @@ template <typename CT> inline CellType get_cell_type(); template <> inline CellType get_cell_type<double>() { return CellType::DOUBLE; } template <> inline CellType get_cell_type<float>() { return CellType::FLOAT; } +struct CellTypeUtils { + static void bad_argument [[ noreturn ]] (uint32_t id); + + static constexpr uint32_t alignment(CellType cell_type) { + switch (cell_type) { + case CellType::DOUBLE: return sizeof(double); + case CellType::FLOAT: return sizeof(float); + } + bad_argument((uint32_t)cell_type); + } + + static constexpr size_t mem_size(CellType cell_type, size_t sz) { + switch (cell_type) { + case CellType::DOUBLE: return sz * sizeof(double); + case CellType::FLOAT: return sz * sizeof(float); + } + bad_argument((uint32_t)cell_type); + } +}; + struct TypifyCellType { template <typename T> using Result = TypifyResultType<T>; template <typename F> static decltype(auto) resolve(CellType value, F &&f) { @@ -32,7 +52,7 @@ struct TypifyCellType { case CellType::DOUBLE: return f(Result<double>()); case CellType::FLOAT: return f(Result<float>()); } - abort(); + CellTypeUtils::bad_argument((uint32_t)value); } }; diff --git a/eval/src/vespa/eval/streamed/CMakeLists.txt b/eval/src/vespa/eval/streamed/CMakeLists.txt new file mode 100644 index 00000000000..ee928d7b2c9 --- /dev/null +++ b/eval/src/vespa/eval/streamed/CMakeLists.txt @@ -0,0 +1,11 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +vespa_add_library(eval_streamed OBJECT + SOURCES + streamed_value.cpp + streamed_value_index.cpp + streamed_value_utils.cpp + streamed_value_builder.cpp + streamed_value_builder_factory.cpp + streamed_value_view.cpp +) diff --git a/eval/src/vespa/eval/streamed/streamed_value.cpp b/eval/src/vespa/eval/streamed/streamed_value.cpp new file mode 100644 index 00000000000..bdfe5fd4e27 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value.cpp @@ -0,0 +1,28 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "streamed_value.h" +#include <vespa/log/log.h> + +LOG_SETUP(".vespalib.eval.streamed.streamed_value"); + +namespace vespalib::eval { + +template <typename T> +StreamedValue<T>::~StreamedValue() = default; + +template <typename T> +MemoryUsage +StreamedValue<T>::get_memory_usage() const +{ + MemoryUsage usage = self_memory_usage<StreamedValue<T>>(); + usage.merge(vector_extra_memory_usage(_my_cells)); + usage.incUsedBytes(_label_buf.byteSize()); + usage.incAllocatedBytes(_label_buf.byteCapacity()); + return usage; +} + +template class StreamedValue<double>; +template class StreamedValue<float>; + +} // namespace + diff --git a/eval/src/vespa/eval/streamed/streamed_value.h b/eval/src/vespa/eval/streamed/streamed_value.h new file mode 100644 index 00000000000..258802a53e8 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value.h @@ -0,0 +1,48 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/value_type.h> +#include <vespa/eval/eval/value.h> +#include "streamed_value_index.h" +#include <cassert> + +namespace vespalib::eval { + +/** + * A very simple Value implementation. + * Cheap to construct from serialized data, + * and cheap to serialize or iterate through. + * Slow for full or partial lookups. + **/ +template <typename T> +class StreamedValue : public Value +{ +private: + ValueType _type; + std::vector<T> _my_cells; + Array<char> _label_buf; + StreamedValueIndex _my_index; + +public: + StreamedValue(ValueType type, size_t num_mapped_dimensions, + std::vector<T> cells, size_t num_subspaces, Array<char> && label_buf) + : _type(std::move(type)), + _my_cells(std::move(cells)), + _label_buf(std::move(label_buf)), + _my_index(num_mapped_dimensions, + num_subspaces, + ConstArrayRef<char>(_label_buf.begin(), _label_buf.size())) + { + assert(num_subspaces * _type.dense_subspace_size() == _my_cells.size()); + } + + ~StreamedValue(); + const ValueType &type() const final override { return _type; } + TypedCells cells() const final override { return TypedCells(_my_cells); } + const Value::Index &index() const final override { return _my_index; } + MemoryUsage get_memory_usage() const final override; + auto get_data_reference() const { return _my_index.get_data_reference(); } +}; + +} // namespace diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder.cpp b/eval/src/vespa/eval/streamed/streamed_value_builder.cpp new file mode 100644 index 00000000000..957121c42b7 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_builder.cpp @@ -0,0 +1,13 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "streamed_value_builder.h" + +namespace vespalib::eval { + +template<typename T> +StreamedValueBuilder<T>::~StreamedValueBuilder() = default; + +template class StreamedValueBuilder<double>; +template class StreamedValueBuilder<float>; + +} // namespace diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder.h b/eval/src/vespa/eval/streamed/streamed_value_builder.h new file mode 100644 index 00000000000..5698c805756 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_builder.h @@ -0,0 +1,66 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "streamed_value.h" +#include <vespa/vespalib/objects/nbostream.h> + +namespace vespalib::eval { + + /** + * Builder for StreamedValue objects. + **/ +template <typename T> +class StreamedValueBuilder : public ValueBuilder<T> +{ +private: + ValueType _type; + size_t _num_mapped_dimensions; + size_t _dense_subspace_size; + std::vector<T> _cells; + size_t _num_subspaces; + nbostream _labels; +public: + StreamedValueBuilder(const ValueType &type, + size_t num_mapped_in, + size_t subspace_size_in, + size_t expected_subspaces) + : _type(type), + _num_mapped_dimensions(num_mapped_in), + _dense_subspace_size(subspace_size_in), + _cells(), + _num_subspaces(0), + _labels() + { + _cells.reserve(subspace_size_in * expected_subspaces); + // assume small sized label strings: + _labels.reserve(num_mapped_in * expected_subspaces * 3); + }; + + ~StreamedValueBuilder(); + + ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) override { + for (auto label : addr) { + _labels.writeSmallString(label); + } + size_t old_sz = _cells.size(); + _cells.resize(old_sz + _dense_subspace_size); + _num_subspaces++; + return ArrayRef<T>(&_cells[old_sz], _dense_subspace_size); + } + + std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>>) override { + if (_num_mapped_dimensions == 0) { + assert(_num_subspaces == 1); + } + assert(_num_subspaces * _dense_subspace_size == _cells.size()); + return std::make_unique<StreamedValue<T>>(std::move(_type), + _num_mapped_dimensions, + std::move(_cells), + _num_subspaces, + _labels.extract_buffer()); + } + +}; + +} // namespace diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp new file mode 100644 index 00000000000..aa6347a2c51 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp @@ -0,0 +1,36 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "streamed_value_builder_factory.h" +#include "streamed_value_builder.h" + +namespace vespalib::eval { + +struct SelectStreamedValueBuilder { + template <typename T> + static std::unique_ptr<ValueBuilderBase> invoke( + const ValueType &type, size_t num_mapped, + size_t subspace_size, size_t expected_subspaces) + { + assert(check_cell_type<T>(type.cell_type())); + return std::make_unique<StreamedValueBuilder<T>>( + type, num_mapped, subspace_size, expected_subspaces); + } +}; + +std::unique_ptr<ValueBuilderBase> +StreamedValueBuilderFactory::create_value_builder_base(const ValueType &type, + size_t num_mapped, + size_t subspace_size, + size_t expected_subspaces) const +{ + return typify_invoke<1,TypifyCellType,SelectStreamedValueBuilder>( + type.cell_type(), + type, num_mapped, subspace_size, expected_subspaces); +} + +StreamedValueBuilderFactory::~StreamedValueBuilderFactory() = default; +StreamedValueBuilderFactory StreamedValueBuilderFactory::_factory; + +} // namespace + + diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h new file mode 100644 index 00000000000..3f81981f429 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h @@ -0,0 +1,24 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "streamed_value.h" + +namespace vespalib::eval { + +/** + * A factory that can generate appropriate ValueBuilder instances + */ +struct StreamedValueBuilderFactory : ValueBuilderFactory { +private: + StreamedValueBuilderFactory() {} + static StreamedValueBuilderFactory _factory; + std::unique_ptr<ValueBuilderBase> create_value_builder_base( + const ValueType &type, size_t num_mapped_in, + size_t subspace_size_in, size_t expected_subspaces) const override; +public: + static const StreamedValueBuilderFactory &get() { return _factory; } + ~StreamedValueBuilderFactory(); +}; + +} diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.cpp b/eval/src/vespa/eval/streamed/streamed_value_index.cpp new file mode 100644 index 00000000000..38b57e9c660 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_index.cpp @@ -0,0 +1,100 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "streamed_value_index.h" +#include "streamed_value_utils.h" + +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/util/visit_ranges.h> +#include <vespa/log/log.h> + +LOG_SETUP(".searchlib.tensor.streamed_value_index"); + +namespace vespalib::eval { + +namespace { + +struct StreamedFilterView : Value::Index::View +{ + LabelBlockStream label_blocks; + std::vector<size_t> view_dims; + std::vector<vespalib::stringref> to_match; + + StreamedFilterView(LabelBlockStream labels, std::vector<size_t> view_dims_in) + : label_blocks(std::move(labels)), + view_dims(std::move(view_dims_in)), + to_match() + { + to_match.reserve(view_dims.size()); + } + + void lookup(ConstArrayRef<const vespalib::stringref*> addr) override { + label_blocks.reset(); + to_match.clear(); + for (auto ptr : addr) { + to_match.push_back(*ptr); + } + assert(view_dims.size() == to_match.size()); + } + + bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override { + while (const auto block = label_blocks.next_block()) { + idx_out = block.ss_idx; + bool matches = true; + size_t out_idx = 0; + size_t vdm_idx = 0; + for (size_t dim = 0; dim < block.address.size(); ++dim) { + if (vdm_idx < view_dims.size() && (view_dims[vdm_idx] == dim)) { + matches &= (block.address[dim] == to_match[vdm_idx++]); + } else { + *addr_out[out_idx++] = block.address[dim]; + } + } + assert(out_idx == addr_out.size()); + assert(vdm_idx == view_dims.size()); + if (matches) return true; + } + return false; + } +}; + +struct StreamedIterationView : Value::Index::View +{ + LabelBlockStream label_blocks; + + StreamedIterationView(LabelBlockStream labels) + : label_blocks(std::move(labels)) + {} + + void lookup(ConstArrayRef<const vespalib::stringref*> addr) override { + label_blocks.reset(); + assert(addr.size() == 0); + } + + bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override { + if (auto block = label_blocks.next_block()) { + idx_out = block.ss_idx; + size_t i = 0; + assert(addr_out.size() == block.address.size()); + for (auto ptr : addr_out) { + *ptr = block.address[i++]; + } + return true; + } + return false; + } +}; + +} // namespace <unnamed> + +std::unique_ptr<Value::Index::View> +StreamedValueIndex::create_view(const std::vector<size_t> &dims) const +{ + LabelBlockStream label_stream(_data.num_subspaces, _data.labels_buffer, _data.num_mapped_dims); + if (dims.empty()) { + return std::make_unique<StreamedIterationView>(std::move(label_stream)); + } + return std::make_unique<StreamedFilterView>(std::move(label_stream), dims); +} + +} // namespace vespalib::eval diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.h b/eval/src/vespa/eval/streamed/streamed_value_index.h new file mode 100644 index 00000000000..8fd561200c3 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_index.h @@ -0,0 +1,36 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/value.h> + +namespace vespalib::eval { + + /** + * Implements Value::Index by reading a stream of serialized + * labels. + **/ +class StreamedValueIndex : public Value::Index +{ +public: + struct SerializedDataRef { + uint32_t num_mapped_dims; + uint32_t num_subspaces; + ConstArrayRef<char> labels_buffer; + }; + StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, ConstArrayRef<char> labels_buf) + : _data{num_mapped_dims, num_subspaces, labels_buf} + {} + + // index API: + size_t size() const override { return _data.num_subspaces; } + std::unique_ptr<View> create_view(const std::vector<size_t> &dims) const override; + + SerializedDataRef get_data_reference() const { return _data; } + +private: + SerializedDataRef _data; +}; + +} // namespace + diff --git a/eval/src/vespa/eval/streamed/streamed_value_utils.cpp b/eval/src/vespa/eval/streamed/streamed_value_utils.cpp new file mode 100644 index 00000000000..1b4a91a9080 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_utils.cpp @@ -0,0 +1,9 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "streamed_value_utils.h" + +namespace vespalib::eval { + +LabelBlockStream::~LabelBlockStream() = default; + +} // namespace diff --git a/eval/src/vespa/eval/streamed/streamed_value_utils.h b/eval/src/vespa/eval/streamed/streamed_value_utils.h new file mode 100644 index 00000000000..3e3da82dd22 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_utils.h @@ -0,0 +1,76 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/value.h> +#include <vespa/vespalib/objects/nbostream.h> + +namespace vespalib::eval { + +/** + * Reads a stream of serialized labels. + * Reading more labels than available will + * throw an exception. + **/ +struct LabelStream { + nbostream source; + LabelStream(ConstArrayRef<char> data) : source(data.begin(), data.size()) {} + vespalib::stringref next_label() { + size_t str_size = source.getInt1_4Bytes(); + vespalib::stringref label(source.peek(), str_size); + source.adjustReadPos(str_size); + return label; + } + void reset() { source.rp(0); } +}; + +/** + * Represents an address (set of labels) mapping to a subspace index + **/ +struct LabelBlock { + static constexpr size_t npos = -1; + size_t ss_idx; + ConstArrayRef<vespalib::stringref> address; + operator bool() const { return ss_idx != npos; } +}; + +/** + * Utility for reading a buffer with serialized labels + * as a stream of LabelBlock objects. + **/ +class LabelBlockStream { +private: + size_t _num_subspaces; + LabelStream _labels; + size_t _subspace_index; + std::vector<vespalib::stringref> _current_address; +public: + LabelBlock next_block() { + if (_subspace_index < _num_subspaces) { + for (auto & label : _current_address) { + label = _labels.next_label(); + } + return LabelBlock{_subspace_index++, _current_address}; + } else { + return LabelBlock{LabelBlock::npos, {}}; + } + } + + void reset() { + _subspace_index = 0; + _labels.reset(); + } + + LabelBlockStream(uint32_t num_subspaces, + ConstArrayRef<char> label_buf, + uint32_t num_mapped_dims) + : _num_subspaces(num_subspaces), + _labels(label_buf), + _subspace_index(num_subspaces), + _current_address(num_mapped_dims) + {} + + ~LabelBlockStream(); +}; + +} // namespace diff --git a/eval/src/vespa/eval/streamed/streamed_value_view.cpp b/eval/src/vespa/eval/streamed/streamed_value_view.cpp new file mode 100644 index 00000000000..87e1e676692 --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_view.cpp @@ -0,0 +1,9 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "streamed_value_view.h" + +namespace vespalib::eval { + +StreamedValueView::~StreamedValueView() = default; + +} // namespace diff --git a/eval/src/vespa/eval/streamed/streamed_value_view.h b/eval/src/vespa/eval/streamed/streamed_value_view.h new file mode 100644 index 00000000000..e37f442dd9a --- /dev/null +++ b/eval/src/vespa/eval/streamed/streamed_value_view.h @@ -0,0 +1,45 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/eval/value_type.h> +#include <vespa/eval/eval/value.h> +#include "streamed_value_index.h" +#include <cassert> + +namespace vespalib::eval { + + /** + * Same characteristics as StreamedValue, but does not + * own its data - refers to type, cells and serialized + * labels that must be kept outside the Value. + **/ +class StreamedValueView : public Value +{ +private: + const ValueType &_type; + TypedCells _cells_ref; + StreamedValueIndex _my_index; + +public: + StreamedValueView(const ValueType &type, size_t num_mapped_dimensions, + TypedCells cells, size_t num_subspaces, + ConstArrayRef<char> labels_buf) + : _type(type), + _cells_ref(cells), + _my_index(num_mapped_dimensions, num_subspaces, labels_buf) + { + assert(num_subspaces * _type.dense_subspace_size() == _cells_ref.size); + } + + ~StreamedValueView(); + const ValueType &type() const final override { return _type; } + TypedCells cells() const final override { return _cells_ref; } + const Value::Index &index() const final override { return _my_index; } + MemoryUsage get_memory_usage() const final override { + return self_memory_usage<StreamedValueView>(); + } + auto get_data_reference() const { return _my_index.get_data_reference(); } +}; + +} // namespace |