diff options
41 files changed, 962 insertions, 674 deletions
diff --git a/document/src/test/resources/tensor/multi_cell_tensor__cpp b/document/src/test/resources/tensor/multi_cell_tensor__cpp Binary files differindex deb53463fb5..9adda236a4a 100644 --- a/document/src/test/resources/tensor/multi_cell_tensor__cpp +++ b/document/src/test/resources/tensor/multi_cell_tensor__cpp diff --git a/document/src/vespa/document/update/tensor_partial_update.cpp b/document/src/vespa/document/update/tensor_partial_update.cpp index fbc60cc09af..f763c92741c 100644 --- a/document/src/vespa/document/update/tensor_partial_update.cpp +++ b/document/src/vespa/document/update/tensor_partial_update.cpp @@ -5,6 +5,7 @@ #include <vespa/vespalib/util/overload.h> #include <vespa/vespalib/util/typify.h> #include <vespa/vespalib/util/visit_ranges.h> +#include <vespa/vespalib/util/shared_string_repo.h> #include <cassert> #include <set> @@ -43,7 +44,8 @@ struct DenseCoords { } ~DenseCoords(); void clear() { offset = 0; current = 0; } - void convert_label(vespalib::stringref label) { + void convert_label(label_t label_id) { + vespalib::string label = SharedStringRepo::Handle::string_from_id(label_id); uint32_t coord = 0; for (char c : label) { if (c < '0' || c > '9') { // bad char @@ -71,9 +73,9 @@ struct DenseCoords { DenseCoords::~DenseCoords() = default; struct SparseCoords { - std::vector<vespalib::stringref> addr; - std::vector<vespalib::stringref *> next_result_refs; - std::vector<const vespalib::stringref *> lookup_refs; + std::vector<label_t> addr; + std::vector<label_t *> next_result_refs; + std::vector<const label_t *> lookup_refs; std::vector<size_t> lookup_view_dims; SparseCoords(size_t sz) : addr(sz), next_result_refs(sz), lookup_refs(sz), lookup_view_dims(sz) @@ -327,7 +329,7 @@ calc_mapped_dimension_indexes(const ValueType& input_type, struct ModifierCoords { - std::vector<const vespalib::stringref *> lookup_refs; + std::vector<const label_t *> lookup_refs; std::vector<size_t> lookup_view_dims; ModifierCoords(const SparseCoords& input_coords, 
diff --git a/eval/src/tests/eval/fast_value/fast_value_test.cpp b/eval/src/tests/eval/fast_value/fast_value_test.cpp index 03658d8351b..e809fb1bcda 100644 --- a/eval/src/tests/eval/fast_value/fast_value_test.cpp +++ b/eval/src/tests/eval/fast_value/fast_value_test.cpp @@ -8,6 +8,8 @@ using namespace vespalib; using namespace vespalib::eval; +using Handle = SharedStringRepo::Handle; + TEST(FastCellsTest, push_back_fast_works) { FastCells<float> cells(3); EXPECT_EQ(cells.capacity, 4); @@ -60,38 +62,37 @@ TEST(FastCellsTest, add_cells_works) { using SA = std::vector<vespalib::stringref>; -TEST(FastValueBuilderTest, dense_add_subspace_robustness) { +TEST(FastValueBuilderTest, scalar_add_subspace_robustness) { auto factory = FastValueBuilderFactory::get(); - ValueType type = ValueType::from_spec("tensor(x[2])"); + ValueType type = ValueType::from_spec("double"); auto builder = factory.create_value_builder<double>(type); - auto subspace = builder->add_subspace({}); + auto subspace = builder->add_subspace(); subspace[0] = 17.0; - subspace[1] = 666; - auto other = builder->add_subspace({}); - other[1] = 42.0; + auto other = builder->add_subspace(); + other[0] = 42.0; auto value = builder->build(std::move(builder)); + EXPECT_EQ(value->index().size(), 1); auto actual = spec_from_value(*value); - auto expected = TensorSpec("tensor(x[2])"). - add({{"x", 0}}, 17.0). - add({{"x", 1}}, 42.0); - EXPECT_EQ(actual, expected); + auto expected = TensorSpec("double"). 
+ add({}, 42.0); + EXPECT_EQ(actual, expected); } -TEST(FastValueBuilderTest, sparse_add_subspace_robustness) { +TEST(FastValueBuilderTest, dense_add_subspace_robustness) { auto factory = FastValueBuilderFactory::get(); - ValueType type = ValueType::from_spec("tensor(x{})"); + ValueType type = ValueType::from_spec("tensor(x[2])"); auto builder = factory.create_value_builder<double>(type); - auto subspace = builder->add_subspace(SA{"foo"}); + auto subspace = builder->add_subspace(); subspace[0] = 17.0; - subspace = builder->add_subspace(SA{"bar"}); - subspace[0] = 18.0; - auto other = builder->add_subspace(SA{"foo"}); - other[0] = 42.0; + subspace[1] = 666; + auto other = builder->add_subspace(); + other[1] = 42.0; auto value = builder->build(std::move(builder)); + EXPECT_EQ(value->index().size(), 1); auto actual = spec_from_value(*value); - auto expected = TensorSpec("tensor(x{})"). - add({{"x", "bar"}}, 18.0). - add({{"x", "foo"}}, 42.0); + auto expected = TensorSpec("tensor(x[2])"). + add({{"x", 0}}, 17.0). + add({{"x", 1}}, 42.0); EXPECT_EQ(actual, expected); } @@ -100,21 +101,43 @@ TEST(FastValueBuilderTest, mixed_add_subspace_robustness) { ValueType type = ValueType::from_spec("tensor(x{},y[2])"); auto builder = factory.create_value_builder<double>(type); auto subspace = builder->add_subspace(SA{"foo"}); - subspace[0] = 17.0; - subspace[1] = 666; + subspace[0] = 1.0; + subspace[1] = 5.0; subspace = builder->add_subspace(SA{"bar"}); - subspace[0] = 18.0; - subspace[1] = 19.0; + subspace[0] = 2.0; + subspace[1] = 10.0; auto other = builder->add_subspace(SA{"foo"}); - other[1] = 42.0; + other[0] = 3.0; + other[1] = 15.0; auto value = builder->build(std::move(builder)); - auto actual = spec_from_value(*value); - auto expected = TensorSpec("tensor(x{},y[2])"). - add({{"x", "foo"}, {"y", 0}}, 17.0). - add({{"x", "bar"}, {"y", 0}}, 18.0). - add({{"x", "bar"}, {"y", 1}}, 19.0). 
- add({{"x", "foo"}, {"y", 1}}, 42.0); - EXPECT_EQ(actual, expected); + EXPECT_EQ(value->index().size(), 3); + Handle foo("foo"); + Handle bar("bar"); + label_t label; + label_t *label_ptr = &label; + size_t subspace_idx; + auto get_subspace = [&]() { + auto cells = value->cells().typify<double>(); + return ConstArrayRef<double>(cells.begin() + subspace_idx * 2, 2); + }; + auto view = value->index().create_view({}); + view->lookup({}); + while (view->next_result({&label_ptr, 1}, subspace_idx)) { + if (label == bar.id()) { + auto values = get_subspace(); + EXPECT_EQ(values[0], 2.0); + EXPECT_EQ(values[1], 10.0); + } else { + EXPECT_EQ(label, foo.id()); + auto values = get_subspace(); + if (values[0] == 1) { + EXPECT_EQ(values[1], 5.0); + } else { + EXPECT_EQ(values[0], 3.0); + EXPECT_EQ(values[1], 15.0); + } + } + } } GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/tests/eval/simple_value/simple_value_test.cpp b/eval/src/tests/eval/simple_value/simple_value_test.cpp index c05f9976e1a..1691d5c263c 100644 --- a/eval/src/tests/eval/simple_value/simple_value_test.cpp +++ b/eval/src/tests/eval/simple_value/simple_value_test.cpp @@ -16,8 +16,12 @@ using namespace vespalib::eval::test; using vespalib::make_string_short::fmt; -using PA = std::vector<vespalib::stringref *>; -using CPA = std::vector<const vespalib::stringref *>; +using PA = std::vector<label_t *>; +using CPA = std::vector<const label_t *>; + +using Handle = SharedStringRepo::Handle; + +vespalib::string as_str(label_t label) { return Handle::string_from_id(label); } std::vector<Layout> layouts = { {}, @@ -98,17 +102,18 @@ TEST(SimpleValueTest, simple_value_can_be_built_and_inspected) { std::unique_ptr<Value> value = builder->build(std::move(builder)); EXPECT_EQ(value->index().size(), 6); auto view = value->index().create_view({0}); - vespalib::stringref query = "b"; - vespalib::stringref label; + Handle query_handle("b"); + label_t query = query_handle.id(); + label_t label; size_t subspace; + 
std::map<vespalib::string,size_t> result; view->lookup(CPA{&query}); - EXPECT_TRUE(view->next_result(PA{&label}, subspace)); - EXPECT_EQ(label, "aa"); - EXPECT_EQ(subspace, 2); - EXPECT_TRUE(view->next_result(PA{&label}, subspace)); - EXPECT_EQ(label, "bb"); - EXPECT_EQ(subspace, 3); - EXPECT_FALSE(view->next_result(PA{&label}, subspace)); + while (view->next_result(PA{&label}, subspace)) { + result[as_str(label)] = subspace; + } + EXPECT_EQ(result.size(), 2); + EXPECT_EQ(result["aa"], 2); + EXPECT_EQ(result["bb"], 3); } TEST(SimpleValueTest, new_generic_join_works_for_simple_values) { diff --git a/eval/src/tests/streamed/value/streamed_value_test.cpp b/eval/src/tests/streamed/value/streamed_value_test.cpp index 05d6e20451c..5221c4eda64 100644 --- a/eval/src/tests/streamed/value/streamed_value_test.cpp +++ b/eval/src/tests/streamed/value/streamed_value_test.cpp @@ -16,8 +16,12 @@ using namespace vespalib::eval::test; using vespalib::make_string_short::fmt; -using PA = std::vector<vespalib::stringref *>; -using CPA = std::vector<const vespalib::stringref *>; +using PA = std::vector<label_t *>; +using CPA = std::vector<const label_t *>; + +using Handle = SharedStringRepo::Handle; + +vespalib::string as_str(label_t label) { return Handle::string_from_id(label); } std::vector<Layout> layouts = { {}, @@ -98,17 +102,18 @@ TEST(StreamedValueTest, streamed_value_can_be_built_and_inspected) { std::unique_ptr<Value> value = builder->build(std::move(builder)); EXPECT_EQ(value->index().size(), 6); auto view = value->index().create_view({0}); - vespalib::stringref query = "b"; - vespalib::stringref label; + Handle query_handle("b"); + label_t query = query_handle.id(); + label_t label; size_t subspace; + std::map<vespalib::string,size_t> result; view->lookup(CPA{&query}); - EXPECT_TRUE(view->next_result(PA{&label}, subspace)); - EXPECT_EQ(label, "aa"); - EXPECT_EQ(subspace, 2); - EXPECT_TRUE(view->next_result(PA{&label}, subspace)); - EXPECT_EQ(label, "bb"); - 
EXPECT_EQ(subspace, 3); - EXPECT_FALSE(view->next_result(PA{&label}, subspace)); + while (view->next_result(PA{&label}, subspace)) { + result[as_str(label)] = subspace; + } + EXPECT_EQ(result.size(), 2); + EXPECT_EQ(result["aa"], 2); + EXPECT_EQ(result["bb"], 3); } TEST(StreamedValueTest, new_generic_join_works_for_streamed_values) { diff --git a/eval/src/vespa/eval/eval/CMakeLists.txt b/eval/src/vespa/eval/eval/CMakeLists.txt index 01eeff49662..5f8dd478a7b 100644 --- a/eval/src/vespa/eval/eval/CMakeLists.txt +++ b/eval/src/vespa/eval/eval/CMakeLists.txt @@ -10,6 +10,7 @@ vespa_add_library(eval_eval OBJECT delete_node.cpp dense_cells_value.cpp double_value_builder.cpp + fast_addr_map.cpp fast_forest.cpp fast_sparse_map.cpp fast_value.cpp diff --git a/eval/src/vespa/eval/eval/fast_addr_map.cpp b/eval/src/vespa/eval/eval/fast_addr_map.cpp new file mode 100644 index 00000000000..73163f411e6 --- /dev/null +++ b/eval/src/vespa/eval/eval/fast_addr_map.cpp @@ -0,0 +1,9 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "fast_addr_map.h" + +namespace vespalib::eval { + +FastAddrMap::~FastAddrMap() = default; + +} diff --git a/eval/src/vespa/eval/eval/fast_addr_map.h b/eval/src/vespa/eval/eval/fast_addr_map.h new file mode 100644 index 00000000000..a8a82718a28 --- /dev/null +++ b/eval/src/vespa/eval/eval/fast_addr_map.h @@ -0,0 +1,152 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "label.h" +#include "memory_usage_stuff.h" +#include <vespa/vespalib/util/arrayref.h> +#include <vespa/vespalib/stllike/identity.h> +#include <vespa/vespalib/stllike/hashtable.h> +#include <vespa/vespalib/util/shared_string_repo.h> +#include <vector> + +namespace vespalib::eval { + +/** + * A wrapper around vespalib::hashtable, using it to map a list of + * labels (a sparse address) to an integer value (dense subspace + * index). Labels are represented by string enum values stored and + * handled outside this class. + **/ +class FastAddrMap +{ +public: + // label hasing functions + static constexpr uint32_t hash_label(label_t label) { return label; } + static constexpr uint32_t hash_label(const label_t *label) { return *label; } + static constexpr uint32_t combine_label_hash(uint32_t full_hash, uint32_t next_hash) { + return ((full_hash * 31) + next_hash); + } + template <typename T> + static constexpr uint32_t hash_labels(ConstArrayRef<T> addr) { + uint32_t hash = 0; + for (const T &label: addr) { + hash = combine_label_hash(hash, hash_label(label)); + } + return hash; + } + + // typed uint32_t index used to identify sparse address/dense subspace + struct Tag { + uint32_t idx; + static constexpr uint32_t npos() { return uint32_t(-1); } + static constexpr Tag make_invalid() { return Tag{npos()}; } + constexpr bool valid() const { return (idx != npos()); } + }; + + // sparse hash set entry + struct Entry { + Tag tag; + uint32_t hash; + }; + + // alternative key(s) used for lookup in sparse hash set + template <typename T> struct AltKey { + ConstArrayRef<T> key; + uint32_t hash; + }; + + // view able to convert tags into sparse addresses + struct LabelView { + size_t addr_size; + const std::vector<label_t> &labels; + LabelView(size_t num_mapped_dims, SharedStringRepo::HandleView handle_view) + : addr_size(num_mapped_dims), labels(handle_view.handles()) {} + ConstArrayRef<label_t> get_addr(size_t idx) const { + return {&labels[idx * 
addr_size], addr_size}; + } + }; + + // hashing functor for sparse hash set + struct Hash { + template <typename T> + constexpr uint32_t operator()(const AltKey<T> &key) const { return key.hash; } + constexpr uint32_t operator()(const Entry &entry) const { return entry.hash; } + }; + + // equality functor for sparse hash set + struct Equal { + const LabelView &label_view; + Equal(const LabelView &label_view_in) : label_view(label_view_in) {} + static constexpr bool eq_labels(label_t a, label_t b) { return (a == b); } + static constexpr bool eq_labels(label_t a, const label_t *b) { return (a == *b); } + template <typename T> + bool operator()(const Entry &a, const AltKey<T> &b) const { + if ((a.hash != b.hash) || (b.key.size() != label_view.addr_size)) { + return false; + } + auto a_key = label_view.get_addr(a.tag.idx); + for (size_t i = 0; i < a_key.size(); ++i) { + if (!eq_labels(a_key[i], b.key[i])) { + return false; + } + } + return true; + } + }; + + using HashType = hashtable<Entry, Entry, Hash, Equal, Identity, hashtable_base::and_modulator>; + +private: + LabelView _labels; + HashType _map; + +public: + FastAddrMap(size_t num_mapped_dims, SharedStringRepo::HandleView handle_view, size_t expected_subspaces) + : _labels(num_mapped_dims, handle_view), + _map(expected_subspaces * 2, Hash(), Equal(_labels)) {} + ~FastAddrMap(); + FastAddrMap(const FastAddrMap &) = delete; + FastAddrMap &operator=(const FastAddrMap &) = delete; + FastAddrMap(FastAddrMap &&) = delete; + FastAddrMap &operator=(FastAddrMap &&) = delete; + static constexpr size_t npos() { return -1; } + ConstArrayRef<label_t> get_addr(size_t idx) const { return _labels.get_addr(idx); } + size_t size() const { return _map.size(); } + constexpr size_t addr_size() const { return _labels.addr_size; } + template <typename T> + size_t lookup(ConstArrayRef<T> addr, uint32_t hash) const { + AltKey<T> key{addr, hash}; + auto pos = _map.find(key); + return (pos == _map.end()) ? 
npos() : pos->tag.idx; + } + template <typename T> + size_t lookup(ConstArrayRef<T> addr) const { + return lookup(addr, hash_labels(addr)); + } + void add_mapping(uint32_t hash) { + uint32_t idx = _map.size(); + _map.force_insert(Entry{{idx}, hash}); + } + template <typename F> + void each_map_entry(F &&f) const { + _map.for_each([&](const auto &entry) + { + f(entry.tag.idx, entry.hash); + }); + } + MemoryUsage estimate_extra_memory_usage() const { + MemoryUsage extra_usage; + size_t map_self_size = sizeof(_map); + size_t map_used = _map.getMemoryUsed(); + size_t map_allocated = _map.getMemoryConsumption(); + // avoid double-counting the map itself + map_used = std::min(map_used, map_used - map_self_size); + map_allocated = std::min(map_allocated, map_allocated - map_self_size); + extra_usage.incUsedBytes(map_used); + extra_usage.incAllocatedBytes(map_allocated); + return extra_usage; + } +}; + +} diff --git a/eval/src/vespa/eval/eval/fast_value.cpp b/eval/src/vespa/eval/eval/fast_value.cpp index 116e561a868..96d0fa84149 100644 --- a/eval/src/vespa/eval/eval/fast_value.cpp +++ b/eval/src/vespa/eval/eval/fast_value.cpp @@ -11,7 +11,7 @@ namespace vespalib::eval { namespace { struct CreateFastValueBuilderBase { - template <typename T> static std::unique_ptr<ValueBuilderBase> invoke(const ValueType &type, + template <typename T, typename R2> static std::unique_ptr<ValueBuilderBase> invoke(const ValueType &type, size_t num_mapped_dims, size_t subspace_size, size_t expected_subspaces) { assert(check_cell_type<T>(type.cell_type())); @@ -20,7 +20,7 @@ struct CreateFastValueBuilderBase { } else if (num_mapped_dims == 0) { return std::make_unique<FastDenseValue<T>>(type, subspace_size); } else { - return std::make_unique<FastValue<T>>(type, num_mapped_dims, subspace_size, expected_subspaces); + return std::make_unique<FastValue<T,R2::value>>(type, num_mapped_dims, subspace_size, expected_subspaces); } } }; @@ -32,11 +32,11 @@ struct CreateFastValueBuilderBase { 
std::unique_ptr<Value::Index::View> FastValueIndex::create_view(const std::vector<size_t> &dims) const { - if (map.num_dims() == 0) { + if (map.addr_size() == 0) { return TrivialIndex::get().create_view(dims); } else if (dims.empty()) { return std::make_unique<FastIterateView>(map); - } else if (dims.size() == map.num_dims()) { + } else if (dims.size() == map.addr_size()) { return std::make_unique<FastLookupView>(map); } else { return std::make_unique<FastFilterView>(map, dims); @@ -49,10 +49,11 @@ FastValueBuilderFactory::FastValueBuilderFactory() = default; FastValueBuilderFactory FastValueBuilderFactory::_factory; std::unique_ptr<ValueBuilderBase> -FastValueBuilderFactory::create_value_builder_base(const ValueType &type, size_t num_mapped_dims, size_t subspace_size, - size_t expected_subspaces) const +FastValueBuilderFactory::create_value_builder_base(const ValueType &type, bool transient, size_t num_mapped_dims, size_t subspace_size, + size_t expected_subspaces) const { - return typify_invoke<1,TypifyCellType,CreateFastValueBuilderBase>(type.cell_type(), type, num_mapped_dims, subspace_size, expected_subspaces); + using MyTypify = TypifyValue<TypifyCellType,TypifyBool>; + return typify_invoke<2,MyTypify,CreateFastValueBuilderBase>(type.cell_type(), transient, type, num_mapped_dims, subspace_size, expected_subspaces); } //----------------------------------------------------------------------------- diff --git a/eval/src/vespa/eval/eval/fast_value.h b/eval/src/vespa/eval/eval/fast_value.h index ac924ecc6eb..c6280b492db 100644 --- a/eval/src/vespa/eval/eval/fast_value.h +++ b/eval/src/vespa/eval/eval/fast_value.h @@ -19,7 +19,7 @@ class FastValueBuilderFactory : public ValueBuilderFactory { private: FastValueBuilderFactory(); static FastValueBuilderFactory _factory; - std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type, + std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type, bool transient, size_t 
num_mapped_dims, size_t subspace_size, size_t expected_subspaces) const override; public: static const FastValueBuilderFactory &get() { return _factory; } diff --git a/eval/src/vespa/eval/eval/fast_value.hpp b/eval/src/vespa/eval/eval/fast_value.hpp index 9914378cc9e..972aa68b8bd 100644 --- a/eval/src/vespa/eval/eval/fast_value.hpp +++ b/eval/src/vespa/eval/eval/fast_value.hpp @@ -1,11 +1,10 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "value.h" -#include "fast_sparse_map.h" +#include "fast_addr_map.h" #include "inline_operation.h" #include <vespa/eval/instruction/generic_join.h> -#include <vespa/vespalib/stllike/hash_map.hpp> -#include <vespa/vespalib/util/alloc.h> +#include <vespa/vespalib/stllike/hashtable.hpp> namespace vespalib::eval { @@ -18,22 +17,22 @@ namespace { // look up a full address in the map directly struct FastLookupView : public Value::Index::View { - const FastSparseMap ↦ - size_t subspace; + const FastAddrMap ↦ + size_t subspace; - FastLookupView(const FastSparseMap &map_in) - : map(map_in), subspace(FastSparseMap::npos()) {} + FastLookupView(const FastAddrMap &map_in) + : map(map_in), subspace(FastAddrMap::npos()) {} - void lookup(ConstArrayRef<const vespalib::stringref*> addr) override { + void lookup(ConstArrayRef<const label_t*> addr) override { subspace = map.lookup(addr); } - bool next_result(ConstArrayRef<vespalib::stringref*>, size_t &idx_out) override { - if (subspace == FastSparseMap::npos()) { + bool next_result(ConstArrayRef<label_t*>, size_t &idx_out) override { + if (subspace == FastAddrMap::npos()) { return false; } idx_out = subspace; - subspace = FastSparseMap::npos(); + subspace = FastAddrMap::npos(); return true; } }; @@ -43,30 +42,27 @@ struct FastLookupView : public Value::Index::View { // find matching mappings for a partial address with brute force filtering struct FastFilterView : public Value::Index::View { - using Label = 
FastSparseMap::HashedLabel; - - size_t num_mapped_dims; - const std::vector<Label> &labels; + const FastAddrMap ↦ std::vector<size_t> match_dims; std::vector<size_t> extract_dims; - std::vector<Label> query; + std::vector<label_t> query; size_t pos; - bool is_match() const { + bool is_match(ConstArrayRef<label_t> addr) const { for (size_t i = 0; i < query.size(); ++i) { - if (query[i].hash != labels[pos + match_dims[i]].hash) { + if (query[i] != addr[match_dims[i]]) { return false; } } return true; } - FastFilterView(const FastSparseMap &map, const std::vector<size_t> &match_dims_in) - : num_mapped_dims(map.num_dims()), labels(map.labels()), match_dims(match_dims_in), - extract_dims(), query(match_dims.size(), Label()), pos(labels.size()) + FastFilterView(const FastAddrMap &map_in, const std::vector<size_t> &match_dims_in) + : map(map_in), match_dims(match_dims_in), + extract_dims(), query(match_dims.size()), pos(FastAddrMap::npos()) { auto my_pos = match_dims.begin(); - for (size_t i = 0; i < num_mapped_dims; ++i) { + for (size_t i = 0; i < map.addr_size(); ++i) { if ((my_pos == match_dims.end()) || (*my_pos != i)) { extract_dims.push_back(i); } else { @@ -74,29 +70,29 @@ struct FastFilterView : public Value::Index::View { } } assert(my_pos == match_dims.end()); - assert((match_dims.size() + extract_dims.size()) == num_mapped_dims); + assert((match_dims.size() + extract_dims.size()) == map.addr_size()); } - void lookup(ConstArrayRef<const vespalib::stringref*> addr) override { + void lookup(ConstArrayRef<const label_t*> addr) override { assert(addr.size() == query.size()); for (size_t i = 0; i < addr.size(); ++i) { - query[i] = Label(*addr[i]); + query[i] = *addr[i]; } pos = 0; } - bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override { - while (pos < labels.size()) { - if (is_match()) { + bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override { + while (pos < map.size()) { + auto addr = map.get_addr(pos); + 
if (is_match(addr)) { assert(addr_out.size() == extract_dims.size()); for (size_t i = 0; i < extract_dims.size(); ++i) { - *addr_out[i] = labels[pos + extract_dims[i]].label; + *addr_out[i] = addr[extract_dims[i]]; } - idx_out = (pos / num_mapped_dims); // is this expensive? - pos += num_mapped_dims; + idx_out = pos++; return true; } - pos += num_mapped_dims; + ++pos; } return false; } @@ -107,29 +103,26 @@ struct FastFilterView : public Value::Index::View { // iterate all mappings struct FastIterateView : public Value::Index::View { - using Labels = std::vector<FastSparseMap::HashedLabel>; - - size_t num_mapped_dims; - const Labels &labels; - size_t pos; + const FastAddrMap ↦ + size_t pos; - FastIterateView(const FastSparseMap &map) - : num_mapped_dims(map.num_dims()), labels(map.labels()), pos(labels.size()) {} + FastIterateView(const FastAddrMap &map_in) + : map(map_in), pos(FastAddrMap::npos()) {} - void lookup(ConstArrayRef<const vespalib::stringref*>) override { + void lookup(ConstArrayRef<const label_t*>) override { pos = 0; } - bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override { - if (pos >= labels.size()) { + bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override { + if (pos >= map.size()) { return false; } - assert(addr_out.size() == num_mapped_dims); - for (size_t i = 0; i < num_mapped_dims; ++i) { - *addr_out[i] = labels[pos + i].label; + auto addr = map.get_addr(pos); + assert(addr.size() == addr_out.size()); + for (size_t i = 0; i < addr.size(); ++i) { + *addr_out[i] = addr[i]; } - idx_out = (pos / num_mapped_dims); // is this expensive? - pos += num_mapped_dims; + idx_out = pos++; return true; } }; @@ -145,9 +138,9 @@ using JoinAddrSource = instruction::SparseJoinPlan::Source; // operations by calling inline functions directly. 
struct FastValueIndex final : Value::Index { - FastSparseMap map; - FastValueIndex(size_t num_mapped_dims_in, size_t expected_subspaces_in) - : map(num_mapped_dims_in, expected_subspaces_in) {} + FastAddrMap map; + FastValueIndex(size_t num_mapped_dims_in, SharedStringRepo::HandleView handle_view, size_t expected_subspaces_in) + : map(num_mapped_dims_in, handle_view, expected_subspaces_in) {} template <typename LCT, typename RCT, typename OCT, typename Fun> static const Value &sparse_full_overlap_join(const ValueType &res_type, const Fun &fun, @@ -220,31 +213,64 @@ struct FastCells { //----------------------------------------------------------------------------- -template <typename T> +template <typename T, bool transient> struct FastValue final : Value, ValueBuilder<T> { + using Handles = std::conditional<transient, + SharedStringRepo::WeakHandles, + SharedStringRepo::StrongHandles>::type; + ValueType my_type; size_t my_subspace_size; + Handles my_handles; FastValueIndex my_index; FastCells<T> my_cells; FastValue(const ValueType &type_in, size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces_in) : my_type(type_in), my_subspace_size(subspace_size_in), - my_index(num_mapped_dims_in, expected_subspaces_in), + my_handles(expected_subspaces_in * num_mapped_dims_in), + my_index(num_mapped_dims_in, my_handles.view(), expected_subspaces_in), my_cells(subspace_size_in * expected_subspaces_in) {} ~FastValue() override; const ValueType &type() const override { return my_type; } const Value::Index &index() const override { return my_index; } TypedCells cells() const override { return TypedCells(my_cells.memory, get_cell_type<T>(), my_cells.size); } + void add_mapping(ConstArrayRef<vespalib::stringref> addr) { + if constexpr (transient) { + (void) addr; + abort(); // cannot use this for transient values + } else { + uint32_t hash = 0; + for (const auto &label: addr) { + hash = FastAddrMap::combine_label_hash(hash, 
FastAddrMap::hash_label(my_handles.add(label))); + } + my_index.map.add_mapping(hash); + } + } + void add_mapping(ConstArrayRef<label_t> addr) { + uint32_t hash = 0; + for (label_t label: addr) { + hash = FastAddrMap::combine_label_hash(hash, FastAddrMap::hash_label(label)); + my_handles.add(label); + } + my_index.map.add_mapping(hash); + } + void add_mapping(ConstArrayRef<label_t> addr, uint32_t hash) { + for (label_t label: addr) { + my_handles.add(label); + } + my_index.map.add_mapping(hash); + } ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) override { - size_t idx = my_index.map.add_mapping(addr) * my_subspace_size; - if (__builtin_expect((idx == my_cells.size), true)) { - return my_cells.add_cells(my_subspace_size); - } - return ArrayRef<T>(my_cells.get(idx), my_subspace_size); + add_mapping(addr); + return my_cells.add_cells(my_subspace_size); + } + ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) override { + add_mapping(addr); + return my_cells.add_cells(my_subspace_size); } std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>> self) override { - if (my_index.map.num_dims() == 0) { + if (my_index.map.addr_size() == 0) { assert(my_index.map.size() == 1); } assert(my_cells.size == (my_index.map.size() * my_subspace_size)); @@ -254,13 +280,14 @@ struct FastValue final : Value, ValueBuilder<T> { return std::unique_ptr<Value>(this); } MemoryUsage get_memory_usage() const override { - MemoryUsage usage = self_memory_usage<FastValue<T>>(); + MemoryUsage usage = self_memory_usage<FastValue<T,transient>>(); + usage.merge(vector_extra_memory_usage(my_handles.view().handles())); usage.merge(my_index.map.estimate_extra_memory_usage()); usage.merge(my_cells.estimate_extra_memory_usage()); return usage; } }; -template <typename T> FastValue<T>::~FastValue() = default; +template <typename T,bool transient> FastValue<T,transient>::~FastValue() = default; //----------------------------------------------------------------------------- @@ 
-282,6 +309,9 @@ struct FastDenseValue final : Value, ValueBuilder<T> { ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref>) override { return ArrayRef<T>(my_cells.get(0), my_cells.size); } + ArrayRef<T> add_subspace(ConstArrayRef<label_t>) override { + return ArrayRef<T>(my_cells.get(0), my_cells.size); + } std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>> self) override { ValueBuilder<T>* me = this; assert(me == self.get()); @@ -289,7 +319,7 @@ struct FastDenseValue final : Value, ValueBuilder<T> { return std::unique_ptr<Value>(this); } MemoryUsage get_memory_usage() const override { - MemoryUsage usage = self_memory_usage<FastValue<T>>(); + MemoryUsage usage = self_memory_usage<FastDenseValue<T>>(); usage.merge(my_cells.estimate_extra_memory_usage()); return usage; } @@ -302,6 +332,7 @@ template <typename T> struct FastScalarBuilder final : ValueBuilder<T> { T _value; ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref>) final override { return ArrayRef<T>(&_value, 1); } + ArrayRef<T> add_subspace(ConstArrayRef<label_t>) final override { return ArrayRef<T>(&_value, 1); }; std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>>) final override { return std::make_unique<ScalarValue<T>>(_value); } }; @@ -313,19 +344,16 @@ FastValueIndex::sparse_full_overlap_join(const ValueType &res_type, const Fun &f const FastValueIndex &lhs, const FastValueIndex &rhs, ConstArrayRef<LCT> lhs_cells, ConstArrayRef<RCT> rhs_cells, Stash &stash) { - auto &result = stash.create<FastValue<OCT>>(res_type, lhs.map.num_dims(), 1, lhs.map.size()); - auto &result_map = result.my_index.map; - lhs.map.each_map_entry([&](auto lhs_subspace, auto hash) - { - auto rhs_subspace = rhs.map.lookup(hash); - if (rhs_subspace != FastSparseMap::npos()) { - auto idx = result_map.add_mapping(lhs.map.make_addr(lhs_subspace), hash); - if (__builtin_expect((idx == result.my_cells.size), true)) { - auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]); - 
result.my_cells.push_back_fast(cell_value); - } - } - }); + auto &result = stash.create<FastValue<OCT,true>>(res_type, lhs.map.addr_size(), 1, lhs.map.size()); + lhs.map.each_map_entry([&](auto lhs_subspace, auto hash) { + auto lhs_addr = lhs.map.get_addr(lhs_subspace); + auto rhs_subspace = rhs.map.lookup(lhs_addr, hash); + if (rhs_subspace != FastAddrMap::npos()) { + result.add_mapping(lhs_addr, hash); + auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]); + result.my_cells.push_back_fast(cell_value); + } + }); return result; } @@ -338,10 +366,9 @@ FastValueIndex::sparse_no_overlap_join(const ValueType &res_type, const Fun &fun const std::vector<JoinAddrSource> &addr_sources, ConstArrayRef<LCT> lhs_cells, ConstArrayRef<RCT> rhs_cells, Stash &stash) { - using HashedLabelRef = std::reference_wrapper<const FastSparseMap::HashedLabel>; size_t num_mapped_dims = addr_sources.size(); - auto &result = stash.create<FastValue<OCT>>(res_type, num_mapped_dims, 1, lhs.map.size()*rhs.map.size()); - std::vector<HashedLabelRef> output_addr(num_mapped_dims, FastSparseMap::empty_label); + auto &result = stash.create<FastValue<OCT,true>>(res_type, num_mapped_dims, 1, lhs.map.size()*rhs.map.size()); + std::vector<label_t> output_addr(num_mapped_dims); std::vector<size_t> store_lhs_idx; std::vector<size_t> store_rhs_idx; size_t out_idx = 0; @@ -359,24 +386,22 @@ FastValueIndex::sparse_no_overlap_join(const ValueType &res_type, const Fun &fun } assert(out_idx == output_addr.size()); for (size_t lhs_subspace = 0; lhs_subspace < lhs.map.size(); ++lhs_subspace) { - auto l_addr = lhs.map.make_addr(lhs_subspace); + auto l_addr = lhs.map.get_addr(lhs_subspace); assert(l_addr.size() == store_lhs_idx.size()); for (size_t i = 0; i < store_lhs_idx.size(); ++i) { size_t addr_idx = store_lhs_idx[i]; output_addr[addr_idx] = l_addr[i]; } for (size_t rhs_subspace = 0; rhs_subspace < rhs.map.size(); ++rhs_subspace) { - auto r_addr = rhs.map.make_addr(rhs_subspace); + auto r_addr = 
rhs.map.get_addr(rhs_subspace); assert(r_addr.size() == store_rhs_idx.size()); for (size_t i = 0; i < store_rhs_idx.size(); ++i) { size_t addr_idx = store_rhs_idx[i]; output_addr[addr_idx] = r_addr[i]; } - auto idx = result.my_index.map.add_mapping(ConstArrayRef(output_addr)); - if (__builtin_expect((idx == result.my_cells.size), true)) { - auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]); - result.my_cells.push_back_fast(cell_value); - } + result.add_mapping(ConstArrayRef(output_addr)); + auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]); + result.my_cells.push_back_fast(cell_value); } } return result; @@ -391,22 +416,22 @@ FastValueIndex::sparse_only_merge(const ValueType &res_type, const Fun &fun, ConstArrayRef<LCT> lhs_cells, ConstArrayRef<RCT> rhs_cells, Stash &stash) { size_t guess_size = lhs.map.size() + rhs.map.size(); - auto &result = stash.create<FastValue<OCT>>(res_type, lhs.map.num_dims(), 1, guess_size); - result.my_index = lhs; - for (auto val : lhs_cells) { - result.my_cells.push_back_fast(val); - } + auto &result = stash.create<FastValue<OCT,true>>(res_type, lhs.map.addr_size(), 1, guess_size); + lhs.map.each_map_entry([&](auto lhs_subspace, auto hash) + { + result.add_mapping(lhs.map.get_addr(lhs_subspace), hash); + result.my_cells.push_back_fast(lhs_cells[lhs_subspace]); + }); rhs.map.each_map_entry([&](auto rhs_subspace, auto hash) { - auto lhs_subspace = lhs.map.lookup(hash); - if (lhs_subspace == FastSparseMap::npos()) { - auto idx = result.my_index.map.add_mapping(rhs.map.make_addr(rhs_subspace), hash); - if (__builtin_expect((idx == result.my_cells.size), true)) { - result.my_cells.push_back_fast(rhs_cells[rhs_subspace]); - } + auto rhs_addr = rhs.map.get_addr(rhs_subspace); + auto result_subspace = result.my_index.map.lookup(rhs_addr, hash); + if (result_subspace == FastAddrMap::npos()) { + result.add_mapping(rhs_addr, hash); + result.my_cells.push_back_fast(rhs_cells[rhs_subspace]); } else { - 
auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]); - *result.my_cells.get(lhs_subspace) = cell_value; + OCT &out_cell = *result.my_cells.get(result_subspace); + out_cell = fun(out_cell, rhs_cells[rhs_subspace]); } }); return result; diff --git a/eval/src/vespa/eval/eval/label.h b/eval/src/vespa/eval/eval/label.h new file mode 100644 index 00000000000..931f96a4f1a --- /dev/null +++ b/eval/src/vespa/eval/eval/label.h @@ -0,0 +1,15 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <cstdint> + +namespace vespalib::eval { + +// We use string ids from SharedStringRepo as labels. Note that +// label_t represents the lightweight reference type. Other structures +// (Handle/StrongHandles) are needed to keep the id valid. + +using label_t = uint32_t; + +} diff --git a/eval/src/vespa/eval/eval/simple_value.cpp b/eval/src/vespa/eval/eval/simple_value.cpp index 113f89f77fb..0cbbb29ecf1 100644 --- a/eval/src/vespa/eval/eval/simple_value.cpp +++ b/eval/src/vespa/eval/eval/simple_value.cpp @@ -30,7 +30,8 @@ struct CreateSimpleValueBuilderBase { // look up a full address in the map directly struct SimpleLookupView : public Value::Index::View { - using Labels = std::vector<vespalib::string>; + using Handle = SharedStringRepo::Handle; + using Labels = std::vector<Handle>; using Map = std::map<Labels, size_t>; const Map &map; @@ -38,17 +39,17 @@ struct SimpleLookupView : public Value::Index::View { Map::const_iterator pos; SimpleLookupView(const Map &map_in, size_t num_dims) - : map(map_in), my_addr(num_dims, ""), pos(map.end()) {} + : map(map_in), my_addr(num_dims), pos(map.end()) {} - void lookup(ConstArrayRef<const vespalib::stringref*> addr) override { + void lookup(ConstArrayRef<const label_t*> addr) override { assert(addr.size() == my_addr.size()); for (size_t i = 0; i < my_addr.size(); ++i) { - my_addr[i] = *addr[i]; + my_addr[i] = Handle::handle_from_id(*addr[i]); } 
pos = map.find(my_addr); } - bool next_result(ConstArrayRef<vespalib::stringref*>, size_t &idx_out) override { + bool next_result(ConstArrayRef<label_t*>, size_t &idx_out) override { if (pos == map.end()) { return false; } @@ -63,13 +64,14 @@ struct SimpleLookupView : public Value::Index::View { // find matching mappings for a partial address with brute force filtering struct SimpleFilterView : public Value::Index::View { - using Labels = std::vector<vespalib::string>; + using Handle = SharedStringRepo::Handle; + using Labels = std::vector<Handle>; using Map = std::map<Labels, size_t>; const Map &map; std::vector<size_t> match_dims; std::vector<size_t> extract_dims; - std::vector<vespalib::string> query; + std::vector<Handle> query; Map::const_iterator pos; bool is_match() const { @@ -82,7 +84,7 @@ struct SimpleFilterView : public Value::Index::View { } SimpleFilterView(const Map &map_in, const std::vector<size_t> &match_dims_in, size_t num_dims) - : map(map_in), match_dims(match_dims_in), extract_dims(), query(match_dims.size(), ""), pos(map.end()) + : map(map_in), match_dims(match_dims_in), extract_dims(), query(match_dims.size()), pos(map.end()) { auto my_pos = match_dims.begin(); for (size_t i = 0; i < num_dims; ++i) { @@ -96,20 +98,20 @@ struct SimpleFilterView : public Value::Index::View { assert((match_dims.size() + extract_dims.size()) == num_dims); } - void lookup(ConstArrayRef<const vespalib::stringref*> addr) override { + void lookup(ConstArrayRef<const label_t*> addr) override { assert(addr.size() == query.size()); for (size_t i = 0; i < addr.size(); ++i) { - query[i] = *addr[i]; + query[i] = Handle::handle_from_id(*addr[i]); } pos = map.begin(); } - bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override { + bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override { while (pos != map.end()) { if (is_match()) { assert(addr_out.size() == extract_dims.size()); for (size_t i = 0; i < extract_dims.size(); 
++i) { - *addr_out[i] = pos->first[extract_dims[i]]; + *addr_out[i] = pos->first[extract_dims[i]].id(); } idx_out = pos->second; ++pos; @@ -126,7 +128,8 @@ struct SimpleFilterView : public Value::Index::View { // iterate all mappings struct SimpleIterateView : public Value::Index::View { - using Labels = std::vector<vespalib::string>; + using Handle = SharedStringRepo::Handle; + using Labels = std::vector<Handle>; using Map = std::map<Labels, size_t>; const Map &map; @@ -135,17 +138,17 @@ struct SimpleIterateView : public Value::Index::View { SimpleIterateView(const Map &map_in) : map(map_in), pos(map.end()) {} - void lookup(ConstArrayRef<const vespalib::stringref*>) override { + void lookup(ConstArrayRef<const label_t*>) override { pos = map.begin(); } - bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override { + bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override { if (pos == map.end()) { return false; } assert(addr_out.size() == pos->first.size()); for (size_t i = 0; i < addr_out.size(); ++i) { - *addr_out[i] = pos->first[i]; + *addr_out[i] = pos->first[i].id(); } idx_out = pos->second; ++pos; @@ -182,6 +185,17 @@ SimpleValue::add_mapping(ConstArrayRef<vespalib::stringref> addr) assert(was_inserted); } +void +SimpleValue::add_mapping(ConstArrayRef<label_t> addr) +{ + Labels my_addr; + for(label_t label: addr) { + my_addr.emplace_back(Handle::handle_from_id(label)); + } + auto [ignore, was_inserted] = _index.emplace(my_addr, _index.size()); + assert(was_inserted); +} + MemoryUsage SimpleValue::estimate_extra_memory_usage() const { @@ -246,15 +260,26 @@ SimpleValueT<T>::add_subspace(ConstArrayRef<vespalib::stringref> addr) return ArrayRef<T>(&_cells[old_size], subspace_size()); } +template <typename T> +ArrayRef<T> +SimpleValueT<T>::add_subspace(ConstArrayRef<label_t> addr) +{ + size_t old_size = _cells.size(); + add_mapping(addr); + _cells.resize(old_size + subspace_size(), 
std::numeric_limits<T>::quiet_NaN()); + return ArrayRef<T>(&_cells[old_size], subspace_size()); +} + //----------------------------------------------------------------------------- SimpleValueBuilderFactory::SimpleValueBuilderFactory() = default; SimpleValueBuilderFactory SimpleValueBuilderFactory::_factory; std::unique_ptr<ValueBuilderBase> -SimpleValueBuilderFactory::create_value_builder_base(const ValueType &type, size_t num_mapped_dims, size_t subspace_size, +SimpleValueBuilderFactory::create_value_builder_base(const ValueType &type, bool transient, size_t num_mapped_dims, size_t subspace_size, size_t expected_subspaces) const { + (void) transient; return typify_invoke<1,TypifyCellType,CreateSimpleValueBuilderBase>(type.cell_type(), type, num_mapped_dims, subspace_size, expected_subspaces); } diff --git a/eval/src/vespa/eval/eval/simple_value.h b/eval/src/vespa/eval/eval/simple_value.h index 590c0b4ef16..1fd645b704c 100644 --- a/eval/src/vespa/eval/eval/simple_value.h +++ b/eval/src/vespa/eval/eval/simple_value.h @@ -3,7 +3,7 @@ #pragma once #include "value.h" -#include <vespa/vespalib/stllike/string.h> +#include <vespa/vespalib/util/shared_string_repo.h> #include <vector> #include <map> @@ -26,7 +26,8 @@ class TensorSpec; class SimpleValue : public Value, public Value::Index { private: - using Labels = std::vector<vespalib::string>; + using Handle = SharedStringRepo::Handle; + using Labels = std::vector<Handle>; ValueType _type; size_t _num_mapped_dims; @@ -36,6 +37,7 @@ protected: size_t num_mapped_dims() const { return _num_mapped_dims; } size_t subspace_size() const { return _subspace_size; } void add_mapping(ConstArrayRef<vespalib::stringref> addr); + void add_mapping(ConstArrayRef<label_t> addr); MemoryUsage estimate_extra_memory_usage() const; public: SimpleValue(const ValueType &type, size_t num_mapped_dims_in, size_t subspace_size_in); @@ -62,6 +64,7 @@ public: ~SimpleValueT() override; TypedCells cells() const override { return 
TypedCells(ConstArrayRef<T>(_cells)); } ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) override; + ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) override; std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>> self) override { if (num_mapped_dims() == 0) { assert(size() == 1); @@ -87,7 +90,7 @@ class SimpleValueBuilderFactory : public ValueBuilderFactory { private: SimpleValueBuilderFactory(); static SimpleValueBuilderFactory _factory; - std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type, + std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type, bool transient, size_t num_mapped_dims, size_t subspace_size, size_t expected_subspaces) const override; public: static const SimpleValueBuilderFactory &get() { return _factory; } diff --git a/eval/src/vespa/eval/eval/value.cpp b/eval/src/vespa/eval/eval/value.cpp index 7abc8d568cb..73c7c40636c 100644 --- a/eval/src/vespa/eval/eval/value.cpp +++ b/eval/src/vespa/eval/eval/value.cpp @@ -12,8 +12,8 @@ namespace { struct TrivialView : Value::Index::View { bool first = false; - void lookup(ConstArrayRef<const vespalib::stringref*> ) override { first = true; } - bool next_result(ConstArrayRef<vespalib::stringref*> , size_t &idx_out) override { + void lookup(ConstArrayRef<const label_t*> ) override { first = true; } + bool next_result(ConstArrayRef<label_t*> , size_t &idx_out) override { if (first) { idx_out = 0; first = false; diff --git a/eval/src/vespa/eval/eval/value.h b/eval/src/vespa/eval/eval/value.h index 186c3698dcd..2efb7d7c1e4 100644 --- a/eval/src/vespa/eval/eval/value.h +++ b/eval/src/vespa/eval/eval/value.h @@ -2,6 +2,7 @@ #pragma once +#include "label.h" #include "memory_usage_stuff.h" #include "value_type.h" #include "typed_cells.h" @@ -36,13 +37,13 @@ struct Value { // partial address for the dimensions given to // create_view. Results from the lookup is extracted using // the next_result function. 
- virtual void lookup(ConstArrayRef<const vespalib::stringref*> addr) = 0; + virtual void lookup(ConstArrayRef<const label_t*> addr) = 0; // Extract the next result (if any) from the previous // lookup into the given partial address and index. Only // the labels for the dimensions NOT specified in // create_view will be extracted here. - virtual bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) = 0; + virtual bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) = 0; virtual ~View() {} }; @@ -163,6 +164,14 @@ struct ValueBuilder : ValueBuilderBase { // is not allowed. virtual ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) = 0; + // add a dense subspace for the given address where labels are + // specified by shared string repo ids. Note that the caller is + // responsible for making sure the ids are valid 'long enough'. + virtual ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) = 0; + + // convenience function to add a subspace with an empty address + ArrayRef<T> add_subspace() { return add_subspace(ConstArrayRef<label_t>()); } + // Given the ownership of the builder itself, produce the newly // created value. This means that builders can only be used once, // it also means values can build themselves. @@ -179,26 +188,40 @@ struct ValueBuilder : ValueBuilderBase { * builder. With interoperability between all values. 
**/ struct ValueBuilderFactory { +private: template <typename T> - std::unique_ptr<ValueBuilder<T>> create_value_builder(const ValueType &type, + std::unique_ptr<ValueBuilder<T>> create_value_builder(const ValueType &type, bool transient, size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces) const { assert(check_cell_type<T>(type.cell_type())); - auto base = create_value_builder_base(type, num_mapped_dims_in, subspace_size_in, expected_subspaces); + auto base = create_value_builder_base(type, transient, num_mapped_dims_in, subspace_size_in, expected_subspaces); ValueBuilder<T> *builder = dynamic_cast<ValueBuilder<T>*>(base.get()); assert(builder); base.release(); return std::unique_ptr<ValueBuilder<T>>(builder); } +public: + template <typename T> + std::unique_ptr<ValueBuilder<T>> create_value_builder(const ValueType &type, + size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces) const + { + return create_value_builder<T>(type, false, num_mapped_dims_in, subspace_size_in, expected_subspaces); + } + template <typename T> + std::unique_ptr<ValueBuilder<T>> create_transient_value_builder(const ValueType &type, + size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces) const + { + return create_value_builder<T>(type, true, num_mapped_dims_in, subspace_size_in, expected_subspaces); + } template <typename T> std::unique_ptr<ValueBuilder<T>> create_value_builder(const ValueType &type) const { - return create_value_builder<T>(type, type.count_mapped_dimensions(), type.dense_subspace_size(), 1); + return create_value_builder<T>(type, false, type.count_mapped_dimensions(), type.dense_subspace_size(), 1); } std::unique_ptr<Value> copy(const Value &value) const; virtual ~ValueBuilderFactory() {} protected: - virtual std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type, + virtual std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type, bool transient, 
size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces) const = 0; }; diff --git a/eval/src/vespa/eval/eval/value_codec.cpp b/eval/src/vespa/eval/eval/value_codec.cpp index 923d3f29cd3..53131da86d8 100644 --- a/eval/src/vespa/eval/eval/value_codec.cpp +++ b/eval/src/vespa/eval/eval/value_codec.cpp @@ -7,6 +7,7 @@ #include <vespa/vespalib/util/exceptions.h> #include <vespa/vespalib/util/typify.h> #include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/util/shared_string_repo.h> using vespalib::make_string_short::fmt; @@ -128,9 +129,10 @@ size_t maybe_decode_num_blocks(nbostream &input, bool has_mapped_dims, const For return 1; } -void encode_mapped_labels(nbostream &output, size_t num_mapped_dims, const std::vector<vespalib::stringref> &addr) { +void encode_mapped_labels(nbostream &output, size_t num_mapped_dims, const std::vector<label_t> &addr) { for (size_t i = 0; i < num_mapped_dims; ++i) { - output.writeSmallString(addr[i]); + vespalib::string str = SharedStringRepo::Handle::string_from_id(addr[i]); + output.writeSmallString(str); } } @@ -175,7 +177,7 @@ struct ContentDecoder { } // add implicit empty subspace if ((state.num_mapped_dims == 0) && (state.num_blocks == 0)) { - for (T &cell: builder->add_subspace({})) { + for (T &cell: builder->add_subspace()) { cell = T{}; } } @@ -229,8 +231,8 @@ struct CreateTensorSpecFromValue { TensorSpec spec(value.type().to_spec()); size_t subspace_id = 0; size_t subspace_size = value.type().dense_subspace_size(); - std::vector<vespalib::stringref> labels(value.type().count_mapped_dimensions()); - std::vector<vespalib::stringref*> label_refs; + std::vector<label_t> labels(value.type().count_mapped_dimensions()); + std::vector<label_t*> label_refs; for (auto &label: labels) { label_refs.push_back(&label); } @@ -241,7 +243,7 @@ struct CreateTensorSpecFromValue { TensorSpec::Address addr; for (const auto &dim: value.type().dimensions()) { if (dim.is_mapped()) { - addr.emplace(dim.name, 
labels[label_idx++]); + addr.emplace(dim.name, SharedStringRepo::Handle::string_from_id(labels[label_idx++])); } } for (size_t i = 0; i < subspace_size; ++i) { @@ -270,8 +272,8 @@ struct EncodeState { struct ContentEncoder { template<typename T> static void invoke(const Value &value, const EncodeState &state, nbostream &output) { - std::vector<vespalib::stringref> address(state.num_mapped_dims); - std::vector<vespalib::stringref*> a_refs(state.num_mapped_dims);; + std::vector<label_t> address(state.num_mapped_dims); + std::vector<label_t*> a_refs(state.num_mapped_dims);; for (size_t i = 0; i < state.num_mapped_dims; ++i) { a_refs[i] = &address[i]; } diff --git a/eval/src/vespa/eval/instruction/generic_concat.cpp b/eval/src/vespa/eval/instruction/generic_concat.cpp index fa9d2192b99..5d8ab7187c0 100644 --- a/eval/src/vespa/eval/instruction/generic_concat.cpp +++ b/eval/src/vespa/eval/instruction/generic_concat.cpp @@ -47,10 +47,10 @@ generic_concat(const Value &a, const Value &b, auto a_cells = a.cells().typify<LCT>(); auto b_cells = b.cells().typify<RCT>(); SparseJoinState sparse(sparse_plan, a.index(), b.index()); - auto builder = factory.create_value_builder<OCT>(res_type, - sparse_plan.sources.size(), - dense_plan.output_size, - sparse.first_index.size()); + auto builder = factory.create_transient_value_builder<OCT>(res_type, + sparse_plan.sources.size(), + dense_plan.output_size, + sparse.first_index.size()); auto outer = sparse.first_index.create_view({}); auto inner = sparse.second_index.create_view(sparse.second_view_dims); outer->lookup({}); diff --git a/eval/src/vespa/eval/instruction/generic_create.cpp b/eval/src/vespa/eval/instruction/generic_create.cpp index 02c89e0b43f..6e30da846e7 100644 --- a/eval/src/vespa/eval/instruction/generic_create.cpp +++ b/eval/src/vespa/eval/instruction/generic_create.cpp @@ -5,6 +5,7 @@ #include <vespa/eval/eval/array_array_map.h> #include <vespa/vespalib/util/stash.h> #include <vespa/vespalib/util/typify.h> +#include 
<vespa/vespalib/util/shared_string_repo.h> #include <cassert> using namespace vespalib::eval::tensor_function; @@ -13,6 +14,7 @@ namespace vespalib::eval::instruction { using State = InterpretedFunction::State; using Instruction = InterpretedFunction::Instruction; +using Handle = SharedStringRepo::Handle; namespace { @@ -21,12 +23,12 @@ struct CreateParam { size_t num_mapped_dims; size_t dense_subspace_size; size_t num_children; - ArrayArrayMap<vespalib::string,size_t> my_spec; + ArrayArrayMap<Handle,size_t> my_spec; const ValueBuilderFactory &factory; static constexpr size_t npos = -1; - ArrayRef<size_t> indexes(ConstArrayRef<vespalib::string> key) { + ArrayRef<size_t> indexes(ConstArrayRef<Handle> key) { auto [tag, first_time] = my_spec.lookup_or_add_entry(key); auto rv = my_spec.get_values(tag); if (first_time) { @@ -49,7 +51,7 @@ struct CreateParam { { size_t last_child = num_children - 1; for (const auto & entry : spec_in) { - std::vector<vespalib::string> sparse_key; + std::vector<Handle> sparse_key; size_t dense_key = 0; auto dim = res_type.dimensions().begin(); auto binding = entry.first.begin(); @@ -58,7 +60,7 @@ struct CreateParam { assert(dim->name == binding->first); assert(dim->is_mapped() == binding->second.is_mapped()); if (dim->is_mapped()) { - sparse_key.push_back(binding->second.name); + sparse_key.push_back(Handle(binding->second.name)); } else { assert(binding->second.index < dim->size); dense_key = (dense_key * dim->size) + binding->second.index; @@ -76,16 +78,16 @@ struct CreateParam { template <typename T> void my_generic_create_op(State &state, uint64_t param_in) { const auto &param = unwrap_param<CreateParam>(param_in); - auto builder = param.factory.create_value_builder<T>(param.res_type, - param.num_mapped_dims, - param.dense_subspace_size, - param.my_spec.size()); - std::vector<vespalib::stringref> sparse_addr; + auto builder = param.factory.create_transient_value_builder<T>(param.res_type, + param.num_mapped_dims, + 
param.dense_subspace_size, + param.my_spec.size()); + std::vector<label_t> sparse_addr; param.my_spec.each_entry([&](const auto &key, const auto &values) { sparse_addr.clear(); for (const auto & label : key) { - sparse_addr.push_back(label); + sparse_addr.push_back(label.id()); } T *dst = builder->add_subspace(sparse_addr).begin(); for (size_t stack_idx : values) { diff --git a/eval/src/vespa/eval/instruction/generic_join.cpp b/eval/src/vespa/eval/instruction/generic_join.cpp index 026df5aa993..e0dc0feea28 100644 --- a/eval/src/vespa/eval/instruction/generic_join.cpp +++ b/eval/src/vespa/eval/instruction/generic_join.cpp @@ -41,7 +41,7 @@ generic_mixed_join(const Value &lhs, const Value &rhs, const JoinParam &param) if (param.sparse_plan.lhs_overlap.empty() && param.sparse_plan.rhs_overlap.empty()) { expected_subspaces = sparse.first_index.size() * sparse.second_index.size(); } - auto builder = param.factory.create_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), param.dense_plan.out_size, expected_subspaces); + auto builder = param.factory.create_transient_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), param.dense_plan.out_size, expected_subspaces); auto outer = sparse.first_index.create_view({}); auto inner = sparse.second_index.create_view(sparse.second_view_dims); outer->lookup({}); @@ -92,7 +92,7 @@ void my_sparse_no_overlap_join_op(State &state, uint64_t param_in) { SparseJoinState sparse(param.sparse_plan, lhs.index(), rhs.index()); auto guess = lhs.index().size() * rhs.index().size(); assert(param.dense_plan.out_size == 1); - auto builder = param.factory.create_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), 1, guess); + auto builder = param.factory.create_transient_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), 1, guess); auto outer = sparse.first_index.create_view({}); assert(sparse.second_view_dims.empty()); auto inner = sparse.second_index.create_view({}); @@ -131,7 +131,7 
@@ void my_sparse_full_overlap_join_op(State &state, uint64_t param_in) { } Fun fun(param.function); SparseJoinState sparse(param.sparse_plan, lhs_index, rhs_index); - auto builder = param.factory.create_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), param.dense_plan.out_size, sparse.first_index.size()); + auto builder = param.factory.create_transient_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), param.dense_plan.out_size, sparse.first_index.size()); auto outer = sparse.first_index.create_view({}); auto inner = sparse.second_index.create_view(sparse.second_view_dims); outer->lookup({}); diff --git a/eval/src/vespa/eval/instruction/generic_join.h b/eval/src/vespa/eval/instruction/generic_join.h index 988286be980..217f3195dec 100644 --- a/eval/src/vespa/eval/instruction/generic_join.h +++ b/eval/src/vespa/eval/instruction/generic_join.h @@ -68,10 +68,10 @@ struct SparseJoinState { const Value::Index &first_index; const Value::Index &second_index; const std::vector<size_t> &second_view_dims; - std::vector<vespalib::stringref> full_address; - std::vector<vespalib::stringref*> first_address; - std::vector<const vespalib::stringref*> address_overlap; - std::vector<vespalib::stringref*> second_only_address; + std::vector<label_t> full_address; + std::vector<label_t*> first_address; + std::vector<const label_t*> address_overlap; + std::vector<label_t*> second_only_address; size_t lhs_subspace; size_t rhs_subspace; size_t &first_subspace; diff --git a/eval/src/vespa/eval/instruction/generic_merge.cpp b/eval/src/vespa/eval/instruction/generic_merge.cpp index 02749a04eb9..107cb805d74 100644 --- a/eval/src/vespa/eval/instruction/generic_merge.cpp +++ b/eval/src/vespa/eval/instruction/generic_merge.cpp @@ -63,10 +63,10 @@ generic_mixed_merge(const Value &a, const Value &b, const size_t num_mapped = params.num_mapped_dimensions; const size_t subspace_size = params.dense_subspace_size; size_t guess_subspaces = 
std::max(a.index().size(), b.index().size()); - auto builder = params.factory.create_value_builder<OCT>(params.res_type, num_mapped, subspace_size, guess_subspaces); - std::vector<vespalib::stringref> address(num_mapped); - std::vector<const vespalib::stringref *> addr_cref; - std::vector<vespalib::stringref *> addr_ref; + auto builder = params.factory.create_transient_value_builder<OCT>(params.res_type, num_mapped, subspace_size, guess_subspaces); + std::vector<label_t> address(num_mapped); + std::vector<const label_t *> addr_cref; + std::vector<label_t *> addr_ref; for (auto & ref : address) { addr_cref.push_back(&ref); addr_ref.push_back(&ref); diff --git a/eval/src/vespa/eval/instruction/generic_peek.cpp b/eval/src/vespa/eval/instruction/generic_peek.cpp index 66538911890..d94742ae15c 100644 --- a/eval/src/vespa/eval/instruction/generic_peek.cpp +++ b/eval/src/vespa/eval/instruction/generic_peek.cpp @@ -7,6 +7,7 @@ #include <vespa/vespalib/util/stash.h> #include <vespa/vespalib/util/typify.h> #include <vespa/vespalib/util/visit_ranges.h> +#include <vespa/vespalib/util/shared_string_repo.h> #include <cassert> using namespace vespalib::eval::tensor_function; @@ -16,6 +17,8 @@ namespace vespalib::eval::instruction { using State = InterpretedFunction::State; using Instruction = InterpretedFunction::Instruction; +using Handle = SharedStringRepo::Handle; + namespace { static constexpr size_t npos = -1; @@ -35,28 +38,43 @@ size_t count_children(const Spec &spec) } struct DimSpec { - vespalib::stringref name; - GenericPeek::SpecMap::mapped_type child_or_label; + enum class DimType { CHILD_IDX, LABEL_IDX, LABEL_STR }; + vespalib::string name; + DimType dim_type; + size_t idx; + Handle str; + static DimSpec from_child(const vespalib::string &name_in, size_t child_idx) { + return {name_in, DimType::CHILD_IDX, child_idx, Handle()}; + } + static DimSpec from_label(const vespalib::string &name_in, const TensorSpec::Label &label) { + if (label.is_mapped()) { + return 
{name_in, DimType::LABEL_STR, 0, Handle(label.name)}; + } else { + assert(label.is_indexed()); + return {name_in, DimType::LABEL_IDX, label.index, Handle()}; + } + } + ~DimSpec(); bool has_child() const { - return std::holds_alternative<size_t>(child_or_label); + return (dim_type == DimType::CHILD_IDX); } bool has_label() const { - return std::holds_alternative<TensorSpec::Label>(child_or_label); + return (dim_type != DimType::CHILD_IDX); } size_t get_child_idx() const { - return std::get<size_t>(child_or_label); + assert(dim_type == DimType::CHILD_IDX); + return idx; } - vespalib::stringref get_label_name() const { - auto & label = std::get<TensorSpec::Label>(child_or_label); - assert(label.is_mapped()); - return label.name; + label_t get_label_name() const { + assert(dim_type == DimType::LABEL_STR); + return str.id(); } size_t get_label_index() const { - auto & label = std::get<TensorSpec::Label>(child_or_label); - assert(label.is_indexed()); - return label.index; + assert(dim_type == DimType::LABEL_IDX); + return idx; } }; +DimSpec::~DimSpec() = default; struct ExtractedSpecs { using Dimension = ValueType::Dimension; @@ -85,7 +103,11 @@ struct ExtractedSpecs { dimensions.push_back(a); const auto & [spec_dim_name, child_or_label] = b; assert(a.name == spec_dim_name); - specs.emplace_back(DimSpec{a.name, child_or_label}); + if (std::holds_alternative<size_t>(child_or_label)) { + specs.push_back(DimSpec::from_child(a.name, std::get<size_t>(child_or_label))); + } else { + specs.push_back(DimSpec::from_label(a.name, std::get<TensorSpec::Label>(child_or_label))); + } } } }; @@ -181,22 +203,21 @@ struct DensePlan { }; struct SparseState { - std::vector<vespalib::string> view_addr; - std::vector<vespalib::stringref> view_refs; - std::vector<const vespalib::stringref *> lookup_refs; - std::vector<vespalib::stringref> output_addr; - std::vector<vespalib::stringref *> fetch_addr; - - SparseState(std::vector<vespalib::string> view_addr_in, size_t out_dims) - : 
view_addr(std::move(view_addr_in)), - view_refs(view_addr.size()), + std::vector<Handle> handles; + std::vector<label_t> view_addr; + std::vector<const label_t *> lookup_refs; + std::vector<label_t> output_addr; + std::vector<label_t *> fetch_addr; + + SparseState(std::vector<Handle> handles_in, std::vector<label_t> view_addr_in, size_t out_dims) + : handles(std::move(handles_in)), + view_addr(std::move(view_addr_in)), lookup_refs(view_addr.size()), output_addr(out_dims), fetch_addr(out_dims) { for (size_t i = 0; i < view_addr.size(); ++i) { - view_refs[i] = view_addr[i]; - lookup_refs[i] = &view_refs[i]; + lookup_refs[i] = &view_addr[i]; } for (size_t i = 0; i < out_dims; ++i) { fetch_addr[i] = &output_addr[i]; @@ -236,17 +257,19 @@ struct SparsePlan { template <typename Getter> SparseState make_state(const Getter &get_child_value) const { - std::vector<vespalib::string> view_addr; + std::vector<Handle> handles; + std::vector<label_t> view_addr; for (const auto & dim : lookup_specs) { if (dim.has_child()) { int64_t child_value = get_child_value(dim.get_child_idx()); - view_addr.push_back(vespalib::make_string("%" PRId64, child_value)); + handles.emplace_back(vespalib::make_string("%" PRId64, child_value)); + view_addr.push_back(handles.back().id()); } else { view_addr.push_back(dim.get_label_name()); } } assert(view_addr.size() == view_dims.size()); - return SparseState(std::move(view_addr), out_mapped_dims); + return SparseState(std::move(handles), std::move(view_addr), out_mapped_dims); } }; SparsePlan::~SparsePlan() = default; @@ -284,10 +307,10 @@ generic_mixed_peek(const ValueType &res_type, { auto input_cells = input_value.cells().typify<ICT>(); size_t bad_guess = 1; - auto builder = factory.create_value_builder<OCT>(res_type, - sparse_plan.out_mapped_dims, - dense_plan.out_dense_size, - bad_guess); + auto builder = factory.create_transient_value_builder<OCT>(res_type, + sparse_plan.out_mapped_dims, + dense_plan.out_dense_size, + bad_guess); size_t 
filled_subspaces = 0; size_t dense_offset = dense_plan.get_offset(get_child_value); if (dense_offset != npos) { @@ -304,7 +327,7 @@ generic_mixed_peek(const ValueType &res_type, } } if ((sparse_plan.out_mapped_dims == 0) && (filled_subspaces == 0)) { - for (auto & v : builder->add_subspace({})) { + for (auto & v : builder->add_subspace()) { v = OCT{}; } } diff --git a/eval/src/vespa/eval/instruction/generic_reduce.cpp b/eval/src/vespa/eval/instruction/generic_reduce.cpp index afc46e8ee7d..d30186d3dd8 100644 --- a/eval/src/vespa/eval/instruction/generic_reduce.cpp +++ b/eval/src/vespa/eval/instruction/generic_reduce.cpp @@ -45,10 +45,10 @@ ReduceParam::~ReduceParam() = default; //----------------------------------------------------------------------------- struct SparseReduceState { - std::vector<vespalib::stringref> full_address; - std::vector<vespalib::stringref*> fetch_address; - std::vector<vespalib::stringref*> keep_address; - size_t subspace; + std::vector<label_t> full_address; + std::vector<label_t*> fetch_address; + std::vector<label_t*> keep_address; + size_t subspace; SparseReduceState(const SparseReducePlan &plan) : full_address(plan.keep_dims.size() + plan.num_reduce_dims), @@ -71,20 +71,20 @@ template <typename ICT, typename OCT, typename AGGR> Value::UP generic_reduce(const Value &value, const ReduceParam &param) { auto cells = value.cells().typify<ICT>(); - ArrayArrayMap<vespalib::stringref,AGGR> map(param.sparse_plan.keep_dims.size(), - param.dense_plan.out_size, - value.index().size()); + ArrayArrayMap<label_t,AGGR> map(param.sparse_plan.keep_dims.size(), + param.dense_plan.out_size, + value.index().size()); SparseReduceState sparse(param.sparse_plan); auto full_view = value.index().create_view({}); full_view->lookup({}); - ConstArrayRef<vespalib::stringref*> keep_addr(sparse.keep_address); + ConstArrayRef<label_t*> keep_addr(sparse.keep_address); while (full_view->next_result(sparse.fetch_address, sparse.subspace)) { auto [tag, ignore] = 
map.lookup_or_add_entry(keep_addr); AGGR *dst = map.get_values(tag).begin(); auto sample = [&](size_t src_idx, size_t dst_idx) { dst[dst_idx].sample(cells[src_idx]); }; param.dense_plan.execute(sparse.subspace * param.dense_plan.in_size, sample); } - auto builder = param.factory.create_value_builder<OCT>(param.res_type, param.sparse_plan.keep_dims.size(), param.dense_plan.out_size, map.size()); + auto builder = param.factory.create_transient_value_builder<OCT>(param.res_type, param.sparse_plan.keep_dims.size(), param.dense_plan.out_size, map.size()); map.each_entry([&](const auto &keys, const auto &values) { OCT *dst = builder->add_subspace(keys).begin(); @@ -93,7 +93,7 @@ generic_reduce(const Value &value, const ReduceParam &param) { } }); if ((map.size() == 0) && param.sparse_plan.keep_dims.empty()) { - auto zero = builder->add_subspace({}); + auto zero = builder->add_subspace(); for (size_t i = 0; i < zero.size(); ++i) { zero[i] = OCT{}; } diff --git a/eval/src/vespa/eval/instruction/generic_rename.cpp b/eval/src/vespa/eval/instruction/generic_rename.cpp index 1ce18597ec2..894ef37b678 100644 --- a/eval/src/vespa/eval/instruction/generic_rename.cpp +++ b/eval/src/vespa/eval/instruction/generic_rename.cpp @@ -69,15 +69,15 @@ generic_rename(const Value &a, const ValueType &res_type, const ValueBuilderFactory &factory) { auto cells = a.cells().typify<CT>(); - std::vector<vespalib::stringref> output_address(sparse_plan.mapped_dims); - std::vector<vespalib::stringref*> input_address; + std::vector<label_t> output_address(sparse_plan.mapped_dims); + std::vector<label_t*> input_address; for (size_t maps_to : sparse_plan.output_dimensions) { input_address.push_back(&output_address[maps_to]); } - auto builder = factory.create_value_builder<CT>(res_type, - sparse_plan.mapped_dims, - dense_plan.subspace_size, - a.index().size()); + auto builder = factory.create_transient_value_builder<CT>(res_type, + sparse_plan.mapped_dims, + dense_plan.subspace_size, + a.index().size()); auto 
view = a.index().create_view({}); view->lookup({}); size_t subspace; diff --git a/eval/src/vespa/eval/streamed/streamed_value.cpp b/eval/src/vespa/eval/streamed/streamed_value.cpp index bdfe5fd4e27..06162b2200d 100644 --- a/eval/src/vespa/eval/streamed/streamed_value.cpp +++ b/eval/src/vespa/eval/streamed/streamed_value.cpp @@ -16,8 +16,7 @@ StreamedValue<T>::get_memory_usage() const { MemoryUsage usage = self_memory_usage<StreamedValue<T>>(); usage.merge(vector_extra_memory_usage(_my_cells)); - usage.incUsedBytes(_label_buf.byteSize()); - usage.incAllocatedBytes(_label_buf.byteCapacity()); + usage.merge(vector_extra_memory_usage(_my_labels.view().handles())); return usage; } diff --git a/eval/src/vespa/eval/streamed/streamed_value.h b/eval/src/vespa/eval/streamed/streamed_value.h index 258802a53e8..94603d9d35e 100644 --- a/eval/src/vespa/eval/streamed/streamed_value.h +++ b/eval/src/vespa/eval/streamed/streamed_value.h @@ -4,6 +4,7 @@ #include <vespa/eval/eval/value_type.h> #include <vespa/eval/eval/value.h> +#include <vespa/vespalib/util/shared_string_repo.h> #include "streamed_value_index.h" #include <cassert> @@ -19,20 +20,22 @@ template <typename T> class StreamedValue : public Value { private: + using StrongHandles = SharedStringRepo::StrongHandles; + ValueType _type; std::vector<T> _my_cells; - Array<char> _label_buf; + StrongHandles _my_labels; StreamedValueIndex _my_index; public: StreamedValue(ValueType type, size_t num_mapped_dimensions, - std::vector<T> cells, size_t num_subspaces, Array<char> && label_buf) + std::vector<T> cells, size_t num_subspaces, StrongHandles && handles) : _type(std::move(type)), _my_cells(std::move(cells)), - _label_buf(std::move(label_buf)), + _my_labels(std::move(handles)), _my_index(num_mapped_dimensions, num_subspaces, - ConstArrayRef<char>(_label_buf.begin(), _label_buf.size())) + _my_labels.view().handles()) { assert(num_subspaces * _type.dense_subspace_size() == _my_cells.size()); } @@ -42,7 +45,6 @@ public: TypedCells 
cells() const final override { return TypedCells(_my_cells); } const Value::Index &index() const final override { return _my_index; } MemoryUsage get_memory_usage() const final override; - auto get_data_reference() const { return _my_index.get_data_reference(); } }; } // namespace diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder.h b/eval/src/vespa/eval/streamed/streamed_value_builder.h index 5698c805756..48a01f893de 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_builder.h +++ b/eval/src/vespa/eval/streamed/streamed_value_builder.h @@ -3,7 +3,7 @@ #pragma once #include "streamed_value.h" -#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/vespalib/util/shared_string_repo.h> namespace vespalib::eval { @@ -14,12 +14,14 @@ template <typename T> class StreamedValueBuilder : public ValueBuilder<T> { private: + using StrongHandles = SharedStringRepo::StrongHandles; + ValueType _type; size_t _num_mapped_dimensions; size_t _dense_subspace_size; std::vector<T> _cells; size_t _num_subspaces; - nbostream _labels; + StrongHandles _labels; public: StreamedValueBuilder(const ValueType &type, size_t num_mapped_in, @@ -30,18 +32,26 @@ public: _dense_subspace_size(subspace_size_in), _cells(), _num_subspaces(0), - _labels() + _labels(num_mapped_in * expected_subspaces) { _cells.reserve(subspace_size_in * expected_subspaces); - // assume small sized label strings: - _labels.reserve(num_mapped_in * expected_subspaces * 3); }; ~StreamedValueBuilder(); ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) override { for (auto label : addr) { - _labels.writeSmallString(label); + _labels.add(label); + } + size_t old_sz = _cells.size(); + _cells.resize(old_sz + _dense_subspace_size); + _num_subspaces++; + return ArrayRef<T>(&_cells[old_sz], _dense_subspace_size); + } + + ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) override { + for (auto label : addr) { + _labels.add(label); } size_t old_sz = _cells.size(); _cells.resize(old_sz + 
_dense_subspace_size); @@ -58,7 +68,7 @@ public: _num_mapped_dimensions, std::move(_cells), _num_subspaces, - _labels.extract_buffer()); + std::move(_labels)); } }; diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp index aa6347a2c51..5111ba8a71e 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp +++ b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp @@ -19,10 +19,12 @@ struct SelectStreamedValueBuilder { std::unique_ptr<ValueBuilderBase> StreamedValueBuilderFactory::create_value_builder_base(const ValueType &type, + bool transient, size_t num_mapped, size_t subspace_size, size_t expected_subspaces) const { + (void) transient; return typify_invoke<1,TypifyCellType,SelectStreamedValueBuilder>( type.cell_type(), type, num_mapped, subspace_size, expected_subspaces); diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h index 3f81981f429..58072aa31dc 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h +++ b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h @@ -14,7 +14,7 @@ private: StreamedValueBuilderFactory() {} static StreamedValueBuilderFactory _factory; std::unique_ptr<ValueBuilderBase> create_value_builder_base( - const ValueType &type, size_t num_mapped_in, + const ValueType &type, bool transient, size_t num_mapped_in, size_t subspace_size_in, size_t expected_subspaces) const override; public: static const StreamedValueBuilderFactory &get() { return _factory; } diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.cpp b/eval/src/vespa/eval/streamed/streamed_value_index.cpp index 17cf7316554..a014f2dcee9 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_index.cpp +++ b/eval/src/vespa/eval/streamed/streamed_value_index.cpp @@ -18,7 +18,7 @@ struct StreamedFilterView : Value::Index::View { 
LabelBlockStream label_blocks; std::vector<size_t> view_dims; - std::vector<vespalib::stringref> to_match; + std::vector<label_t> to_match; StreamedFilterView(LabelBlockStream labels, std::vector<size_t> view_dims_in) : label_blocks(std::move(labels)), @@ -28,7 +28,7 @@ struct StreamedFilterView : Value::Index::View to_match.reserve(view_dims.size()); } - void lookup(ConstArrayRef<const vespalib::stringref*> addr) override { + void lookup(ConstArrayRef<const label_t*> addr) override { label_blocks.reset(); to_match.clear(); for (auto ptr : addr) { @@ -37,7 +37,7 @@ struct StreamedFilterView : Value::Index::View assert(view_dims.size() == to_match.size()); } - bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override { + bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override { while (const auto block = label_blocks.next_block()) { idx_out = block.subspace_index; bool matches = true; @@ -66,12 +66,12 @@ struct StreamedIterationView : Value::Index::View : label_blocks(std::move(labels)) {} - void lookup(ConstArrayRef<const vespalib::stringref*> addr) override { + void lookup(ConstArrayRef<const label_t*> addr) override { label_blocks.reset(); assert(addr.size() == 0); } - bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override { + bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override { if (auto block = label_blocks.next_block()) { idx_out = block.subspace_index; size_t i = 0; @@ -90,7 +90,7 @@ struct StreamedIterationView : Value::Index::View std::unique_ptr<Value::Index::View> StreamedValueIndex::create_view(const std::vector<size_t> &dims) const { - LabelBlockStream label_stream(_data.num_subspaces, _data.labels_buffer, _data.num_mapped_dims); + LabelBlockStream label_stream(_num_subspaces, _labels_ref, _num_mapped_dims); if (dims.empty()) { return std::make_unique<StreamedIterationView>(std::move(label_stream)); } diff --git 
a/eval/src/vespa/eval/streamed/streamed_value_index.h b/eval/src/vespa/eval/streamed/streamed_value_index.h index 8fd561200c3..aa1c9a0e201 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_index.h +++ b/eval/src/vespa/eval/streamed/streamed_value_index.h @@ -3,6 +3,7 @@ #pragma once #include <vespa/eval/eval/value.h> +#include <vespa/vespalib/util/shared_string_repo.h> namespace vespalib::eval { @@ -12,25 +13,21 @@ namespace vespalib::eval { **/ class StreamedValueIndex : public Value::Index { +private: + uint32_t _num_mapped_dims; + uint32_t _num_subspaces; + const std::vector<label_t> &_labels_ref; + public: - struct SerializedDataRef { - uint32_t num_mapped_dims; - uint32_t num_subspaces; - ConstArrayRef<char> labels_buffer; - }; - StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, ConstArrayRef<char> labels_buf) - : _data{num_mapped_dims, num_subspaces, labels_buf} + StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, const std::vector<label_t> &labels_ref) + : _num_mapped_dims(num_mapped_dims), + _num_subspaces(num_subspaces), + _labels_ref(labels_ref) {} // index API: - size_t size() const override { return _data.num_subspaces; } + size_t size() const override { return _num_subspaces; } std::unique_ptr<View> create_view(const std::vector<size_t> &dims) const override; - - SerializedDataRef get_data_reference() const { return _data; } - -private: - SerializedDataRef _data; }; } // namespace - diff --git a/eval/src/vespa/eval/streamed/streamed_value_utils.h b/eval/src/vespa/eval/streamed/streamed_value_utils.h index b88d4df8581..6b44e052f0c 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_utils.h +++ b/eval/src/vespa/eval/streamed/streamed_value_utils.h @@ -4,24 +4,23 @@ #include <vespa/eval/eval/value.h> #include <vespa/vespalib/objects/nbostream.h> +#include <cassert> namespace vespalib::eval { /** * Reads a stream of serialized labels. - * Reading more labels than available will - * throw an exception. 
+ * Reading more labels than available will trigger an assert. **/ struct LabelStream { - nbostream source; - LabelStream(ConstArrayRef<char> data) : source(data.begin(), data.size()) {} - vespalib::stringref next_label() { - size_t str_size = source.getInt1_4Bytes(); - vespalib::stringref label(source.peek(), str_size); - source.adjustReadPos(str_size); - return label; + const std::vector<label_t> &source; + size_t pos; + LabelStream(const std::vector<label_t> &data) : source(data), pos(0) {} + label_t next_label() { + assert(pos < source.size()); + return source[pos++]; } - void reset() { source.rp(0); } + void reset() { pos = 0; } }; /** @@ -30,7 +29,7 @@ struct LabelStream { struct LabelBlock { static constexpr size_t npos = -1; size_t subspace_index; - ConstArrayRef<vespalib::stringref> address; + ConstArrayRef<label_t> address; operator bool() const { return subspace_index != npos; } }; @@ -43,7 +42,7 @@ private: size_t _num_subspaces; LabelStream _labels; size_t _subspace_index; - std::vector<vespalib::stringref> _current_address; + std::vector<label_t> _current_address; public: LabelBlock next_block() { if (_subspace_index < _num_subspaces) { @@ -62,10 +61,10 @@ public: } LabelBlockStream(uint32_t num_subspaces, - ConstArrayRef<char> label_buf, + const std::vector<label_t> &labels, uint32_t num_mapped_dims) : _num_subspaces(num_subspaces), - _labels(label_buf), + _labels(labels), _subspace_index(num_subspaces), _current_address(num_mapped_dims) {} diff --git a/eval/src/vespa/eval/streamed/streamed_value_view.h b/eval/src/vespa/eval/streamed/streamed_value_view.h index e37f442dd9a..38eb8db786f 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_view.h +++ b/eval/src/vespa/eval/streamed/streamed_value_view.h @@ -24,10 +24,10 @@ private: public: StreamedValueView(const ValueType &type, size_t num_mapped_dimensions, TypedCells cells, size_t num_subspaces, - ConstArrayRef<char> labels_buf) + const std::vector<label_t> &labels) : _type(type), 
_cells_ref(cells), - _my_index(num_mapped_dimensions, num_subspaces, labels_buf) + _my_index(num_mapped_dimensions, num_subspaces, labels) { assert(num_subspaces * _type.dense_subspace_size() == _cells_ref.size); } @@ -39,7 +39,6 @@ public: MemoryUsage get_memory_usage() const final override { return self_memory_usage<StreamedValueView>(); } - auto get_data_reference() const { return _my_index.get_data_reference(); } }; } // namespace diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index e1bd47af358..7b597af417d 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -563,7 +563,7 @@ void Fixture::testCompaction() { if ((_traits.use_dense_tensor_attribute && _denseTensors) || - _traits.use_direct_tensor_attribute) + ! _traits.use_dense_tensor_attribute) { LOG(info, "Skipping compaction test for tensor '%s' which is using free-lists", _cfg.tensorType().to_spec().c_str()); return; diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp index 6e1fb1a0a2f..260ffa1a388 100644 --- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp @@ -3,8 +3,7 @@ #include "serialized_fast_value_attribute.h" #include "streamed_value_saver.h" #include <vespa/eval/eval/value.h> -#include <vespa/eval/eval/fast_value.hpp> -#include <vespa/eval/streamed/streamed_value_utils.h> +#include <vespa/eval/eval/fast_value.h> #include <vespa/fastlib/io/bufferedfile.h> #include <vespa/searchlib/attribute/readerbase.h> #include <vespa/searchlib/util/fileutil.h> @@ -21,127 +20,10 @@ using namespace vespalib::eval; namespace search::tensor { -namespace { - -struct ValueBlock : LabelBlock { 
- TypedCells cells; -}; - -class ValueBlockStream { -private: - const StreamedValueStore::DataFromType &_from_type; - LabelBlockStream _label_block_stream; - const char *_cells_ptr; - - size_t dsss() const { return _from_type.dense_subspace_size; } - auto cell_type() const { return _from_type.cell_type; } -public: - ValueBlock next_block() { - auto labels = _label_block_stream.next_block(); - if (labels) { - TypedCells subspace_cells(_cells_ptr, cell_type(), dsss()); - _cells_ptr += CellTypeUtils::mem_size(cell_type(), dsss()); - return ValueBlock{labels, subspace_cells}; - } else { - TypedCells none(nullptr, cell_type(), 0); - return ValueBlock{labels, none}; - } - } - - ValueBlockStream(const StreamedValueStore::DataFromType &from_type, - const StreamedValueStore::StreamedValueData &from_store) - : _from_type(from_type), - _label_block_stream(from_store.num_subspaces, - from_store.labels_buffer, - from_type.num_mapped_dimensions), - _cells_ptr((const char *)from_store.cells_ref.data) - { - _label_block_stream.reset(); - } - - ~ValueBlockStream(); -}; - -ValueBlockStream::~ValueBlockStream() = default; - -void report_problematic_subspace(size_t idx, - const StreamedValueStore::DataFromType &from_type, - const StreamedValueStore::StreamedValueData &from_store) -{ - LOG(error, "PROBLEM: add_mapping returned same index=%zu twice", idx); - FastValueIndex temp_index(from_type.num_mapped_dimensions, - from_store.num_subspaces); - auto from_start = ValueBlockStream(from_type, from_store); - while (auto redo_block = from_start.next_block()) { - if (idx == temp_index.map.add_mapping(redo_block.address)) { - vespalib::string msg = "Block with address[ "; - for (vespalib::stringref ref : redo_block.address) { - msg.append("'").append(ref).append("' "); - } - msg.append("]"); - LOG(error, "%s maps to subspace %zu", msg.c_str(), idx); - } - } -} - -/** - * This Value implementation is almost exactly like FastValue, but - * instead of owning its type and cells it just has a 
reference to - * data stored elsewhere. - * XXX: we should find a better name for this, and move it - * (together with the helper classes above) to its own file, - * and add associated unit tests. - **/ -class OnlyFastValueIndex : public Value { -private: - const ValueType &_type; - TypedCells _cells; - FastValueIndex my_index; -public: - OnlyFastValueIndex(const ValueType &type, - const StreamedValueStore::DataFromType &from_type, - const StreamedValueStore::StreamedValueData &from_store) - : _type(type), - _cells(from_store.cells_ref), - my_index(from_type.num_mapped_dimensions, - from_store.num_subspaces) - { - assert(_type.cell_type() == _cells.type); - std::vector<vespalib::stringref> address(from_type.num_mapped_dimensions); - auto block_stream = ValueBlockStream(from_type, from_store); - size_t ss = 0; - while (auto block = block_stream.next_block()) { - size_t idx = my_index.map.add_mapping(block.address); - if (idx != ss) { - report_problematic_subspace(idx, from_type, from_store); - } - ++ss; - } - assert(ss == from_store.num_subspaces); - } - - - ~OnlyFastValueIndex(); - - const ValueType &type() const final override { return _type; } - TypedCells cells() const final override { return _cells; } - const Index &index() const final override { return my_index; } - vespalib::MemoryUsage get_memory_usage() const final override { - auto usage = self_memory_usage<OnlyFastValueIndex>(); - usage.merge(my_index.map.estimate_extra_memory_usage()); - return usage; - } -}; - -OnlyFastValueIndex::~OnlyFastValueIndex() = default; - -} - SerializedFastValueAttribute::SerializedFastValueAttribute(stringref name, const Config &cfg) : TensorAttribute(name, cfg, _streamedValueStore), _tensor_type(cfg.tensorType()), - _streamedValueStore(_tensor_type), - _data_from_type(_tensor_type) + _streamedValueStore(_tensor_type) { } @@ -171,10 +53,8 @@ SerializedFastValueAttribute::getTensor(DocId docId) const if (!ref.valid()) { return {}; } - if (auto data_from_store = 
_streamedValueStore.get_tensor_data(ref)) { - return std::make_unique<OnlyFastValueIndex>(_tensor_type, - _data_from_type, - data_from_store); + if (const auto * ptr = _streamedValueStore.get_tensor_entry(ref)) { + return ptr->create_fast_value_view(_tensor_type); } return {}; } diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h index a8c1df4913a..cc559d9b758 100644 --- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h +++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h @@ -19,7 +19,6 @@ namespace search::tensor { class SerializedFastValueAttribute : public TensorAttribute { vespalib::eval::ValueType _tensor_type; StreamedValueStore _streamedValueStore; // data store for serialized tensors - const StreamedValueStore::DataFromType _data_from_type; public: SerializedFastValueAttribute(vespalib::stringref baseFileName, const Config &cfg); virtual ~SerializedFastValueAttribute(); diff --git a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp index c4579880409..ef4b711b86f 100644 --- a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp +++ b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp @@ -1,99 +1,204 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "streamed_value_store.h" -#include "tensor_deserialize.h" #include <vespa/eval/eval/value.h> #include <vespa/eval/eval/value_codec.h> +#include <vespa/eval/eval/fast_value.hpp> #include <vespa/eval/streamed/streamed_value_builder_factory.h> #include <vespa/eval/streamed/streamed_value_view.h> #include <vespa/vespalib/datastore/datastore.hpp> #include <vespa/vespalib/objects/nbostream.h> +#include <vespa/vespalib/util/typify.h> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/log/log.h> LOG_SETUP(".searchlib.tensor.streamed_value_store"); using vespalib::datastore::Handle; +using vespalib::datastore::EntryRef; using namespace vespalib::eval; +using vespalib::ConstArrayRef; +using vespalib::MemoryUsage; namespace search::tensor { +//----------------------------------------------------------------------------- + namespace { -constexpr size_t MIN_BUFFER_ARRAYS = 1024; - -struct CellsMemBlock { - uint32_t num; - uint32_t total_sz; - const char *ptr; - CellsMemBlock(TypedCells cells) - : num(cells.size), - total_sz(CellTypeUtils::mem_size(cells.type, num)), - ptr((const char *)cells.data) - {} +template <typename CT, typename F> +void each_subspace(const Value &value, size_t num_mapped, size_t dense_size, F f) { + size_t subspace; + std::vector<label_t> addr(num_mapped); + std::vector<label_t*> refs; + refs.reserve(addr.size()); + for (label_t &label: addr) { + refs.push_back(&label); + } + auto cells = value.cells().typify<CT>(); + auto view = value.index().create_view({}); + view->lookup({}); + while (view->next_result(refs, subspace)) { + size_t offset = subspace * dense_size; + f(ConstArrayRef<label_t>(addr), ConstArrayRef<CT>(cells.begin() + offset, dense_size)); + } +} + +using TensorEntry = StreamedValueStore::TensorEntry; + +struct CreateTensorEntry { + template <typename CT> + static TensorEntry::SP invoke(const Value &value, size_t num_mapped, size_t dense_size) { + using EntryImpl = StreamedValueStore::TensorEntryImpl<CT>; + return 
std::make_shared<EntryImpl>(value, num_mapped, dense_size); + } }; -template<typename T> -void check_alignment(T *ptr, size_t align) +using HandleView = vespalib::SharedStringRepo::HandleView; + +struct MyFastValueView final : Value { + const ValueType &my_type; + FastValueIndex my_index; + TypedCells my_cells; + MyFastValueView(const ValueType &type_ref, HandleView handle_view, TypedCells cells, size_t num_mapped, size_t num_spaces) + : my_type(type_ref), + my_index(num_mapped, handle_view, num_spaces), + my_cells(cells) + { + const std::vector<label_t> &labels = handle_view.handles(); + for (size_t i = 0; i < num_spaces; ++i) { + ConstArrayRef<label_t> addr(&labels[i * num_mapped], num_mapped); + my_index.map.add_mapping(FastAddrMap::hash_labels(addr)); + } + assert(my_index.map.size() == num_spaces); + } + const ValueType &type() const override { return my_type; } + const Value::Index &index() const override { return my_index; } + TypedCells cells() const override { return my_cells; } + MemoryUsage get_memory_usage() const override { + MemoryUsage usage = self_memory_usage<MyFastValueView>(); + usage.merge(my_index.map.estimate_extra_memory_usage()); + return usage; + } +}; + +} // <unnamed> + +//----------------------------------------------------------------------------- + +StreamedValueStore::TensorEntry::~TensorEntry() = default; + +StreamedValueStore::TensorEntry::SP +StreamedValueStore::TensorEntry::create_shared_entry(const Value &value) { - static_assert(sizeof(T) == 1); - size_t ptr_val = (size_t)ptr; - size_t unalign = ptr_val & (align - 1); - assert(unalign == 0); + size_t num_mapped = value.type().count_mapped_dimensions(); + size_t dense_size = value.type().dense_subspace_size(); + return vespalib::typify_invoke<1,TypifyCellType,CreateTensorEntry>(value.type().cell_type(), value, num_mapped, dense_size); } -} // namespace <unnamed> +template <typename CT> +StreamedValueStore::TensorEntryImpl<CT>::TensorEntryImpl(const Value &value, size_t 
num_mapped, size_t dense_size) + : handles(num_mapped * value.index().size()), + cells() +{ + cells.reserve(dense_size * value.index().size()); + auto store_subspace = [&](auto addr, auto data) { + for (label_t label: addr) { + handles.add(label); + } + for (CT entry: data) { + cells.push_back(entry); + } + }; + each_subspace<CT>(value, num_mapped, dense_size, store_subspace); +} -StreamedValueStore::StreamedValueStore(const ValueType &tensor_type) - : TensorStore(_concreteStore), - _concreteStore(), - _bufferType(RefType::align(1), - MIN_BUFFER_ARRAYS, - RefType::offsetSize() / RefType::align(1)), - _tensor_type(tensor_type), - _data_from_type(_tensor_type) +template <typename CT> +Value::UP +StreamedValueStore::TensorEntryImpl<CT>::create_fast_value_view(const ValueType &type_ref) const { - _store.addType(&_bufferType); - _store.initActiveBuffers(); - size_t align = CellTypeUtils::alignment(_data_from_type.cell_type); - // max alignment we can handle is 8: - assert(align <= 8); - // alignment must be a power of two: - assert((align & (align-1)) == 0); + size_t num_mapped = type_ref.count_mapped_dimensions(); + size_t dense_size = type_ref.dense_subspace_size(); + size_t num_spaces = cells.size() / dense_size; + assert(dense_size * num_spaces == cells.size()); + assert(num_mapped * num_spaces == handles.view().handles().size()); + return std::make_unique<MyFastValueView>(type_ref, handles.view(), TypedCells(cells), num_mapped, num_spaces); } -StreamedValueStore::~StreamedValueStore() +template <typename CT> +void +StreamedValueStore::TensorEntryImpl<CT>::encode_value(const ValueType &type, vespalib::nbostream &target) const { - _store.dropBuffers(); + size_t num_mapped = type.count_mapped_dimensions(); + size_t dense_size = type.dense_subspace_size(); + size_t num_spaces = cells.size() / dense_size; + assert(dense_size * num_spaces == cells.size()); + assert(num_mapped * num_spaces == handles.view().handles().size()); + StreamedValueView my_value(type, num_mapped, 
TypedCells(cells), num_spaces, handles.view().handles()); + ::vespalib::eval::encode_value(my_value, target); } -std::pair<const char *, uint32_t> -StreamedValueStore::getRawBuffer(RefType ref) const +template <typename CT> +MemoryUsage +StreamedValueStore::TensorEntryImpl<CT>::get_memory_usage() const +{ + MemoryUsage usage = self_memory_usage<TensorEntryImpl<CT>>(); + usage.merge(vector_extra_memory_usage(handles.view().handles())); + usage.merge(vector_extra_memory_usage(cells)); + return usage; +} + +template <typename CT> +StreamedValueStore::TensorEntryImpl<CT>::~TensorEntryImpl() = default; + +//----------------------------------------------------------------------------- + +constexpr size_t MIN_BUFFER_ARRAYS = 8192; + +StreamedValueStore::TensorBufferType::TensorBufferType() + : ParentType(1, MIN_BUFFER_ARRAYS, TensorStoreType::RefType::offsetSize()) { - if (!ref.valid()) { - return std::make_pair(nullptr, 0u); - } - const char *buf = _store.getEntry<char>(ref); - uint32_t len = *reinterpret_cast<const uint32_t *>(buf); - return std::make_pair(buf + sizeof(uint32_t), len); } -Handle<char> -StreamedValueStore::allocRawBuffer(uint32_t size) +void +StreamedValueStore::TensorBufferType::cleanHold(void* buffer, size_t offset, size_t num_elems, CleanContext clean_ctx) { - if (size == 0) { - return Handle<char>(); + TensorEntry::SP* elem = static_cast<TensorEntry::SP*>(buffer) + offset; + for (size_t i = 0; i < num_elems; ++i) { + clean_ctx.extraBytesCleaned((*elem)->get_memory_usage().allocatedBytes()); + *elem = _emptyEntry; + ++elem; } - size_t extSize = size + sizeof(uint32_t); - size_t bufSize = RefType::align(extSize); - auto result = _concreteStore.rawAllocator<char>(_typeId).alloc(bufSize); - *reinterpret_cast<uint32_t *>(result.data) = size; - char *padWritePtr = result.data + extSize; - for (size_t i = extSize; i < bufSize; ++i) { - *padWritePtr++ = 0; +} + +StreamedValueStore::StreamedValueStore(const ValueType &tensor_type) + : 
TensorStore(_concrete_store), + _concrete_store(), + _tensor_type(tensor_type) +{ + _concrete_store.enableFreeLists(); +} + +StreamedValueStore::~StreamedValueStore() = default; + +EntryRef +StreamedValueStore::add_entry(TensorEntry::SP tensor) +{ + auto ref = _concrete_store.addEntry(tensor); + auto& state = _concrete_store.getBufferState(RefType(ref).bufferId()); + state.incExtraUsedBytes(tensor->get_memory_usage().allocatedBytes()); + return ref; +} + +const StreamedValueStore::TensorEntry * +StreamedValueStore::get_tensor_entry(EntryRef ref) const +{ + if (!ref.valid()) { + return nullptr; } - // Hide length of buffer (first 4 bytes) from users of the buffer. - return Handle<char>(result.ref, result.data + sizeof(uint32_t)); + const auto& entry = _concrete_store.getEntry(ref); + assert(entry); + return entry.get(); } void @@ -102,111 +207,40 @@ StreamedValueStore::holdTensor(EntryRef ref) if (!ref.valid()) { return; } - RefType iRef(ref); - const char *buf = _store.getEntry<char>(iRef); - uint32_t len = *reinterpret_cast<const uint32_t *>(buf); - _concreteStore.holdElem(ref, len + sizeof(uint32_t)); + const auto& tensor = _concrete_store.getEntry(ref); + assert(tensor); + _concrete_store.holdElem(ref, 1, tensor->get_memory_usage().allocatedBytes()); } TensorStore::EntryRef StreamedValueStore::move(EntryRef ref) { if (!ref.valid()) { - return RefType(); + return EntryRef(); } - auto oldraw = getRawBuffer(ref); - auto newraw = allocRawBuffer(oldraw.second); - memcpy(newraw.data, oldraw.first, oldraw.second); - _concreteStore.holdElem(ref, oldraw.second + sizeof(uint32_t)); - return newraw.ref; -} - -StreamedValueStore::StreamedValueData -StreamedValueStore::get_tensor_data(EntryRef ref) const -{ - StreamedValueData retval; - retval.valid = false; - auto raw = getRawBuffer(ref); - if (raw.second == 0u) { - return retval; - } - vespalib::nbostream source(raw.first, raw.second); - uint32_t num_cells = source.readValue<uint32_t>(); - check_alignment(source.peek(), 
CellTypeUtils::alignment(_data_from_type.cell_type)); - retval.cells_ref = TypedCells(source.peek(), _data_from_type.cell_type, num_cells); - source.adjustReadPos(CellTypeUtils::mem_size(_data_from_type.cell_type, num_cells)); - assert((num_cells % _data_from_type.dense_subspace_size) == 0); - retval.num_subspaces = num_cells / _data_from_type.dense_subspace_size; - retval.labels_buffer = vespalib::ConstArrayRef<char>(source.peek(), source.size()); - retval.valid = true; - return retval; + const auto& old_tensor = _concrete_store.getEntry(ref); + assert(old_tensor); + auto new_ref = add_entry(old_tensor); + _concrete_store.holdElem(ref, 1, old_tensor->get_memory_usage().allocatedBytes()); + return new_ref; } bool StreamedValueStore::encode_tensor(EntryRef ref, vespalib::nbostream &target) const { - if (auto data = get_tensor_data(ref)) { - StreamedValueView value( - _tensor_type, _data_from_type.num_mapped_dimensions, - data.cells_ref, data.num_subspaces, data.labels_buffer); - vespalib::eval::encode_value(value, target); + if (const auto * entry = get_tensor_entry(ref)) { + entry->encode_value(_tensor_type, target); return true; } else { return false; } } -void -StreamedValueStore::serialize_labels(const Value::Index &index, - vespalib::nbostream &target) const -{ - uint32_t num_subspaces = index.size(); - uint32_t num_mapped_dims = _data_from_type.num_mapped_dimensions; - std::vector<vespalib::stringref> labels(num_mapped_dims * num_subspaces); - auto view = index.create_view({}); - view->lookup({}); - std::vector<vespalib::stringref> addr(num_mapped_dims); - std::vector<vespalib::stringref *> addr_refs; - for (auto & label : addr) { - addr_refs.push_back(&label); - } - size_t subspace; - for (size_t ss = 0; ss < num_subspaces; ++ss) { - bool ok = view->next_result(addr_refs, subspace); - assert(ok); - size_t idx = subspace * num_mapped_dims; - for (auto label : addr) { - labels[idx++] = label; - } - } - bool ok = view->next_result(addr_refs, subspace); - 
assert(!ok); - for (auto label : labels) { - target.writeSmallString(label); - } -} - TensorStore::EntryRef StreamedValueStore::store_tensor(const Value &tensor) { assert(tensor.type() == _tensor_type); - CellsMemBlock cells_mem(tensor.cells()); - vespalib::nbostream stream; - stream << uint32_t(cells_mem.num); - serialize_labels(tensor.index(), stream); - size_t mem_size = stream.size() + cells_mem.total_sz; - auto raw = allocRawBuffer(mem_size); - char *target = raw.data; - memcpy(target, stream.peek(), sizeof(uint32_t)); - stream.adjustReadPos(sizeof(uint32_t)); - target += sizeof(uint32_t); - check_alignment(target, CellTypeUtils::alignment(_data_from_type.cell_type)); - memcpy(target, cells_mem.ptr, cells_mem.total_sz); - target += cells_mem.total_sz; - memcpy(target, stream.peek(), stream.size()); - target += stream.size(); - assert(target <= raw.data + mem_size); - return raw.ref; + return add_entry(TensorEntry::create_shared_entry(tensor)); } TensorStore::EntryRef diff --git a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h index de94dc043d3..3a9d9a0b7b4 100644 --- a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h +++ b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h @@ -5,87 +5,71 @@ #include "tensor_store.h" #include <vespa/eval/eval/value_type.h> #include <vespa/eval/eval/value.h> +#include <vespa/eval/streamed/streamed_value.h> #include <vespa/vespalib/objects/nbostream.h> -#include <vespa/vespalib/util/typify.h> +#include <vespa/vespalib/util/shared_string_repo.h> namespace search::tensor { /** - * Class for storing tensors in memory, with a special serialization - * format that can be used directly to make a StreamedValueView. - * - * The tensor type is owned by the store itself and will not be - * serialized at all. 
- * - * The parameters for serialization (see DataFromType) are: - * - number of mapped dimensions [MD] - * - dense subspace size [DS] - * - size of each cell [CS] - currently 4 (float) or 8 (double) - * - alignment for cells - currently 4 (float) or 8 (double) - * While the tensor value to be serialized has: - * - number of dense subspaces [ND] - * - labels for dense subspaces, ND * MD strings - * - cell values, ND * DS cells (each either float or double) - * The serialization format looks like: - * - * [bytes] : [format] : [description] - * 4 : n.b.o. uint32_ t : num cells = ND * DS - * CS * ND * DS : native float or double : cells - * (depends) : n.b.o. strings : ND * MD label strings - * - * Here, n.b.o. means network byte order, or more precisely - * it's the format vespalib::nbostream uses for the given data type, - * including strings (where exact format depends on the string length). - * Note that the only unpredictably-sized data (the labels) are kept - * last. - * If we ever make a "hbostream" which uses host byte order, we - * could switch to that instead since these data are only kept in - * memory. + * Class for StreamedValue tensors in memory. 
*/ class StreamedValueStore : public TensorStore { public: - using RefType = vespalib::datastore::AlignedEntryRefT<22, 3>; - using DataStoreType = vespalib::datastore::DataStoreT<RefType>; + using Value = vespalib::eval::Value; + using ValueType = vespalib::eval::ValueType; + using Handles = vespalib::SharedStringRepo::StrongHandles; + using MemoryUsage = vespalib::MemoryUsage; - struct StreamedValueData { - bool valid; - vespalib::eval::TypedCells cells_ref; - size_t num_subspaces; - vespalib::ConstArrayRef<char> labels_buffer; - operator bool() const { return valid; } + // interface for tensor entries + struct TensorEntry { + using SP = std::shared_ptr<TensorEntry>; + virtual Value::UP create_fast_value_view(const ValueType &type_ref) const = 0; + virtual void encode_value(const ValueType &type, vespalib::nbostream &target) const = 0; + virtual MemoryUsage get_memory_usage() const = 0; + virtual ~TensorEntry(); + static TensorEntry::SP create_shared_entry(const Value &value); }; - struct DataFromType { - uint32_t num_mapped_dimensions; - uint32_t dense_subspace_size; - vespalib::eval::CellType cell_type; - - DataFromType(const vespalib::eval::ValueType& type) - : num_mapped_dimensions(type.count_mapped_dimensions()), - dense_subspace_size(type.dense_subspace_size()), - cell_type(type.cell_type()) - {} + // implementation of tensor entries + template <typename CT> + struct TensorEntryImpl : public TensorEntry { + Handles handles; + std::vector<CT> cells; + TensorEntryImpl(const Value &value, size_t num_mapped, size_t dense_size); + Value::UP create_fast_value_view(const ValueType &type_ref) const override; + void encode_value(const ValueType &type, vespalib::nbostream &target) const override; + MemoryUsage get_memory_usage() const override; + ~TensorEntryImpl() override; }; private: - DataStoreType _concreteStore; - vespalib::datastore::BufferType<char> _bufferType; - vespalib::eval::ValueType _tensor_type; - DataFromType _data_from_type; - - void 
serialize_labels(const vespalib::eval::Value::Index &index, - vespalib::nbostream &target) const; + // Note: Must use SP (instead of UP) because of fallbackCopy() and initializeReservedElements() in BufferType, + // and implementation of move(). + using TensorStoreType = vespalib::datastore::DataStore<TensorEntry::SP>; - std::pair<const char *, uint32_t> getRawBuffer(RefType ref) const; - vespalib::datastore::Handle<char> allocRawBuffer(uint32_t size); + class TensorBufferType : public vespalib::datastore::BufferType<TensorEntry::SP> { + private: + using ParentType = BufferType<TensorEntry::SP>; + using ParentType::_emptyEntry; + using CleanContext = typename ParentType::CleanContext; + public: + TensorBufferType(); + virtual void cleanHold(void* buffer, size_t offset, size_t num_elems, CleanContext clean_ctx) override; + }; + TensorStoreType _concrete_store; + const vespalib::eval::ValueType _tensor_type; + EntryRef add_entry(TensorEntry::SP tensor); public: StreamedValueStore(const vespalib::eval::ValueType &tensor_type); - virtual ~StreamedValueStore(); + ~StreamedValueStore() override; + + using RefType = TensorStoreType::RefType; - virtual void holdTensor(EntryRef ref) override; - virtual EntryRef move(EntryRef ref) override; + void holdTensor(EntryRef ref) override; + EntryRef move(EntryRef ref) override; - StreamedValueData get_tensor_data(EntryRef ref) const; + const TensorEntry * get_tensor_entry(EntryRef ref) const; bool encode_tensor(EntryRef ref, vespalib::nbostream &target) const; EntryRef store_tensor(const vespalib::eval::Value &tensor); diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp index a5ec9540a1b..e529b1190d9 100644 --- a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp +++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp @@ -7,6 +7,18 @@ namespace vespalib { SharedStringRepo::Partition::~Partition() = default; void 
+SharedStringRepo::Partition::find_leaked_entries(size_t my_idx) const +{ + for (size_t i = 0; i < _entries.size(); ++i) { + if (!_entries[i].is_free()) { + size_t id = (((i << PART_BITS) | my_idx) + 1); + fprintf(stderr, "WARNING: shared_string_repo: leaked string id: %zu ('%s')\n", + id, _entries[i].str().c_str()); + } + } +} + +void SharedStringRepo::Partition::make_entries(size_t hint) { hint = std::max(hint, _entries.size() + 1); @@ -20,7 +32,12 @@ SharedStringRepo::Partition::make_entries(size_t hint) } SharedStringRepo::SharedStringRepo() = default; -SharedStringRepo::~SharedStringRepo() = default; +SharedStringRepo::~SharedStringRepo() +{ + for (size_t p = 0; p < _partitions.size(); ++p) { + _partitions[p].find_leaked_entries(p); + } +} SharedStringRepo & SharedStringRepo::get() @@ -44,6 +61,13 @@ SharedStringRepo::StrongHandles::StrongHandles(size_t expect_size) _handles.reserve(expect_size); } +SharedStringRepo::StrongHandles::StrongHandles(StrongHandles &&rhs) + : _repo(rhs._repo), + _handles(std::move(rhs._handles)) +{ + assert(rhs._handles.empty()); +} + SharedStringRepo::StrongHandles::~StrongHandles() { for (uint32_t handle: _handles) { diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.h b/vespalib/src/vespa/vespalib/util/shared_string_repo.h index afdd3a289f9..f7137984caa 100644 --- a/vespalib/src/vespa/vespalib/util/shared_string_repo.h +++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.h @@ -10,6 +10,7 @@ #include <mutex> #include <vector> #include <array> +#include <cassert> namespace vespalib { @@ -34,21 +35,43 @@ private: class alignas(64) Partition { public: - struct Entry { + class Entry { + public: static constexpr uint32_t npos = -1; - uint32_t hash; - uint32_t ref_cnt; - vespalib::string str; - explicit Entry(uint32_t next) noexcept : hash(), ref_cnt(next), str() {} + private: + uint32_t _hash; + uint32_t _ref_cnt; + vespalib::string _str; + public: + explicit Entry(uint32_t next) noexcept + : _hash(next), 
_ref_cnt(npos), _str() {} + constexpr uint32_t hash() const noexcept { return _hash; } + constexpr const vespalib::string &str() const noexcept { return _str; } + constexpr bool is_free() const noexcept { return (_ref_cnt == npos); } uint32_t init(const AltKey &key) { - uint32_t next = ref_cnt; - hash = key.hash; - ref_cnt = 1; - str = key.str; + uint32_t next = _hash; + _hash = key.hash; + _ref_cnt = 1; + _str = key.str; return next; } void fini(uint32_t next) { - ref_cnt = next; + _hash = next; + _ref_cnt = npos; + // to reset or not to reset... + // _str.reset(); + } + vespalib::string as_string() const { + assert(!is_free()); + return _str; + } + void add_ref() { + assert(!is_free()); + ++_ref_cnt; + } + bool sub_ref() { + assert(!is_free()); + return (--_ref_cnt == 0); } }; struct Key { @@ -64,7 +87,7 @@ private: Equal(const std::vector<Entry> &entries_in) : entries(entries_in) {} Equal(const Equal &rhs) = default; bool operator()(const Key &a, const Key &b) const { return (a.idx == b.idx); } - bool operator()(const Key &a, const AltKey &b) const { return ((a.hash == b.hash) && (entries[a.idx].str == b.str)); } + bool operator()(const Key &a, const AltKey &b) const { return ((a.hash == b.hash) && (entries[a.idx].str() == b.str)); } }; using HashType = hashtable<Key,Key,Hash,Equal,Identity,hashtable_base::and_modulator>; @@ -92,12 +115,13 @@ private: make_entries(64); } ~Partition(); + void find_leaked_entries(size_t my_idx) const; uint32_t resolve(const AltKey &alt_key) { std::lock_guard guard(_lock); auto pos = _hash.find(alt_key); if (pos != _hash.end()) { - ++_entries[pos->idx].ref_cnt; + _entries[pos->idx].add_ref(); return pos->idx; } else { uint32_t idx = make_entry(alt_key); @@ -108,19 +132,19 @@ private: vespalib::string as_string(uint32_t idx) { std::lock_guard guard(_lock); - return _entries[idx].str; + return _entries[idx].as_string(); } void copy(uint32_t idx) { std::lock_guard guard(_lock); - ++_entries[idx].ref_cnt; + _entries[idx].add_ref(); } 
void reclaim(uint32_t idx) { std::lock_guard guard(_lock); Entry &entry = _entries[idx]; - if (--entry.ref_cnt == 0) { - _hash.erase(Key{idx, entry.hash}); + if (entry.sub_ref()) { + _hash.erase(Key{idx, entry.hash()}); entry.fini(_free); _free = idx; } @@ -178,8 +202,9 @@ public: class Handle { private: uint32_t _id; + Handle(uint32_t weak_id) : _id(get().copy(weak_id)) {} public: - Handle() : _id(0) {} + Handle() noexcept : _id(0) {} Handle(vespalib::stringref str) : _id(get().resolve(str)) {} Handle(const Handle &rhs) : _id(get().copy(rhs._id)) {} Handle &operator=(const Handle &rhs) { @@ -196,9 +221,15 @@ public: rhs._id = 0; return *this; } - bool operator==(const Handle &rhs) const { return (_id == rhs._id); } - uint32_t id() const { return _id; } + // NB: not lexical sorting order, but can be used in maps + bool operator<(const Handle &rhs) const noexcept { return (_id < rhs._id); } + bool operator==(const Handle &rhs) const noexcept { return (_id == rhs._id); } + bool operator!=(const Handle &rhs) const noexcept { return (_id != rhs._id); } + uint32_t id() const noexcept { return _id; } + uint32_t hash() const noexcept { return _id; } vespalib::string as_string() const { return get().as_string(_id); } + static Handle handle_from_id(uint32_t weak_id) { return Handle(weak_id); } + static vespalib::string string_from_id(uint32_t weak_id) { return get().as_string(weak_id); } ~Handle() { get().reclaim(_id); } }; @@ -229,8 +260,20 @@ public: std::vector<uint32_t> _handles; public: StrongHandles(size_t expect_size); + StrongHandles(StrongHandles &&rhs); + StrongHandles(const StrongHandles &) = delete; + StrongHandles &operator=(const StrongHandles &) = delete; + StrongHandles &operator=(StrongHandles &&) = delete; ~StrongHandles(); - void add(vespalib::stringref str) { _handles.push_back(_repo.resolve(str)); } + uint32_t add(vespalib::stringref str) { + uint32_t id = _repo.resolve(str); + _handles.push_back(id); + return id; + } + void add(uint32_t handle) { + 
uint32_t id = _repo.copy(handle); + _handles.push_back(id); + } HandleView view() const { return HandleView(_handles); } }; }; |