diff options
31 files changed, 424 insertions, 278 deletions
diff --git a/document/src/vespa/document/update/tensor_partial_update.cpp b/document/src/vespa/document/update/tensor_partial_update.cpp index f763c92741c..83741a0517a 100644 --- a/document/src/vespa/document/update/tensor_partial_update.cpp +++ b/document/src/vespa/document/update/tensor_partial_update.cpp @@ -44,7 +44,7 @@ struct DenseCoords { } ~DenseCoords(); void clear() { offset = 0; current = 0; } - void convert_label(label_t label_id) { + void convert_label(string_id label_id) { vespalib::string label = SharedStringRepo::Handle::string_from_id(label_id); uint32_t coord = 0; for (char c : label) { @@ -73,9 +73,9 @@ struct DenseCoords { DenseCoords::~DenseCoords() = default; struct SparseCoords { - std::vector<label_t> addr; - std::vector<label_t *> next_result_refs; - std::vector<const label_t *> lookup_refs; + std::vector<string_id> addr; + std::vector<string_id *> next_result_refs; + std::vector<const string_id *> lookup_refs; std::vector<size_t> lookup_view_dims; SparseCoords(size_t sz) : addr(sz), next_result_refs(sz), lookup_refs(sz), lookup_view_dims(sz) @@ -329,7 +329,7 @@ calc_mapped_dimension_indexes(const ValueType& input_type, struct ModifierCoords { - std::vector<const label_t *> lookup_refs; + std::vector<const string_id *> lookup_refs; std::vector<size_t> lookup_view_dims; ModifierCoords(const SparseCoords& input_coords, diff --git a/eval/src/tests/eval/fast_value/fast_value_test.cpp b/eval/src/tests/eval/fast_value/fast_value_test.cpp index e809fb1bcda..2124d4f169a 100644 --- a/eval/src/tests/eval/fast_value/fast_value_test.cpp +++ b/eval/src/tests/eval/fast_value/fast_value_test.cpp @@ -113,8 +113,8 @@ TEST(FastValueBuilderTest, mixed_add_subspace_robustness) { EXPECT_EQ(value->index().size(), 3); Handle foo("foo"); Handle bar("bar"); - label_t label; - label_t *label_ptr = &label; + string_id label; + string_id *label_ptr = &label; size_t subspace_idx; auto get_subspace = [&]() { auto cells = value->cells().typify<double>(); diff --git a/eval/src/tests/eval/simple_value/simple_value_test.cpp b/eval/src/tests/eval/simple_value/simple_value_test.cpp index 1691d5c263c..ffc58df4a16 100644 --- a/eval/src/tests/eval/simple_value/simple_value_test.cpp +++ b/eval/src/tests/eval/simple_value/simple_value_test.cpp @@ -16,12 +16,12 @@ using namespace vespalib::eval::test; using vespalib::make_string_short::fmt; -using PA = std::vector<label_t *>; -using CPA = std::vector<const label_t *>; +using PA = std::vector<string_id *>; +using CPA = std::vector<const string_id *>; using Handle = SharedStringRepo::Handle; -vespalib::string as_str(label_t label) { return Handle::string_from_id(label); } +vespalib::string as_str(string_id label) { return Handle::string_from_id(label); } std::vector<Layout> layouts = { {}, @@ -103,8 +103,8 @@ TEST(SimpleValueTest, simple_value_can_be_built_and_inspected) { EXPECT_EQ(value->index().size(), 6); auto view = value->index().create_view({0}); Handle query_handle("b"); - label_t query = query_handle.id(); - label_t label; + string_id query = query_handle.id(); + string_id label; size_t subspace; std::map<vespalib::string,size_t> result; view->lookup(CPA{&query}); diff --git a/eval/src/tests/streamed/value/streamed_value_test.cpp b/eval/src/tests/streamed/value/streamed_value_test.cpp index 5221c4eda64..075595c5d2c 100644 --- a/eval/src/tests/streamed/value/streamed_value_test.cpp +++ b/eval/src/tests/streamed/value/streamed_value_test.cpp @@ -16,12 +16,12 @@ using namespace vespalib::eval::test; using vespalib::make_string_short::fmt; -using PA = std::vector<label_t *>; -using CPA = std::vector<const label_t *>; +using PA = std::vector<string_id *>; +using CPA = std::vector<const string_id *>; using Handle = SharedStringRepo::Handle; -vespalib::string as_str(label_t label) { return Handle::string_from_id(label); } +vespalib::string as_str(string_id label) { return Handle::string_from_id(label); } std::vector<Layout> layouts = { {}, @@ -103,8 +103,8 @@ TEST(StreamedValueTest, streamed_value_can_be_built_and_inspected) { EXPECT_EQ(value->index().size(), 6); auto view = value->index().create_view({0}); Handle query_handle("b"); - label_t query = query_handle.id(); - label_t label; + string_id query = query_handle.id(); + string_id label; size_t subspace; std::map<vespalib::string,size_t> result; view->lookup(CPA{&query}); diff --git a/eval/src/vespa/eval/eval/fast_addr_map.h b/eval/src/vespa/eval/eval/fast_addr_map.h index a8a82718a28..d8f68d2a37c 100644 --- a/eval/src/vespa/eval/eval/fast_addr_map.h +++ b/eval/src/vespa/eval/eval/fast_addr_map.h @@ -2,12 +2,11 @@ #pragma once -#include "label.h" #include "memory_usage_stuff.h" #include <vespa/vespalib/util/arrayref.h> +#include <vespa/vespalib/util/string_id.h> #include <vespa/vespalib/stllike/identity.h> #include <vespa/vespalib/stllike/hashtable.h> -#include <vespa/vespalib/util/shared_string_repo.h> #include <vector> namespace vespalib::eval { @@ -22,8 +21,8 @@ class FastAddrMap { public: // label hasing functions - static constexpr uint32_t hash_label(label_t label) { return label; } - static constexpr uint32_t hash_label(const label_t *label) { return *label; } + static constexpr uint32_t hash_label(string_id label) { return label.hash(); } + static constexpr uint32_t hash_label(const string_id *label) { return label->hash(); } static constexpr uint32_t combine_label_hash(uint32_t full_hash, uint32_t next_hash) { return ((full_hash * 31) + next_hash); } @@ -59,10 +58,10 @@ public: // view able to convert tags into sparse addresses struct LabelView { size_t addr_size; - const std::vector<label_t> &labels; - LabelView(size_t num_mapped_dims, SharedStringRepo::HandleView handle_view) - : addr_size(num_mapped_dims), labels(handle_view.handles()) {} - ConstArrayRef<label_t> get_addr(size_t idx) const { + const std::vector<string_id> &labels; + LabelView(size_t num_mapped_dims, const std::vector<string_id> &labels_in) + : addr_size(num_mapped_dims), labels(labels_in) {} + ConstArrayRef<string_id> get_addr(size_t idx) const { return {&labels[idx * addr_size], addr_size}; } }; @@ -78,8 +77,8 @@ public: struct Equal { const LabelView &label_view; Equal(const LabelView &label_view_in) : label_view(label_view_in) {} - static constexpr bool eq_labels(label_t a, label_t b) { return (a == b); } - static constexpr bool eq_labels(label_t a, const label_t *b) { return (a == *b); } + static constexpr bool eq_labels(string_id a, string_id b) { return (a == b); } + static constexpr bool eq_labels(string_id a, const string_id *b) { return (a == *b); } template <typename T> bool operator()(const Entry &a, const AltKey<T> &b) const { if ((a.hash != b.hash) || (b.key.size() != label_view.addr_size)) { @@ -102,8 +101,8 @@ private: HashType _map; public: - FastAddrMap(size_t num_mapped_dims, SharedStringRepo::HandleView handle_view, size_t expected_subspaces) - : _labels(num_mapped_dims, handle_view), + FastAddrMap(size_t num_mapped_dims, const std::vector<string_id> &labels, size_t expected_subspaces) + : _labels(num_mapped_dims, labels), _map(expected_subspaces * 2, Hash(), Equal(_labels)) {} ~FastAddrMap(); FastAddrMap(const FastAddrMap &) = delete; @@ -111,7 +110,7 @@ public: FastAddrMap(FastAddrMap &&) = delete; FastAddrMap &operator=(FastAddrMap &&) = delete; static constexpr size_t npos() { return -1; } - ConstArrayRef<label_t> get_addr(size_t idx) const { return _labels.get_addr(idx); } + ConstArrayRef<string_id> get_addr(size_t idx) const { return _labels.get_addr(idx); } size_t size() const { return _map.size(); } constexpr size_t addr_size() const { return _labels.addr_size; } template <typename T> diff --git a/eval/src/vespa/eval/eval/fast_value.hpp b/eval/src/vespa/eval/eval/fast_value.hpp index 972aa68b8bd..eed10b59bf9 100644 --- a/eval/src/vespa/eval/eval/fast_value.hpp +++ b/eval/src/vespa/eval/eval/fast_value.hpp @@ -5,6 +5,7 @@ #include "inline_operation.h" #include <vespa/eval/instruction/generic_join.h> #include <vespa/vespalib/stllike/hashtable.hpp> +#include <vespa/vespalib/util/shared_string_repo.h> namespace vespalib::eval { @@ -23,11 +24,11 @@ struct FastLookupView : public Value::Index::View { FastLookupView(const FastAddrMap &map_in) : map(map_in), subspace(FastAddrMap::npos()) {} - void lookup(ConstArrayRef<const label_t*> addr) override { + void lookup(ConstArrayRef<const string_id*> addr) override { subspace = map.lookup(addr); } - bool next_result(ConstArrayRef<label_t*>, size_t &idx_out) override { + bool next_result(ConstArrayRef<string_id*>, size_t &idx_out) override { if (subspace == FastAddrMap::npos()) { return false; } @@ -45,10 +46,10 @@ struct FastFilterView : public Value::Index::View { const FastAddrMap ↦ std::vector<size_t> match_dims; std::vector<size_t> extract_dims; - std::vector<label_t> query; + std::vector<string_id> query; size_t pos; - bool is_match(ConstArrayRef<label_t> addr) const { + bool is_match(ConstArrayRef<string_id> addr) const { for (size_t i = 0; i < query.size(); ++i) { if (query[i] != addr[match_dims[i]]) { return false; @@ -73,7 +74,7 @@ struct FastFilterView : public Value::Index::View { assert((match_dims.size() + extract_dims.size()) == map.addr_size()); } - void lookup(ConstArrayRef<const label_t*> addr) override { + void lookup(ConstArrayRef<const string_id*> addr) override { assert(addr.size() == query.size()); for (size_t i = 0; i < addr.size(); ++i) { query[i] = *addr[i]; @@ -81,7 +82,7 @@ struct FastFilterView : public Value::Index::View { pos = 0; } - bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override { + bool next_result(ConstArrayRef<string_id*> addr_out, size_t &idx_out) override { while (pos < map.size()) { auto addr = map.get_addr(pos); if (is_match(addr)) { @@ -109,11 +110,11 @@ struct FastIterateView : public Value::Index::View { FastIterateView(const FastAddrMap &map_in) : map(map_in), pos(FastAddrMap::npos()) {} - void lookup(ConstArrayRef<const label_t*>) override { + void lookup(ConstArrayRef<const string_id*>) override { pos = 0; } - bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override { + bool next_result(ConstArrayRef<string_id*> addr_out, size_t &idx_out) override { if (pos >= map.size()) { return false; } @@ -139,8 +140,8 @@ using JoinAddrSource = instruction::SparseJoinPlan::Source; struct FastValueIndex final : Value::Index { FastAddrMap map; - FastValueIndex(size_t num_mapped_dims_in, SharedStringRepo::HandleView handle_view, size_t expected_subspaces_in) - : map(num_mapped_dims_in, handle_view, expected_subspaces_in) {} + FastValueIndex(size_t num_mapped_dims_in, const std::vector<string_id> &labels, size_t expected_subspaces_in) + : map(num_mapped_dims_in, labels, expected_subspaces_in) {} template <typename LCT, typename RCT, typename OCT, typename Fun> static const Value &sparse_full_overlap_join(const ValueType &res_type, const Fun &fun, @@ -217,8 +218,11 @@ template <typename T, bool transient> struct FastValue final : Value, ValueBuilder<T> { using Handles = std::conditional<transient, - SharedStringRepo::WeakHandles, - SharedStringRepo::StrongHandles>::type; + std::vector<string_id>, + SharedStringRepo::Handles>::type; + + static const std::vector<string_id> &get_view(const std::vector<string_id> &handles) { return handles; } + static const std::vector<string_id> &get_view(const SharedStringRepo::Handles &handles) { return handles.view(); } ValueType my_type; size_t my_subspace_size; @@ -228,9 +232,12 @@ struct FastValue final : Value, ValueBuilder<T> { FastValue(const ValueType &type_in, size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces_in) : my_type(type_in), my_subspace_size(subspace_size_in), - my_handles(expected_subspaces_in * num_mapped_dims_in), - my_index(num_mapped_dims_in, my_handles.view(), expected_subspaces_in), - my_cells(subspace_size_in * expected_subspaces_in) {} + my_handles(), + my_index(num_mapped_dims_in, get_view(my_handles), expected_subspaces_in), + my_cells(subspace_size_in * expected_subspaces_in) + { + my_handles.reserve(expected_subspaces_in * num_mapped_dims_in); + } ~FastValue() override; const ValueType &type() const override { return my_type; } const Value::Index &index() const override { return my_index; } @@ -247,17 +254,17 @@ struct FastValue final : Value, ValueBuilder<T> { my_index.map.add_mapping(hash); } } - void add_mapping(ConstArrayRef<label_t> addr) { + void add_mapping(ConstArrayRef<string_id> addr) { uint32_t hash = 0; - for (label_t label: addr) { + for (string_id label: addr) { hash = FastAddrMap::combine_label_hash(hash, FastAddrMap::hash_label(label)); - my_handles.add(label); + my_handles.push_back(label); } my_index.map.add_mapping(hash); } - void add_mapping(ConstArrayRef<label_t> addr, uint32_t hash) { - for (label_t label: addr) { - my_handles.add(label); + void add_mapping(ConstArrayRef<string_id> addr, uint32_t hash) { + for (string_id label: addr) { + my_handles.push_back(label); } my_index.map.add_mapping(hash); } @@ -265,7 +272,7 @@ struct FastValue final : Value, ValueBuilder<T> { add_mapping(addr); return my_cells.add_cells(my_subspace_size); } - ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) override { + ArrayRef<T> add_subspace(ConstArrayRef<string_id> addr) override { add_mapping(addr); return my_cells.add_cells(my_subspace_size); } @@ -281,7 +288,7 @@ struct FastValue final : Value, ValueBuilder<T> { } MemoryUsage get_memory_usage() const override { MemoryUsage usage = self_memory_usage<FastValue<T,transient>>(); - usage.merge(vector_extra_memory_usage(my_handles.view().handles())); + usage.merge(vector_extra_memory_usage(get_view(my_handles))); usage.merge(my_index.map.estimate_extra_memory_usage()); usage.merge(my_cells.estimate_extra_memory_usage()); return usage; @@ -309,7 +316,7 @@ struct FastDenseValue final : Value, ValueBuilder<T> { ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref>) override { return ArrayRef<T>(my_cells.get(0), my_cells.size); } - ArrayRef<T> add_subspace(ConstArrayRef<label_t>) override { + ArrayRef<T> add_subspace(ConstArrayRef<string_id>) override { return ArrayRef<T>(my_cells.get(0), my_cells.size); } std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>> self) override { @@ -332,7 +339,7 @@ template <typename T> struct FastScalarBuilder final : ValueBuilder<T> { T _value; ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref>) final override { return ArrayRef<T>(&_value, 1); } - ArrayRef<T> add_subspace(ConstArrayRef<label_t>) final override { return ArrayRef<T>(&_value, 1); }; + ArrayRef<T> add_subspace(ConstArrayRef<string_id>) final override { return ArrayRef<T>(&_value, 1); }; std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>>) final override { return std::make_unique<ScalarValue<T>>(_value); } }; @@ -368,7 +375,7 @@ FastValueIndex::sparse_no_overlap_join(const ValueType &res_type, const Fun &fun { size_t num_mapped_dims = addr_sources.size(); auto &result = stash.create<FastValue<OCT,true>>(res_type, num_mapped_dims, 1, lhs.map.size()*rhs.map.size()); - std::vector<label_t> output_addr(num_mapped_dims); + std::vector<string_id> output_addr(num_mapped_dims); std::vector<size_t> store_lhs_idx; std::vector<size_t> store_rhs_idx; size_t out_idx = 0; diff --git a/eval/src/vespa/eval/eval/label.h b/eval/src/vespa/eval/eval/label.h deleted file mode 100644 index 931f96a4f1a..00000000000 --- a/eval/src/vespa/eval/eval/label.h +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <cstdint> - -namespace vespalib::eval { - -// We use string ids from SharedStringRepo as labels. Note that -// label_t represents the lightweight reference type. Other structures -// (Handle/StrongHandles) are needed to keep the id valid. - -using label_t = uint32_t; - -} diff --git a/eval/src/vespa/eval/eval/simple_value.cpp b/eval/src/vespa/eval/eval/simple_value.cpp index 0cbbb29ecf1..313c4f3008d 100644 --- a/eval/src/vespa/eval/eval/simple_value.cpp +++ b/eval/src/vespa/eval/eval/simple_value.cpp @@ -41,7 +41,7 @@ struct SimpleLookupView : public Value::Index::View { SimpleLookupView(const Map &map_in, size_t num_dims) : map(map_in), my_addr(num_dims), pos(map.end()) {} - void lookup(ConstArrayRef<const label_t*> addr) override { + void lookup(ConstArrayRef<const string_id*> addr) override { assert(addr.size() == my_addr.size()); for (size_t i = 0; i < my_addr.size(); ++i) { my_addr[i] = Handle::handle_from_id(*addr[i]); @@ -49,7 +49,7 @@ struct SimpleLookupView : public Value::Index::View { pos = map.find(my_addr); } - bool next_result(ConstArrayRef<label_t*>, size_t &idx_out) override { + bool next_result(ConstArrayRef<string_id*>, size_t &idx_out) override { if (pos == map.end()) { return false; } @@ -98,7 +98,7 @@ struct SimpleFilterView : public Value::Index::View { assert((match_dims.size() + extract_dims.size()) == num_dims); } - void lookup(ConstArrayRef<const label_t*> addr) override { + void lookup(ConstArrayRef<const string_id*> addr) override { assert(addr.size() == query.size()); for (size_t i = 0; i < addr.size(); ++i) { query[i] = Handle::handle_from_id(*addr[i]); @@ -106,7 +106,7 @@ struct SimpleFilterView : public Value::Index::View { pos = map.begin(); } - bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override { + bool next_result(ConstArrayRef<string_id*> addr_out, size_t &idx_out) override { while (pos != map.end()) { if (is_match()) { assert(addr_out.size() == extract_dims.size()); @@ -138,11 +138,11 @@ struct SimpleIterateView : public Value::Index::View { SimpleIterateView(const Map &map_in) : map(map_in), pos(map.end()) {} - void lookup(ConstArrayRef<const label_t*>) override { + void lookup(ConstArrayRef<const string_id*>) override { pos = map.begin(); } - bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override { + bool next_result(ConstArrayRef<string_id*> addr_out, size_t &idx_out) override { if (pos == map.end()) { return false; } @@ -186,10 +186,10 @@ SimpleValue::add_mapping(ConstArrayRef<vespalib::stringref> addr) } void -SimpleValue::add_mapping(ConstArrayRef<label_t> addr) +SimpleValue::add_mapping(ConstArrayRef<string_id> addr) { Labels my_addr; - for(label_t label: addr) { + for(string_id label: addr) { my_addr.emplace_back(Handle::handle_from_id(label)); } auto [ignore, was_inserted] = _index.emplace(my_addr, _index.size()); @@ -262,7 +262,7 @@ SimpleValueT<T>::add_subspace(ConstArrayRef<vespalib::stringref> addr) template <typename T> ArrayRef<T> -SimpleValueT<T>::add_subspace(ConstArrayRef<label_t> addr) +SimpleValueT<T>::add_subspace(ConstArrayRef<string_id> addr) { size_t old_size = _cells.size(); add_mapping(addr); diff --git a/eval/src/vespa/eval/eval/simple_value.h b/eval/src/vespa/eval/eval/simple_value.h index 1fd645b704c..6f5ccd30041 100644 --- a/eval/src/vespa/eval/eval/simple_value.h +++ b/eval/src/vespa/eval/eval/simple_value.h @@ -37,7 +37,7 @@ protected: size_t num_mapped_dims() const { return _num_mapped_dims; } size_t subspace_size() const { return _subspace_size; } void add_mapping(ConstArrayRef<vespalib::stringref> addr); - void add_mapping(ConstArrayRef<label_t> addr); + void add_mapping(ConstArrayRef<string_id> addr); MemoryUsage estimate_extra_memory_usage() const; public: SimpleValue(const ValueType &type, size_t num_mapped_dims_in, size_t subspace_size_in); @@ -64,7 +64,7 @@ public: ~SimpleValueT() override; TypedCells cells() const override { return TypedCells(ConstArrayRef<T>(_cells)); } ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) override; - ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) override; + ArrayRef<T> add_subspace(ConstArrayRef<string_id> addr) override; std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>> self) override { if (num_mapped_dims() == 0) { assert(size() == 1); diff --git a/eval/src/vespa/eval/eval/value.cpp b/eval/src/vespa/eval/eval/value.cpp index 73c7c40636c..7dcd04c8ab2 100644 --- a/eval/src/vespa/eval/eval/value.cpp +++ b/eval/src/vespa/eval/eval/value.cpp @@ -12,8 +12,8 @@ namespace { struct TrivialView : Value::Index::View { bool first = false; - void lookup(ConstArrayRef<const label_t*> ) override { first = true; } - bool next_result(ConstArrayRef<label_t*> , size_t &idx_out) override { + void lookup(ConstArrayRef<const string_id*> ) override { first = true; } + bool next_result(ConstArrayRef<string_id*> , size_t &idx_out) override { if (first) { idx_out = 0; first = false; diff --git a/eval/src/vespa/eval/eval/value.h b/eval/src/vespa/eval/eval/value.h index 2efb7d7c1e4..88b394add9c 100644 --- a/eval/src/vespa/eval/eval/value.h +++ b/eval/src/vespa/eval/eval/value.h @@ -2,10 +2,10 @@ #pragma once -#include "label.h" #include "memory_usage_stuff.h" #include "value_type.h" #include "typed_cells.h" +#include <vespa/vespalib/util/string_id.h> #include <vespa/vespalib/stllike/string.h> #include <vespa/vespalib/util/traits.h> #include <vector> @@ -37,13 +37,13 @@ struct Value { // partial address for the dimensions given to // create_view. Results from the lookup is extracted using // the next_result function. - virtual void lookup(ConstArrayRef<const label_t*> addr) = 0; + virtual void lookup(ConstArrayRef<const string_id*> addr) = 0; // Extract the next result (if any) from the previous // lookup into the given partial address and index. Only // the labels for the dimensions NOT specified in // create_view will be extracted here. - virtual bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) = 0; + virtual bool next_result(ConstArrayRef<string_id*> addr_out, size_t &idx_out) = 0; virtual ~View() {} }; @@ -167,10 +167,10 @@ struct ValueBuilder : ValueBuilderBase { // add a dense subspace for the given address where labels are // specified by shared string repo ids. Note that the caller is // responsible for making sure the ids are valid 'long enough'. - virtual ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) = 0; + virtual ArrayRef<T> add_subspace(ConstArrayRef<string_id> addr) = 0; // convenience function to add a subspace with an empty address - ArrayRef<T> add_subspace() { return add_subspace(ConstArrayRef<label_t>()); } + ArrayRef<T> add_subspace() { return add_subspace(ConstArrayRef<string_id>()); } // Given the ownership of the builder itself, produce the newly // created value. This means that builders can only be used once, diff --git a/eval/src/vespa/eval/eval/value_codec.cpp b/eval/src/vespa/eval/eval/value_codec.cpp index 53131da86d8..0016dfc694f 100644 --- a/eval/src/vespa/eval/eval/value_codec.cpp +++ b/eval/src/vespa/eval/eval/value_codec.cpp @@ -129,7 +129,7 @@ size_t maybe_decode_num_blocks(nbostream &input, bool has_mapped_dims, const For return 1; } -void encode_mapped_labels(nbostream &output, size_t num_mapped_dims, const std::vector<label_t> &addr) { +void encode_mapped_labels(nbostream &output, size_t num_mapped_dims, const std::vector<string_id> &addr) { for (size_t i = 0; i < num_mapped_dims; ++i) { vespalib::string str = SharedStringRepo::Handle::string_from_id(addr[i]); output.writeSmallString(str); @@ -231,8 +231,8 @@ struct CreateTensorSpecFromValue { TensorSpec spec(value.type().to_spec()); size_t subspace_id = 0; size_t subspace_size = value.type().dense_subspace_size(); - std::vector<label_t> labels(value.type().count_mapped_dimensions()); - std::vector<label_t*> label_refs; + std::vector<string_id> labels(value.type().count_mapped_dimensions()); + std::vector<string_id*> label_refs; for (auto &label: labels) { label_refs.push_back(&label); } @@ -272,8 +272,8 @@ struct EncodeState { struct ContentEncoder { template<typename T> static void invoke(const Value &value, const EncodeState &state, nbostream &output) { - std::vector<label_t> address(state.num_mapped_dims); - std::vector<label_t*> a_refs(state.num_mapped_dims);; + std::vector<string_id> address(state.num_mapped_dims); + std::vector<string_id*> a_refs(state.num_mapped_dims);; for (size_t i = 0; i < state.num_mapped_dims; ++i) { a_refs[i] = &address[i]; } diff --git a/eval/src/vespa/eval/instruction/generic_create.cpp b/eval/src/vespa/eval/instruction/generic_create.cpp index 6e30da846e7..ca624ee0888 100644 --- a/eval/src/vespa/eval/instruction/generic_create.cpp +++ b/eval/src/vespa/eval/instruction/generic_create.cpp @@ -82,7 +82,7 @@ void my_generic_create_op(State &state, uint64_t param_in) { param.num_mapped_dims, param.dense_subspace_size, param.my_spec.size()); - std::vector<label_t> sparse_addr; + std::vector<string_id> sparse_addr; param.my_spec.each_entry([&](const auto &key, const auto &values) { sparse_addr.clear(); diff --git a/eval/src/vespa/eval/instruction/generic_join.h b/eval/src/vespa/eval/instruction/generic_join.h index 217f3195dec..aaee64f7b25 100644 --- a/eval/src/vespa/eval/instruction/generic_join.h +++ b/eval/src/vespa/eval/instruction/generic_join.h @@ -68,10 +68,10 @@ struct SparseJoinState { const Value::Index &first_index; const Value::Index &second_index; const std::vector<size_t> &second_view_dims; - std::vector<label_t> full_address; - std::vector<label_t*> first_address; - std::vector<const label_t*> address_overlap; - std::vector<label_t*> second_only_address; + std::vector<string_id> full_address; + std::vector<string_id*> first_address; + std::vector<const string_id*> address_overlap; + std::vector<string_id*> second_only_address; size_t lhs_subspace; size_t rhs_subspace; size_t &first_subspace; diff --git a/eval/src/vespa/eval/instruction/generic_merge.cpp b/eval/src/vespa/eval/instruction/generic_merge.cpp index 107cb805d74..b40388aa547 100644 --- a/eval/src/vespa/eval/instruction/generic_merge.cpp +++ b/eval/src/vespa/eval/instruction/generic_merge.cpp @@ -64,9 +64,9 @@ generic_mixed_merge(const Value &a, const Value &b, const size_t subspace_size = params.dense_subspace_size; size_t guess_subspaces = std::max(a.index().size(), b.index().size()); auto builder = params.factory.create_transient_value_builder<OCT>(params.res_type, num_mapped, subspace_size, guess_subspaces); - std::vector<label_t> address(num_mapped); - std::vector<const label_t *> addr_cref; - std::vector<label_t *> addr_ref; + std::vector<string_id> address(num_mapped); + std::vector<const string_id *> addr_cref; + std::vector<string_id *> addr_ref; for (auto & ref : address) { addr_cref.push_back(&ref); addr_ref.push_back(&ref); diff --git a/eval/src/vespa/eval/instruction/generic_peek.cpp b/eval/src/vespa/eval/instruction/generic_peek.cpp index d94742ae15c..4efd57d1775 100644 --- a/eval/src/vespa/eval/instruction/generic_peek.cpp +++ b/eval/src/vespa/eval/instruction/generic_peek.cpp @@ -65,7 +65,7 @@ struct DimSpec { assert(dim_type == DimType::CHILD_IDX); return idx; } - label_t get_label_name() const { + string_id get_label_name() const { assert(dim_type == DimType::LABEL_STR); return str.id(); } @@ -204,12 +204,12 @@ struct DensePlan { struct SparseState { std::vector<Handle> handles; - std::vector<label_t> view_addr; - std::vector<const label_t *> lookup_refs; - std::vector<label_t> output_addr; - std::vector<label_t *> fetch_addr; + std::vector<string_id> view_addr; + std::vector<const string_id *> lookup_refs; + std::vector<string_id> output_addr; + std::vector<string_id *> fetch_addr; - SparseState(std::vector<Handle> handles_in, std::vector<label_t> view_addr_in, size_t out_dims) + SparseState(std::vector<Handle> handles_in, std::vector<string_id> view_addr_in, size_t out_dims) : handles(std::move(handles_in)), view_addr(std::move(view_addr_in)), lookup_refs(view_addr.size()), @@ -258,7 +258,7 @@ struct SparsePlan { template <typename Getter> SparseState make_state(const Getter &get_child_value) const { std::vector<Handle> handles; - std::vector<label_t> view_addr; + std::vector<string_id> view_addr; for (const auto & dim : lookup_specs) { if (dim.has_child()) { int64_t child_value = get_child_value(dim.get_child_idx()); diff --git a/eval/src/vespa/eval/instruction/generic_reduce.cpp b/eval/src/vespa/eval/instruction/generic_reduce.cpp index d30186d3dd8..b6393d0d713 100644 --- a/eval/src/vespa/eval/instruction/generic_reduce.cpp +++ b/eval/src/vespa/eval/instruction/generic_reduce.cpp @@ -45,9 +45,9 @@ ReduceParam::~ReduceParam() = default; //----------------------------------------------------------------------------- struct SparseReduceState { - std::vector<label_t> full_address; - std::vector<label_t*> fetch_address; - std::vector<label_t*> keep_address; + std::vector<string_id> full_address; + std::vector<string_id*> fetch_address; + std::vector<string_id*> keep_address; size_t subspace; SparseReduceState(const SparseReducePlan &plan) @@ -71,13 +71,13 @@ template <typename ICT, typename OCT, typename AGGR> Value::UP generic_reduce(const Value &value, const ReduceParam ¶m) { auto cells = value.cells().typify<ICT>(); - ArrayArrayMap<label_t,AGGR> map(param.sparse_plan.keep_dims.size(), + ArrayArrayMap<string_id,AGGR> map(param.sparse_plan.keep_dims.size(), param.dense_plan.out_size, value.index().size()); SparseReduceState sparse(param.sparse_plan); auto full_view = value.index().create_view({}); full_view->lookup({}); - ConstArrayRef<label_t*> keep_addr(sparse.keep_address); + ConstArrayRef<string_id*> keep_addr(sparse.keep_address); while (full_view->next_result(sparse.fetch_address, sparse.subspace)) { auto [tag, ignore] = map.lookup_or_add_entry(keep_addr); AGGR *dst = map.get_values(tag).begin(); diff --git a/eval/src/vespa/eval/instruction/generic_rename.cpp b/eval/src/vespa/eval/instruction/generic_rename.cpp index 894ef37b678..b6f51a79fc3 100644 --- a/eval/src/vespa/eval/instruction/generic_rename.cpp +++ b/eval/src/vespa/eval/instruction/generic_rename.cpp @@ -69,8 +69,8 @@ generic_rename(const Value &a, const ValueType &res_type, const ValueBuilderFactory &factory) { auto cells = a.cells().typify<CT>(); - std::vector<label_t> output_address(sparse_plan.mapped_dims); - std::vector<label_t*> input_address; + std::vector<string_id> output_address(sparse_plan.mapped_dims); + std::vector<string_id*> input_address; for (size_t maps_to : sparse_plan.output_dimensions) { input_address.push_back(&output_address[maps_to]); } diff --git a/eval/src/vespa/eval/streamed/streamed_value.cpp b/eval/src/vespa/eval/streamed/streamed_value.cpp index 06162b2200d..c09e433b9b9 100644 --- a/eval/src/vespa/eval/streamed/streamed_value.cpp +++ b/eval/src/vespa/eval/streamed/streamed_value.cpp @@ -16,7 +16,7 @@ StreamedValue<T>::get_memory_usage() const { MemoryUsage usage = self_memory_usage<StreamedValue<T>>(); usage.merge(vector_extra_memory_usage(_my_cells)); - usage.merge(vector_extra_memory_usage(_my_labels.view().handles())); + usage.merge(vector_extra_memory_usage(_my_labels.view())); return usage; } diff --git a/eval/src/vespa/eval/streamed/streamed_value.h b/eval/src/vespa/eval/streamed/streamed_value.h index 94603d9d35e..ffd88a56cdd 100644 --- a/eval/src/vespa/eval/streamed/streamed_value.h +++ b/eval/src/vespa/eval/streamed/streamed_value.h @@ -20,22 +20,22 @@ template <typename T> class StreamedValue : public Value { private: - using StrongHandles = SharedStringRepo::StrongHandles; + using Handles = SharedStringRepo::Handles; ValueType _type; std::vector<T> _my_cells; - StrongHandles _my_labels; + Handles _my_labels; StreamedValueIndex _my_index; public: StreamedValue(ValueType type, size_t num_mapped_dimensions, - std::vector<T> cells, size_t num_subspaces, StrongHandles && handles) + std::vector<T> cells, size_t num_subspaces, Handles && handles) : _type(std::move(type)), _my_cells(std::move(cells)), _my_labels(std::move(handles)), _my_index(num_mapped_dimensions, num_subspaces, - _my_labels.view().handles()) + _my_labels.view()) { assert(num_subspaces * _type.dense_subspace_size() == _my_cells.size()); } diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder.h b/eval/src/vespa/eval/streamed/streamed_value_builder.h index 48a01f893de..7c6e7091c15 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_builder.h +++ b/eval/src/vespa/eval/streamed/streamed_value_builder.h @@ -14,14 +14,14 @@ template <typename T> class StreamedValueBuilder : public ValueBuilder<T> { private: - using StrongHandles = SharedStringRepo::StrongHandles; + using Handles = SharedStringRepo::Handles; ValueType _type; size_t _num_mapped_dimensions; size_t _dense_subspace_size; std::vector<T> _cells; size_t _num_subspaces; - StrongHandles _labels; + Handles _labels; public: StreamedValueBuilder(const ValueType &type, size_t num_mapped_in, @@ -32,9 +32,10 @@ public: _dense_subspace_size(subspace_size_in), _cells(), _num_subspaces(0), - _labels(num_mapped_in * expected_subspaces) + _labels() { _cells.reserve(subspace_size_in * expected_subspaces); + _labels.reserve(num_mapped_in * expected_subspaces); }; ~StreamedValueBuilder(); @@ -49,9 +50,9 @@ public: return ArrayRef<T>(&_cells[old_sz], _dense_subspace_size); } - ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) override { + ArrayRef<T> add_subspace(ConstArrayRef<string_id> addr) override { for (auto label : addr) { - _labels.add(label); + _labels.push_back(label); } size_t old_sz = _cells.size(); _cells.resize(old_sz + _dense_subspace_size); diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.cpp b/eval/src/vespa/eval/streamed/streamed_value_index.cpp index a014f2dcee9..d47f0138522 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_index.cpp +++ b/eval/src/vespa/eval/streamed/streamed_value_index.cpp @@ -18,7 +18,7 @@ struct StreamedFilterView : Value::Index::View { LabelBlockStream label_blocks; std::vector<size_t> view_dims; - std::vector<label_t> to_match; + std::vector<string_id> to_match; StreamedFilterView(LabelBlockStream labels, std::vector<size_t> view_dims_in) : label_blocks(std::move(labels)), @@ -28,7 +28,7 @@ struct StreamedFilterView : Value::Index::View to_match.reserve(view_dims.size()); } - void lookup(ConstArrayRef<const label_t*> addr) override { + void lookup(ConstArrayRef<const string_id*> addr) override { label_blocks.reset(); to_match.clear(); for (auto ptr : addr) { @@ -37,7 +37,7 @@ struct StreamedFilterView : Value::Index::View assert(view_dims.size() == to_match.size()); } - bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override { + bool next_result(ConstArrayRef<string_id*> addr_out, size_t &idx_out) override { while (const auto block = label_blocks.next_block()) { idx_out = block.subspace_index; bool matches = true; @@ -66,12 +66,12 @@ struct StreamedIterationView : Value::Index::View : label_blocks(std::move(labels)) {} - void lookup(ConstArrayRef<const label_t*> addr) override { + void lookup(ConstArrayRef<const string_id*> addr) override { label_blocks.reset(); assert(addr.size() == 0); } - bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override { + bool next_result(ConstArrayRef<string_id*> addr_out, size_t &idx_out) override { if (auto block = label_blocks.next_block()) { idx_out = block.subspace_index; size_t i = 0; diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.h b/eval/src/vespa/eval/streamed/streamed_value_index.h index aa1c9a0e201..e94462c89f4 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_index.h +++ b/eval/src/vespa/eval/streamed/streamed_value_index.h @@ -16,10 +16,10 @@ class StreamedValueIndex : public Value::Index private: uint32_t _num_mapped_dims; uint32_t _num_subspaces; - const std::vector<label_t> &_labels_ref; + const std::vector<string_id> &_labels_ref; public: - StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, const std::vector<label_t> &labels_ref) + StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, const std::vector<string_id> &labels_ref) : _num_mapped_dims(num_mapped_dims), _num_subspaces(num_subspaces), _labels_ref(labels_ref) diff --git a/eval/src/vespa/eval/streamed/streamed_value_utils.h b/eval/src/vespa/eval/streamed/streamed_value_utils.h index 6b44e052f0c..9eb2804367d 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_utils.h +++ b/eval/src/vespa/eval/streamed/streamed_value_utils.h @@ -13,10 +13,10 @@ namespace vespalib::eval { * Reading more labels than available will trigger an assert. **/ struct LabelStream { - const std::vector<label_t> &source; + const std::vector<string_id> &source; size_t pos; - LabelStream(const std::vector<label_t> &data) : source(data), pos(0) {} - label_t next_label() { + LabelStream(const std::vector<string_id> &data) : source(data), pos(0) {} + string_id next_label() { assert(pos < source.size()); return source[pos++]; } @@ -29,7 +29,7 @@ struct LabelStream { struct LabelBlock { static constexpr size_t npos = -1; size_t subspace_index; - ConstArrayRef<label_t> address; + ConstArrayRef<string_id> address; operator bool() const { return subspace_index != npos; } }; @@ -42,7 +42,7 @@ private: size_t _num_subspaces; LabelStream _labels; size_t _subspace_index; - std::vector<label_t> _current_address; + std::vector<string_id> _current_address; public: LabelBlock next_block() { if (_subspace_index < _num_subspaces) { @@ -61,7 +61,7 @@ public: } LabelBlockStream(uint32_t num_subspaces, - const std::vector<label_t> &labels, + const std::vector<string_id> &labels, uint32_t num_mapped_dims) : _num_subspaces(num_subspaces), _labels(labels), diff --git a/eval/src/vespa/eval/streamed/streamed_value_view.h b/eval/src/vespa/eval/streamed/streamed_value_view.h index 38eb8db786f..af2f707e0b1 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_view.h +++ b/eval/src/vespa/eval/streamed/streamed_value_view.h @@ -24,7 +24,7 @@ private: public: StreamedValueView(const ValueType &type, size_t num_mapped_dimensions, TypedCells cells, size_t num_subspaces, - const std::vector<label_t> &labels) + const std::vector<string_id> &labels) : _type(type), _cells_ref(cells), _my_index(num_mapped_dimensions, num_subspaces, labels) diff --git a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp index ef4b711b86f..ce0f7992144 100644 --- a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp +++ b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp @@ -19,6 +19,7 @@ using vespalib::datastore::EntryRef; using namespace vespalib::eval; using vespalib::ConstArrayRef; using vespalib::MemoryUsage; +using vespalib::string_id; namespace search::tensor { @@ -29,10 +30,10 @@ namespace { template <typename CT, typename F> void each_subspace(const Value &value, size_t num_mapped, size_t dense_size, F f) { size_t subspace; - std::vector<label_t> addr(num_mapped); - std::vector<label_t*> refs; + std::vector<string_id> addr(num_mapped); + std::vector<string_id*> refs; refs.reserve(addr.size()); - for (label_t &label: addr) { + for (string_id &label: addr) { refs.push_back(&label); } auto cells = value.cells().typify<CT>(); @@ -40,7 +41,7 @@ void each_subspace(const Value &value, size_t num_mapped, size_t dense_size, F f view->lookup({}); while (view->next_result(refs, subspace)) { size_t offset = subspace * dense_size; - f(ConstArrayRef<label_t>(addr), ConstArrayRef<CT>(cells.begin() + offset, dense_size)); + f(ConstArrayRef<string_id>(addr), ConstArrayRef<CT>(cells.begin() + offset, dense_size)); } } @@ -54,20 +55,18 @@ struct CreateTensorEntry { } }; -using HandleView = vespalib::SharedStringRepo::HandleView; - struct MyFastValueView final : Value { const ValueType &my_type; FastValueIndex my_index; TypedCells my_cells; - MyFastValueView(const ValueType &type_ref, HandleView handle_view, TypedCells cells, size_t num_mapped, size_t num_spaces) + MyFastValueView(const ValueType &type_ref, const std::vector<string_id> &handle_view, TypedCells cells, size_t num_mapped, size_t num_spaces) : my_type(type_ref), my_index(num_mapped, handle_view, num_spaces), my_cells(cells) { - const std::vector<label_t> &labels = handle_view.handles(); + const std::vector<string_id> &labels = handle_view; for (size_t i = 0; i < num_spaces; ++i) { - ConstArrayRef<label_t> addr(&labels[i * num_mapped], num_mapped); + ConstArrayRef<string_id> addr(&labels[i * num_mapped], num_mapped); my_index.map.add_mapping(FastAddrMap::hash_labels(addr)); } assert(my_index.map.size() == num_spaces); @@ -98,13 +97,14 @@ StreamedValueStore::TensorEntry::create_shared_entry(const Value &value) template <typename CT> StreamedValueStore::TensorEntryImpl<CT>::TensorEntryImpl(const Value &value, size_t num_mapped, size_t dense_size) - : handles(num_mapped * value.index().size()), + : handles(), cells() { + handles.reserve(num_mapped * value.index().size()); cells.reserve(dense_size * value.index().size()); auto store_subspace = [&](auto addr, auto data) { - for (label_t label: addr) { - handles.add(label); + for (string_id label: addr) { + handles.push_back(label); } for (CT entry: data) { cells.push_back(entry); @@ -121,7 +121,7 @@ StreamedValueStore::TensorEntryImpl<CT>::create_fast_value_view(const ValueType size_t dense_size = type_ref.dense_subspace_size(); size_t num_spaces = cells.size() / dense_size; assert(dense_size * num_spaces == cells.size()); - assert(num_mapped * num_spaces == handles.view().handles().size()); + assert(num_mapped * num_spaces == handles.view().size()); return std::make_unique<MyFastValueView>(type_ref, handles.view(), TypedCells(cells), num_mapped, num_spaces); } @@ -133,8 +133,8 @@ StreamedValueStore::TensorEntryImpl<CT>::encode_value(const ValueType &type, ves size_t dense_size = type.dense_subspace_size(); size_t num_spaces = cells.size() / dense_size; assert(dense_size * num_spaces == cells.size()); - assert(num_mapped * num_spaces == handles.view().handles().size()); - StreamedValueView my_value(type, num_mapped, TypedCells(cells), num_spaces, handles.view().handles()); + assert(num_mapped * num_spaces == handles.view().size()); + StreamedValueView my_value(type, num_mapped, TypedCells(cells), num_spaces, handles.view()); ::vespalib::eval::encode_value(my_value, target); } @@ -143,7 +143,7 @@ MemoryUsage StreamedValueStore::TensorEntryImpl<CT>::get_memory_usage() const { MemoryUsage usage = self_memory_usage<TensorEntryImpl<CT>>(); - usage.merge(vector_extra_memory_usage(handles.view().handles())); + usage.merge(vector_extra_memory_usage(handles.view())); usage.merge(vector_extra_memory_usage(cells)); return usage; } diff --git a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h index 3a9d9a0b7b4..1df860f4007 100644 --- a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h +++ b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h @@ -18,7 +18,7 @@ class StreamedValueStore : public TensorStore { public: using Value = vespalib::eval::Value; using ValueType = vespalib::eval::ValueType; - using Handles = vespalib::SharedStringRepo::StrongHandles; + using Handles = vespalib::SharedStringRepo::Handles; using MemoryUsage = vespalib::MemoryUsage; // interface for tensor entries diff --git a/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp b/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp index e8f39c88afe..d560513e24d 100644 --- a/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp +++ b/vespalib/src/tests/shared_string_repo/shared_string_repo_test.cpp @@ -13,6 +13,7 @@ using namespace vespalib; using make_string_short::fmt; using Handle = SharedStringRepo::Handle; +using Handles = SharedStringRepo::Handles; bool verbose = false; double budget = 0.10; @@ -30,12 +31,7 @@ std::vector<vespalib::string> make_strings(size_t cnt) { } std::vector<vespalib::string> copy_strings(const std::vector<vespalib::string> &strings) { - std::vector<vespalib::string> result; - result.reserve(strings.size()); - for (const auto &str: strings) { - result.push_back(str); - } - return result; + return strings; } std::vector<std::pair<vespalib::string, uint64_t>> copy_and_hash(const std::vector<vespalib::string> &strings) { @@ -75,20 +71,27 @@ std::vector<vespalib::string> get_strings(const std::vector<Handle> &handles) { return strings; } -std::unique_ptr<SharedStringRepo::StrongHandles> make_strong_handles(const std::vector<vespalib::string> &strings) { - auto result = std::make_unique<SharedStringRepo::StrongHandles>(strings.size()); +std::unique_ptr<SharedStringRepo::Handles> make_strong_handles(const std::vector<vespalib::string> &strings) { + auto result = std::make_unique<SharedStringRepo::Handles>(); + result->reserve(strings.size()); for (const auto &str: strings) { result->add(str); } return result; } -std::unique_ptr<SharedStringRepo::WeakHandles> make_weak_handles(const SharedStringRepo::HandleView &view) { - auto result = std::make_unique<SharedStringRepo::WeakHandles>(view.handles().size()); - for (uint32_t handle: view.handles()) { - result->add(handle); +std::unique_ptr<SharedStringRepo::Handles> copy_strong_handles(const SharedStringRepo::Handles &handles) { + const auto &view = handles.view(); + auto result = std::make_unique<SharedStringRepo::Handles>(); + result->reserve(view.size()); + for (const auto &handle: view) { + result->push_back(handle); } - return result; + return result; +} + +std::unique_ptr<std::vector<string_id>> make_weak_handles(const SharedStringRepo::Handles &handles) { + return std::make_unique<std::vector<string_id>>(handles.view()); } //----------------------------------------------------------------------------- @@ -183,8 +186,9 @@ struct Fixture { std::vector<Handle> copy_handles_result; std::vector<Handle> resolve_again_result; std::vector<vespalib::string> get_result; - std::unique_ptr<SharedStringRepo::StrongHandles> strong; - std::unique_ptr<SharedStringRepo::WeakHandles> weak; + std::unique_ptr<SharedStringRepo::Handles> strong; + std::unique_ptr<SharedStringRepo::Handles> strong_copy; + std::unique_ptr<std::vector<string_id>> weak; auto copy_strings_task = [&](){ copy_strings_result = copy_strings(work); }; auto copy_and_hash_task = [&](){ copy_and_hash_result = copy_and_hash(work); }; auto local_enum_task = [&](){ local_enum_result = local_enum(work); }; @@ -195,8 +199,10 @@ struct Fixture { auto reclaim_task = [&]() { resolve_again_result.clear(); }; auto reclaim_last_task = [&]() { resolve_result.clear(); }; auto make_strong_task = [&]() { strong = make_strong_handles(work); }; - auto make_weak_task = [&]() { weak = make_weak_handles(strong->view()); }; + auto copy_strong_task = [&]() { strong_copy = copy_strong_handles(*strong); }; + auto make_weak_task = [&]() { weak = make_weak_handles(*strong); }; auto free_weak_task = [&]() { weak.reset(); }; + auto free_strong_copy_task = [&]() { strong_copy.reset(); }; auto free_strong_task = [&]() { strong.reset(); }; measure_task("[01] copy strings", is_master, copy_strings_task); measure_task("[02] copy and hash", is_master, copy_and_hash_task); @@ -211,36 +217,117 @@ struct Fixture { copy_handles_result.clear(); measure_task("[09] reclaim last", is_master, reclaim_last_task); measure_task("[10] make strong handles", is_master, make_strong_task); - measure_task("[11] make weak handles", is_master, make_weak_task); - measure_task("[12] free weak handles", is_master, free_weak_task); - measure_task("[13] free strong handles", is_master, free_strong_task); + measure_task("[11] copy strong handles", is_master, copy_strong_task); + measure_task("[12] make weak handles", is_master, make_weak_task); + measure_task("[13] free weak handles", is_master, free_weak_task); + measure_task("[14] free strong handles copy", is_master, free_strong_copy_task); + measure_task("[15] free strong handles", is_master, free_strong_task); } } }; //----------------------------------------------------------------------------- -TEST("require that basic usage works") { +void verify_eq(const Handle &a, const Handle &b) { + EXPECT_TRUE(a == b); + EXPECT_TRUE(a.id() == b.id()); + EXPECT_FALSE(a != b); + EXPECT_FALSE(a.id() != b.id()); + EXPECT_FALSE(a < b); + EXPECT_FALSE(a.id() < b.id()); + EXPECT_FALSE(b < a); + EXPECT_FALSE(b.id() < a.id()); +} + +void verify_not_eq(const Handle &a, const Handle &b) { + EXPECT_FALSE(a == b); + EXPECT_FALSE(a.id() == b.id()); + EXPECT_TRUE(a != b); + EXPECT_TRUE(a.id() != b.id()); + EXPECT_NOT_EQUAL((a < b), (b < a)); + EXPECT_NOT_EQUAL((a.id() < b.id()), (b.id() < a.id())); +} + +//----------------------------------------------------------------------------- + +TEST("require that basic handle usage works") { Handle empty; Handle foo("foo"); Handle bar("bar"); Handle empty2; Handle foo2("foo"); - Handle bar2(bar); - EXPECT_EQUAL(empty.id(), 0u); - EXPECT_TRUE(empty.id() != foo.id()); - EXPECT_TRUE(empty.id() != bar.id()); - EXPECT_TRUE(foo.id() != bar.id()); - EXPECT_EQUAL(empty.id(), empty2.id()); - EXPECT_EQUAL(foo.id(), foo2.id()); - EXPECT_EQUAL(bar.id(), bar2.id()); + Handle bar2("bar"); + + EXPECT_EQUAL(SharedStringRepo::stats().active_entries, 2u); + + TEST_DO(verify_eq(empty, empty2)); + TEST_DO(verify_eq(foo, foo2)); + TEST_DO(verify_eq(bar, bar2)); + + TEST_DO(verify_not_eq(empty, foo)); + TEST_DO(verify_not_eq(empty, bar)); + TEST_DO(verify_not_eq(foo, bar)); + + EXPECT_TRUE(empty.id() == string_id()); + EXPECT_TRUE(empty2.id() == string_id()); EXPECT_EQUAL(empty.as_string(), vespalib::string("")); + EXPECT_EQUAL(empty2.as_string(), vespalib::string("")); EXPECT_EQUAL(foo.as_string(), vespalib::string("foo")); EXPECT_EQUAL(bar.as_string(), vespalib::string("bar")); EXPECT_EQUAL(foo2.as_string(), vespalib::string("foo")); EXPECT_EQUAL(bar2.as_string(), vespalib::string("bar")); } +TEST("require that handles can be copied") { + Handle a("foo"); + Handle b(a); + Handle c; + c = b; + EXPECT_EQUAL(SharedStringRepo::stats().active_entries, 1u); + EXPECT_TRUE(a.id() == b.id()); + EXPECT_TRUE(b.id() == c.id()); + EXPECT_EQUAL(c.as_string(), vespalib::string("foo")); +} + +TEST("require that handles can be moved") { + Handle a("foo"); + Handle b(std::move(a)); + Handle c; + c = std::move(b); + EXPECT_EQUAL(SharedStringRepo::stats().active_entries, 1u); + EXPECT_TRUE(a.id() == string_id()); + EXPECT_TRUE(b.id() == string_id()); + EXPECT_EQUAL(c.as_string(), vespalib::string("foo")); +} + +TEST("require that handle/string can be obtained from string_id") { + Handle a("str"); + Handle b = Handle::handle_from_id(a.id()); + EXPECT_EQUAL(SharedStringRepo::stats().active_entries, 1u); + EXPECT_EQUAL(Handle::string_from_id(b.id()), vespalib::string("str")); +} + +//----------------------------------------------------------------------------- + +TEST("require that basic multi-handle usage works") { + Handles a; + a.reserve(4); + Handle foo("foo"); + Handle bar("bar"); + EXPECT_TRUE(a.add("foo") == foo.id()); + EXPECT_TRUE(a.add("bar") == bar.id()); + a.push_back(foo.id()); + a.push_back(bar.id()); + Handles b(std::move(a)); + EXPECT_EQUAL(SharedStringRepo::stats().active_entries, 2u); + EXPECT_EQUAL(a.view().size(), 0u); + EXPECT_EQUAL(b.view().size(), 4u); + EXPECT_TRUE(b.view()[0] == foo.id()); + EXPECT_TRUE(b.view()[1] == bar.id()); + EXPECT_TRUE(b.view()[2] == foo.id()); + EXPECT_TRUE(b.view()[3] == bar.id()); +} + //----------------------------------------------------------------------------- TEST_MT_F("test shared string repo operations with 1 threads", 1, Fixture(num_threads)) { @@ -273,6 +360,22 @@ TEST_MT_F("test shared string repo operations with 64 threads", 64, Fixture(num_ //----------------------------------------------------------------------------- +#if 0 +// verify leak-detection and reporting + +TEST("leak some handles on purpose") { + new Handle("leaked string"); + new Handle("also leaked"); + new Handle("even more leak"); +} +#endif + +TEST("require that no handles have leaked during testing") { + EXPECT_EQUAL(SharedStringRepo::stats().active_entries, 0u); +} + +//----------------------------------------------------------------------------- + int main(int argc, char **argv) { TEST_MASTER.init(__FILE__); if ((argc == 2) && (argv[1] == std::string("verbose"))) { diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp index e529b1190d9..187f586d344 100644 --- a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp +++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp @@ -2,8 +2,24 @@ #include "shared_string_repo.h" +#include <vespa/log/log.h> +LOG_SETUP(".vespalib.shared_string_repo"); + namespace vespalib { +SharedStringRepo::Stats::Stats() + : active_entries(0), + total_entries(0) +{ +} + +void +SharedStringRepo::Stats::merge(const Stats &s) +{ + active_entries += s.active_entries; + total_entries += s.total_entries; +} + SharedStringRepo::Partition::~Partition() = default; void @@ -12,12 +28,22 @@ SharedStringRepo::Partition::find_leaked_entries(size_t my_idx) const for (size_t i = 0; i < _entries.size(); ++i) { if (!_entries[i].is_free()) { size_t id = (((i << PART_BITS) | my_idx) + 1); - fprintf(stderr, "WARNING: shared_string_repo: leaked string id: %zu ('%s')\n", - id, _entries[i].str().c_str()); + LOG(warning, "leaked string id: %zu (part: %zu/%d, string: '%s')\n", + id, my_idx, NUM_PARTS, _entries[i].str().c_str()); } } } +SharedStringRepo::Stats +SharedStringRepo::Partition::stats() const +{ + Stats stats; + std::lock_guard guard(_lock); + stats.active_entries = _hash.size(); + stats.total_entries = _entries.size(); + return stats; +} + void SharedStringRepo::Partition::make_entries(size_t hint) { @@ -31,6 +57,8 @@ SharedStringRepo::Partition::make_entries(size_t hint) } } +SharedStringRepo SharedStringRepo::_repo; + SharedStringRepo::SharedStringRepo() = default; SharedStringRepo::~SharedStringRepo() { @@ -39,38 +67,30 @@ SharedStringRepo::~SharedStringRepo() } } -SharedStringRepo & -SharedStringRepo::get() +SharedStringRepo::Stats +SharedStringRepo::stats() { - static SharedStringRepo repo; - return repo; + Stats stats; + for (const auto &part: _repo._partitions) { + stats.merge(part.stats()); + } + return stats; } -SharedStringRepo::WeakHandles::WeakHandles(size_t expect_size) +SharedStringRepo::Handles::Handles() : _handles() { - _handles.reserve(expect_size); -} - -SharedStringRepo::WeakHandles::~WeakHandles() = default; - -SharedStringRepo::StrongHandles::StrongHandles(size_t expect_size) - : _repo(SharedStringRepo::get()), - _handles() -{ - _handles.reserve(expect_size); } -SharedStringRepo::StrongHandles::StrongHandles(StrongHandles &&rhs) - : _repo(rhs._repo), - _handles(std::move(rhs._handles)) +SharedStringRepo::Handles::Handles(Handles &&rhs) + : _handles(std::move(rhs._handles)) { assert(rhs._handles.empty()); } -SharedStringRepo::StrongHandles::~StrongHandles() +SharedStringRepo::Handles::~Handles() { - for (uint32_t handle: _handles) { + for (string_id handle: _handles) { _repo.reclaim(handle); } } diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.h b/vespalib/src/vespa/vespalib/util/shared_string_repo.h index f7137984caa..b2a222c82af 100644 --- a/vespalib/src/vespa/vespalib/util/shared_string_repo.h +++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.h @@ -2,6 +2,7 @@ #pragma once +#include "string_id.h" #include "spin_lock.h" #include <vespa/vespalib/stllike/string.h> #include <vespa/vespalib/stllike/identity.h> @@ -23,6 +24,14 @@ namespace vespalib { * repo. Handle objects are used to track which strings are in use. **/ class SharedStringRepo { +public: + struct Stats { + size_t active_entries; + size_t total_entries; + Stats(); + void merge(const Stats &s); + }; + private: static constexpr int NUM_PARTS = 64; static constexpr int PART_BITS = 6; @@ -58,8 +67,7 @@ private: void fini(uint32_t next) { _hash = next; _ref_cnt = npos; - // to reset or not to reset... - // _str.reset(); + _str.reset(); } vespalib::string as_string() const { assert(!is_free()); @@ -92,7 +100,7 @@ private: using HashType = hashtable<Key,Key,Hash,Equal,Identity,hashtable_base::and_modulator>; private: - SpinLock _lock; + mutable SpinLock _lock; std::vector<Entry> _entries; uint32_t _free; HashType _hash; @@ -116,6 +124,7 @@ private: } ~Partition(); void find_leaked_entries(size_t my_idx) const; + Stats stats() const; uint32_t resolve(const AltKey &alt_key) { std::lock_guard guard(_lock); @@ -130,7 +139,7 @@ private: } } - vespalib::string as_string(uint32_t idx) { + vespalib::string as_string(uint32_t idx) const { std::lock_guard guard(_lock); return _entries[idx].as_string(); } @@ -156,125 +165,107 @@ private: SharedStringRepo(); ~SharedStringRepo(); - uint32_t resolve(vespalib::stringref str) { + string_id resolve(vespalib::stringref str) { if (!str.empty()) { uint64_t full_hash = XXH3_64bits(str.data(), str.size()); uint32_t part = full_hash & PART_MASK; uint32_t local_hash = full_hash >> PART_BITS; uint32_t local_idx = _partitions[part].resolve(AltKey{str, local_hash}); - return (((local_idx << PART_BITS) | part) + 1); + return string_id(((local_idx << PART_BITS) | part) + 1); } else { - return 0; + return {}; } } - vespalib::string as_string(uint32_t id) { - if (id != 0) { - uint32_t part = (id - 1) & PART_MASK; - uint32_t local_idx = (id - 1) >> PART_BITS; + vespalib::string as_string(string_id id) { + if (id._id != 0) { + uint32_t part = (id._id - 1) & PART_MASK; + uint32_t local_idx = (id._id - 1) >> PART_BITS; return _partitions[part].as_string(local_idx); } else { return {}; } } - uint32_t copy(uint32_t id) { - if (id != 0) { - uint32_t part = (id - 1) & PART_MASK; - uint32_t local_idx = (id - 1) >> PART_BITS; + string_id copy(string_id id) { + if (id._id != 0) { + uint32_t part = (id._id - 1) & PART_MASK; + uint32_t local_idx = (id._id - 1) >> PART_BITS; _partitions[part].copy(local_idx); } return id; } - void reclaim(uint32_t id) { - if (id != 0) { - uint32_t part = (id - 1) & PART_MASK; - uint32_t local_idx = (id - 1) >> PART_BITS; + void reclaim(string_id id) { + if (id._id != 0) { + uint32_t part = (id._id - 1) & PART_MASK; + uint32_t local_idx = (id._id - 1) >> PART_BITS; _partitions[part].reclaim(local_idx); } } + static SharedStringRepo _repo; + public: - static SharedStringRepo &get(); + static Stats stats(); // A single stand-alone string handle with ownership class Handle { private: - uint32_t _id; - Handle(uint32_t weak_id) : _id(get().copy(weak_id)) {} + string_id _id; + Handle(string_id weak_id) : _id(_repo.copy(weak_id)) {} public: - Handle() noexcept : _id(0) {} - Handle(vespalib::stringref str) : _id(get().resolve(str)) {} - Handle(const Handle &rhs) : _id(get().copy(rhs._id)) {} + Handle() noexcept : _id() {} + Handle(vespalib::stringref str) : _id(_repo.resolve(str)) {} + Handle(const Handle &rhs) : _id(_repo.copy(rhs._id)) {} Handle &operator=(const Handle &rhs) { - get().reclaim(_id); - _id = get().copy(rhs._id); - return *this; + _repo.reclaim(_id); + _id = _repo.copy(rhs._id); + return *this; } Handle(Handle &&rhs) noexcept : _id(rhs._id) { - rhs._id = 0; + rhs._id = string_id(); } Handle &operator=(Handle &&rhs) { - get().reclaim(_id); + _repo.reclaim(_id); _id = rhs._id; - rhs._id = 0; + rhs._id = string_id(); return *this; } // NB: not lexical sorting order, but can be used in maps bool operator<(const Handle &rhs) const noexcept { return (_id < rhs._id); } bool operator==(const Handle &rhs) const noexcept { return (_id == rhs._id); } bool operator!=(const Handle &rhs) const noexcept { return (_id != rhs._id); } - uint32_t id() const noexcept { return _id; } - uint32_t hash() const noexcept { return _id; } - vespalib::string as_string() const { return get().as_string(_id); } - static Handle handle_from_id(uint32_t weak_id) { return Handle(weak_id); } - static vespalib::string string_from_id(uint32_t weak_id) { return get().as_string(weak_id); } - ~Handle() { get().reclaim(_id); } - }; - - // Read-only access to a collection of string handles - class HandleView { - private: - const std::vector<uint32_t> &_handles; - public: - HandleView(const std::vector<uint32_t> &handles_in) : _handles(handles_in) {} - const std::vector<uint32_t> &handles() const { return _handles; } - }; - - // A collection of string handles without ownership - class WeakHandles { - private: - std::vector<uint32_t> _handles; - public: - WeakHandles(size_t expect_size); - ~WeakHandles(); - void add(uint32_t handle) { _handles.push_back(handle); } - HandleView view() const { return HandleView(_handles); } + string_id id() const noexcept { return _id; } + uint32_t hash() const noexcept { return _id.hash(); } + vespalib::string as_string() const { return _repo.as_string(_id); } + static Handle handle_from_id(string_id weak_id) { return Handle(weak_id); } + static vespalib::string string_from_id(string_id weak_id) { return _repo.as_string(weak_id); } + ~Handle() { _repo.reclaim(_id); } }; // A collection of string handles with ownership - class StrongHandles { + class Handles { private: - SharedStringRepo &_repo; - std::vector<uint32_t> _handles; + std::vector<string_id> _handles; public: - StrongHandles(size_t expect_size); - StrongHandles(StrongHandles &&rhs); - StrongHandles(const StrongHandles &) = delete; - StrongHandles &operator=(const StrongHandles &) = delete; - StrongHandles &operator=(StrongHandles &&) = delete; - ~StrongHandles(); - uint32_t add(vespalib::stringref str) { - uint32_t id = _repo.resolve(str); + Handles(); + Handles(Handles &&rhs); + Handles(const Handles &) = delete; + Handles &operator=(const Handles &) = delete; + Handles &operator=(Handles &&) = delete; + ~Handles(); + string_id add(vespalib::stringref str) { + string_id id = _repo.resolve(str); _handles.push_back(id); return id; } - void add(uint32_t handle) { - uint32_t id = _repo.copy(handle); + void reserve(size_t value) { _handles.reserve(value); } + void push_back(string_id handle) { + string_id id = _repo.copy(handle); _handles.push_back(id); } - HandleView view() const { return HandleView(_handles); } + const std::vector<string_id> &view() const { return _handles; } }; }; diff --git a/vespalib/src/vespa/vespalib/util/string_id.h b/vespalib/src/vespa/vespalib/util/string_id.h new file mode 100644 index 00000000000..92711acdc67 --- /dev/null +++ b/vespalib/src/vespa/vespalib/util/string_id.h @@ -0,0 +1,40 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <cstdint> + +namespace vespalib { + +class SharedStringRepo; + +/** + * A typed integer value representing the identity of a string stored + * in the global SharedStringRepo. This class is a simple wrapper with + * no lifetime management of the mapping between string value and + * string id. For a string_id to be valid, it needs to be owned by at + * least one SharedStringRepo::Handle or SharedStringRepo::Handles + * object. This is similar to how std::enable_shared_from_this works; + * the string_id acts like a reference to a mapping from a string to a + * numberical value (without ownership) and Handle/Handles act like + * shared pointers to the same mapping (with shared ownership). + **/ +class string_id { + friend class ::vespalib::SharedStringRepo; +private: + uint32_t _id; + explicit constexpr string_id(uint32_t value_in) noexcept : _id(value_in) {} +public: + constexpr string_id() noexcept : _id(0) {} + constexpr string_id(const string_id &) noexcept = default; + constexpr string_id(string_id &&) noexcept = default; + constexpr string_id &operator=(const string_id &) noexcept = default; + constexpr string_id &operator=(string_id &&) noexcept = default; + constexpr uint32_t hash() const noexcept { return _id; } + // NB: not lexical sorting order, but can be used in maps + constexpr bool operator<(const string_id &rhs) const noexcept { return (_id < rhs._id); } + constexpr bool operator==(const string_id &rhs) const noexcept { return (_id == rhs._id); } + constexpr bool operator!=(const string_id &rhs) const noexcept { return (_id != rhs._id); } +}; + +} |