diff options
author | Håvard Pettersen <havardpe@oath.com> | 2021-01-06 14:49:48 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2021-01-11 11:08:19 +0000 |
commit | 32292572be63bf1725d34e4c071cd6690ecd43e8 (patch) | |
tree | 2306a12514572cfa6323a92ad120d5d8d5c69b60 /eval | |
parent | 381e20b7609eb6557168425089a6a5c948c50554 (diff) |
add string_id and simplify multi-handle container
add more testing
static instance instead of lazy singleton
use logging for leak detection
drop label_t type alias
Diffstat (limited to 'eval')
24 files changed, 127 insertions, 135 deletions
diff --git a/eval/src/tests/eval/fast_value/fast_value_test.cpp b/eval/src/tests/eval/fast_value/fast_value_test.cpp index e809fb1bcda..2124d4f169a 100644 --- a/eval/src/tests/eval/fast_value/fast_value_test.cpp +++ b/eval/src/tests/eval/fast_value/fast_value_test.cpp @@ -113,8 +113,8 @@ TEST(FastValueBuilderTest, mixed_add_subspace_robustness) { EXPECT_EQ(value->index().size(), 3); Handle foo("foo"); Handle bar("bar"); - label_t label; - label_t *label_ptr = &label; + string_id label; + string_id *label_ptr = &label; size_t subspace_idx; auto get_subspace = [&]() { auto cells = value->cells().typify<double>(); diff --git a/eval/src/tests/eval/simple_value/simple_value_test.cpp b/eval/src/tests/eval/simple_value/simple_value_test.cpp index 1691d5c263c..ffc58df4a16 100644 --- a/eval/src/tests/eval/simple_value/simple_value_test.cpp +++ b/eval/src/tests/eval/simple_value/simple_value_test.cpp @@ -16,12 +16,12 @@ using namespace vespalib::eval::test; using vespalib::make_string_short::fmt; -using PA = std::vector<label_t *>; -using CPA = std::vector<const label_t *>; +using PA = std::vector<string_id *>; +using CPA = std::vector<const string_id *>; using Handle = SharedStringRepo::Handle; -vespalib::string as_str(label_t label) { return Handle::string_from_id(label); } +vespalib::string as_str(string_id label) { return Handle::string_from_id(label); } std::vector<Layout> layouts = { {}, @@ -103,8 +103,8 @@ TEST(SimpleValueTest, simple_value_can_be_built_and_inspected) { EXPECT_EQ(value->index().size(), 6); auto view = value->index().create_view({0}); Handle query_handle("b"); - label_t query = query_handle.id(); - label_t label; + string_id query = query_handle.id(); + string_id label; size_t subspace; std::map<vespalib::string,size_t> result; view->lookup(CPA{&query}); diff --git a/eval/src/tests/streamed/value/streamed_value_test.cpp b/eval/src/tests/streamed/value/streamed_value_test.cpp index 5221c4eda64..075595c5d2c 100644 --- a/eval/src/tests/streamed/value/streamed_value_test.cpp +++ b/eval/src/tests/streamed/value/streamed_value_test.cpp @@ -16,12 +16,12 @@ using namespace vespalib::eval::test; using vespalib::make_string_short::fmt; -using PA = std::vector<label_t *>; -using CPA = std::vector<const label_t *>; +using PA = std::vector<string_id *>; +using CPA = std::vector<const string_id *>; using Handle = SharedStringRepo::Handle; -vespalib::string as_str(label_t label) { return Handle::string_from_id(label); } +vespalib::string as_str(string_id label) { return Handle::string_from_id(label); } std::vector<Layout> layouts = { {}, @@ -103,8 +103,8 @@ TEST(StreamedValueTest, streamed_value_can_be_built_and_inspected) { EXPECT_EQ(value->index().size(), 6); auto view = value->index().create_view({0}); Handle query_handle("b"); - label_t query = query_handle.id(); - label_t label; + string_id query = query_handle.id(); + string_id label; size_t subspace; std::map<vespalib::string,size_t> result; view->lookup(CPA{&query}); diff --git a/eval/src/vespa/eval/eval/fast_addr_map.h b/eval/src/vespa/eval/eval/fast_addr_map.h index a8a82718a28..d8f68d2a37c 100644 --- a/eval/src/vespa/eval/eval/fast_addr_map.h +++ b/eval/src/vespa/eval/eval/fast_addr_map.h @@ -2,12 +2,11 @@ #pragma once -#include "label.h" #include "memory_usage_stuff.h" #include <vespa/vespalib/util/arrayref.h> +#include <vespa/vespalib/util/string_id.h> #include <vespa/vespalib/stllike/identity.h> #include <vespa/vespalib/stllike/hashtable.h> -#include <vespa/vespalib/util/shared_string_repo.h> #include <vector> namespace vespalib::eval { @@ -22,8 +21,8 @@ class FastAddrMap { public: // label hasing functions - static constexpr uint32_t hash_label(label_t label) { return label; } - static constexpr uint32_t hash_label(const label_t *label) { return *label; } + static constexpr uint32_t hash_label(string_id label) { return label.hash(); } + static constexpr uint32_t hash_label(const string_id *label) { return label->hash(); } static constexpr uint32_t combine_label_hash(uint32_t full_hash, uint32_t next_hash) { return ((full_hash * 31) + next_hash); } @@ -59,10 +58,10 @@ public: // view able to convert tags into sparse addresses struct LabelView { size_t addr_size; - const std::vector<label_t> &labels; - LabelView(size_t num_mapped_dims, SharedStringRepo::HandleView handle_view) - : addr_size(num_mapped_dims), labels(handle_view.handles()) {} - ConstArrayRef<label_t> get_addr(size_t idx) const { + const std::vector<string_id> &labels; + LabelView(size_t num_mapped_dims, const std::vector<string_id> &labels_in) + : addr_size(num_mapped_dims), labels(labels_in) {} + ConstArrayRef<string_id> get_addr(size_t idx) const { return {&labels[idx * addr_size], addr_size}; } }; @@ -78,8 +77,8 @@ public: struct Equal { const LabelView &label_view; Equal(const LabelView &label_view_in) : label_view(label_view_in) {} - static constexpr bool eq_labels(label_t a, label_t b) { return (a == b); } - static constexpr bool eq_labels(label_t a, const label_t *b) { return (a == *b); } + static constexpr bool eq_labels(string_id a, string_id b) { return (a == b); } + static constexpr bool eq_labels(string_id a, const string_id *b) { return (a == *b); } template <typename T> bool operator()(const Entry &a, const AltKey<T> &b) const { if ((a.hash != b.hash) || (b.key.size() != label_view.addr_size)) { @@ -102,8 +101,8 @@ private: HashType _map; public: - FastAddrMap(size_t num_mapped_dims, SharedStringRepo::HandleView handle_view, size_t expected_subspaces) - : _labels(num_mapped_dims, handle_view), + FastAddrMap(size_t num_mapped_dims, const std::vector<string_id> &labels, size_t expected_subspaces) + : _labels(num_mapped_dims, labels), _map(expected_subspaces * 2, Hash(), Equal(_labels)) {} ~FastAddrMap(); FastAddrMap(const FastAddrMap &) = delete; @@ -111,7 +110,7 @@ public: FastAddrMap(FastAddrMap &&) = delete; FastAddrMap &operator=(FastAddrMap &&) = delete; static constexpr size_t npos() { return -1; } - ConstArrayRef<label_t> get_addr(size_t idx) const { return _labels.get_addr(idx); } + ConstArrayRef<string_id> get_addr(size_t idx) const { return _labels.get_addr(idx); } size_t size() const { return _map.size(); } constexpr size_t addr_size() const { return _labels.addr_size; } template <typename T> diff --git a/eval/src/vespa/eval/eval/fast_value.hpp b/eval/src/vespa/eval/eval/fast_value.hpp index 972aa68b8bd..eed10b59bf9 100644 --- a/eval/src/vespa/eval/eval/fast_value.hpp +++ b/eval/src/vespa/eval/eval/fast_value.hpp @@ -5,6 +5,7 @@ #include "inline_operation.h" #include <vespa/eval/instruction/generic_join.h> #include <vespa/vespalib/stllike/hashtable.hpp> +#include <vespa/vespalib/util/shared_string_repo.h> namespace vespalib::eval { @@ -23,11 +24,11 @@ struct FastLookupView : public Value::Index::View { FastLookupView(const FastAddrMap &map_in) : map(map_in), subspace(FastAddrMap::npos()) {} - void lookup(ConstArrayRef<const label_t*> addr) override { + void lookup(ConstArrayRef<const string_id*> addr) override { subspace = map.lookup(addr); } - bool next_result(ConstArrayRef<label_t*>, size_t &idx_out) override { + bool next_result(ConstArrayRef<string_id*>, size_t &idx_out) override { if (subspace == FastAddrMap::npos()) { return false; } @@ -45,10 +46,10 @@ struct FastFilterView : public Value::Index::View { const FastAddrMap ↦ std::vector<size_t> match_dims; std::vector<size_t> extract_dims; - std::vector<label_t> query; + std::vector<string_id> query; size_t pos; - bool is_match(ConstArrayRef<label_t> addr) const { + bool is_match(ConstArrayRef<string_id> addr) const { for (size_t i = 0; i < query.size(); ++i) { if (query[i] != addr[match_dims[i]]) { return false; @@ -73,7 +74,7 @@ struct FastFilterView : public Value::Index::View { assert((match_dims.size() + extract_dims.size()) == map.addr_size()); } - void lookup(ConstArrayRef<const label_t*> addr) override { + void lookup(ConstArrayRef<const string_id*> addr) override { assert(addr.size() == query.size()); for (size_t i = 0; i < addr.size(); ++i) { query[i] = *addr[i]; @@ -81,7 +82,7 @@ struct FastFilterView : public Value::Index::View { pos = 0; } - bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override { + bool next_result(ConstArrayRef<string_id*> addr_out, size_t &idx_out) override { while (pos < map.size()) { auto addr = map.get_addr(pos); if (is_match(addr)) { @@ -109,11 +110,11 @@ struct FastIterateView : public Value::Index::View { FastIterateView(const FastAddrMap &map_in) : map(map_in), pos(FastAddrMap::npos()) {} - void lookup(ConstArrayRef<const label_t*>) override { + void lookup(ConstArrayRef<const string_id*>) override { pos = 0; } - bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override { + bool next_result(ConstArrayRef<string_id*> addr_out, size_t &idx_out) override { if (pos >= map.size()) { return false; } @@ -139,8 +140,8 @@ using JoinAddrSource = instruction::SparseJoinPlan::Source; struct FastValueIndex final : Value::Index { FastAddrMap map; - FastValueIndex(size_t num_mapped_dims_in, SharedStringRepo::HandleView handle_view, size_t expected_subspaces_in) - : map(num_mapped_dims_in, handle_view, expected_subspaces_in) {} + FastValueIndex(size_t num_mapped_dims_in, const std::vector<string_id> &labels, size_t expected_subspaces_in) + : map(num_mapped_dims_in, labels, expected_subspaces_in) {} template <typename LCT, typename RCT, typename OCT, typename Fun> static const Value &sparse_full_overlap_join(const ValueType &res_type, const Fun &fun, @@ -217,8 +218,11 @@ template <typename T, bool transient> struct FastValue final : Value, ValueBuilder<T> { using Handles = std::conditional<transient, - SharedStringRepo::WeakHandles, - SharedStringRepo::StrongHandles>::type; + std::vector<string_id>, + SharedStringRepo::Handles>::type; + + static const std::vector<string_id> &get_view(const std::vector<string_id> &handles) { return handles; } + static const std::vector<string_id> &get_view(const SharedStringRepo::Handles &handles) { return handles.view(); } ValueType my_type; size_t my_subspace_size; @@ -228,9 +232,12 @@ struct FastValue final : Value, ValueBuilder<T> { FastValue(const ValueType &type_in, size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces_in) : my_type(type_in), my_subspace_size(subspace_size_in), - my_handles(expected_subspaces_in * num_mapped_dims_in), - my_index(num_mapped_dims_in, my_handles.view(), expected_subspaces_in), - my_cells(subspace_size_in * expected_subspaces_in) {} + my_handles(), + my_index(num_mapped_dims_in, get_view(my_handles), expected_subspaces_in), + my_cells(subspace_size_in * expected_subspaces_in) + { + my_handles.reserve(expected_subspaces_in * num_mapped_dims_in); + } ~FastValue() override; const ValueType &type() const override { return my_type; } const Value::Index &index() const override { return my_index; } @@ -247,17 +254,17 @@ struct FastValue final : Value, ValueBuilder<T> { my_index.map.add_mapping(hash); } } - void add_mapping(ConstArrayRef<label_t> addr) { + void add_mapping(ConstArrayRef<string_id> addr) { uint32_t hash = 0; - for (label_t label: addr) { + for (string_id label: addr) { hash = FastAddrMap::combine_label_hash(hash, FastAddrMap::hash_label(label)); - my_handles.add(label); + my_handles.push_back(label); } my_index.map.add_mapping(hash); } - void add_mapping(ConstArrayRef<label_t> addr, uint32_t hash) { - for (label_t label: addr) { - my_handles.add(label); + void add_mapping(ConstArrayRef<string_id> addr, uint32_t hash) { + for (string_id label: addr) { + my_handles.push_back(label); } my_index.map.add_mapping(hash); } @@ -265,7 +272,7 @@ struct FastValue final : Value, ValueBuilder<T> { add_mapping(addr); return my_cells.add_cells(my_subspace_size); } - ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) override { + ArrayRef<T> add_subspace(ConstArrayRef<string_id> addr) override { add_mapping(addr); return my_cells.add_cells(my_subspace_size); } @@ -281,7 +288,7 @@ struct FastValue final : Value, ValueBuilder<T> { } MemoryUsage get_memory_usage() const override { MemoryUsage usage = self_memory_usage<FastValue<T,transient>>(); - usage.merge(vector_extra_memory_usage(my_handles.view().handles())); + usage.merge(vector_extra_memory_usage(get_view(my_handles))); usage.merge(my_index.map.estimate_extra_memory_usage()); usage.merge(my_cells.estimate_extra_memory_usage()); return usage; @@ -309,7 +316,7 @@ struct FastDenseValue final : Value, ValueBuilder<T> { ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref>) override { return ArrayRef<T>(my_cells.get(0), my_cells.size); } - ArrayRef<T> add_subspace(ConstArrayRef<label_t>) override { + ArrayRef<T> add_subspace(ConstArrayRef<string_id>) override { return ArrayRef<T>(my_cells.get(0), my_cells.size); } std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>> self) override { @@ -332,7 +339,7 @@ template <typename T> struct FastScalarBuilder final : ValueBuilder<T> { T _value; ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref>) final override { return ArrayRef<T>(&_value, 1); } - ArrayRef<T> add_subspace(ConstArrayRef<label_t>) final override { return ArrayRef<T>(&_value, 1); }; + ArrayRef<T> add_subspace(ConstArrayRef<string_id>) final override { return ArrayRef<T>(&_value, 1); }; std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>>) final override { return std::make_unique<ScalarValue<T>>(_value); } }; @@ -368,7 +375,7 @@ FastValueIndex::sparse_no_overlap_join(const ValueType &res_type, const Fun &fun { size_t num_mapped_dims = addr_sources.size(); auto &result = stash.create<FastValue<OCT,true>>(res_type, num_mapped_dims, 1, lhs.map.size()*rhs.map.size()); - std::vector<label_t> output_addr(num_mapped_dims); + std::vector<string_id> output_addr(num_mapped_dims); std::vector<size_t> store_lhs_idx; std::vector<size_t> store_rhs_idx; size_t out_idx = 0; diff --git a/eval/src/vespa/eval/eval/label.h b/eval/src/vespa/eval/eval/label.h deleted file mode 100644 index 931f96a4f1a..00000000000 --- a/eval/src/vespa/eval/eval/label.h +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <cstdint> - -namespace vespalib::eval { - -// We use string ids from SharedStringRepo as labels. Note that -// label_t represents the lightweight reference type. Other structures -// (Handle/StrongHandles) are needed to keep the id valid. - -using label_t = uint32_t; - -} diff --git a/eval/src/vespa/eval/eval/simple_value.cpp b/eval/src/vespa/eval/eval/simple_value.cpp index 0cbbb29ecf1..313c4f3008d 100644 --- a/eval/src/vespa/eval/eval/simple_value.cpp +++ b/eval/src/vespa/eval/eval/simple_value.cpp @@ -41,7 +41,7 @@ struct SimpleLookupView : public Value::Index::View { SimpleLookupView(const Map &map_in, size_t num_dims) : map(map_in), my_addr(num_dims), pos(map.end()) {} - void lookup(ConstArrayRef<const label_t*> addr) override { + void lookup(ConstArrayRef<const string_id*> addr) override { assert(addr.size() == my_addr.size()); for (size_t i = 0; i < my_addr.size(); ++i) { my_addr[i] = Handle::handle_from_id(*addr[i]); @@ -49,7 +49,7 @@ struct SimpleLookupView : public Value::Index::View { pos = map.find(my_addr); } - bool next_result(ConstArrayRef<label_t*>, size_t &idx_out) override { + bool next_result(ConstArrayRef<string_id*>, size_t &idx_out) override { if (pos == map.end()) { return false; } @@ -98,7 +98,7 @@ struct SimpleFilterView : public Value::Index::View { assert((match_dims.size() + extract_dims.size()) == num_dims); } - void lookup(ConstArrayRef<const label_t*> addr) override { + void lookup(ConstArrayRef<const string_id*> addr) override { assert(addr.size() == query.size()); for (size_t i = 0; i < addr.size(); ++i) { query[i] = Handle::handle_from_id(*addr[i]); @@ -106,7 +106,7 @@ struct SimpleFilterView : public Value::Index::View { pos = map.begin(); } - bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override { + bool next_result(ConstArrayRef<string_id*> addr_out, size_t &idx_out) override { while (pos != map.end()) { if (is_match()) { assert(addr_out.size() == extract_dims.size()); @@ -138,11 +138,11 @@ struct SimpleIterateView : public Value::Index::View { SimpleIterateView(const Map &map_in) : map(map_in), pos(map.end()) {} - void lookup(ConstArrayRef<const label_t*>) override { + void lookup(ConstArrayRef<const string_id*>) override { pos = map.begin(); } - bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override { + bool next_result(ConstArrayRef<string_id*> addr_out, size_t &idx_out) override { if (pos == map.end()) { return false; } @@ -186,10 +186,10 @@ SimpleValue::add_mapping(ConstArrayRef<vespalib::stringref> addr) } void -SimpleValue::add_mapping(ConstArrayRef<label_t> addr) +SimpleValue::add_mapping(ConstArrayRef<string_id> addr) { Labels my_addr; - for(label_t label: addr) { + for(string_id label: addr) { my_addr.emplace_back(Handle::handle_from_id(label)); } auto [ignore, was_inserted] = _index.emplace(my_addr, _index.size()); @@ -262,7 +262,7 @@ SimpleValueT<T>::add_subspace(ConstArrayRef<vespalib::stringref> addr) template <typename T> ArrayRef<T> -SimpleValueT<T>::add_subspace(ConstArrayRef<label_t> addr) +SimpleValueT<T>::add_subspace(ConstArrayRef<string_id> addr) { size_t old_size = _cells.size(); add_mapping(addr); diff --git a/eval/src/vespa/eval/eval/simple_value.h b/eval/src/vespa/eval/eval/simple_value.h index 1fd645b704c..6f5ccd30041 100644 --- a/eval/src/vespa/eval/eval/simple_value.h +++ b/eval/src/vespa/eval/eval/simple_value.h @@ -37,7 +37,7 @@ protected: size_t num_mapped_dims() const { return _num_mapped_dims; } size_t subspace_size() const { return _subspace_size; } void add_mapping(ConstArrayRef<vespalib::stringref> addr); - void add_mapping(ConstArrayRef<label_t> addr); + void add_mapping(ConstArrayRef<string_id> addr); MemoryUsage estimate_extra_memory_usage() const; public: SimpleValue(const ValueType &type, size_t num_mapped_dims_in, size_t subspace_size_in); @@ -64,7 +64,7 @@ public: ~SimpleValueT() override; TypedCells cells() const override { return TypedCells(ConstArrayRef<T>(_cells)); } ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) override; - ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) override; + ArrayRef<T> add_subspace(ConstArrayRef<string_id> addr) override; std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>> self) override { if (num_mapped_dims() == 0) { assert(size() == 1); diff --git a/eval/src/vespa/eval/eval/value.cpp b/eval/src/vespa/eval/eval/value.cpp index 73c7c40636c..7dcd04c8ab2 100644 --- a/eval/src/vespa/eval/eval/value.cpp +++ b/eval/src/vespa/eval/eval/value.cpp @@ -12,8 +12,8 @@ namespace { struct TrivialView : Value::Index::View { bool first = false; - void lookup(ConstArrayRef<const label_t*> ) override { first = true; } - bool next_result(ConstArrayRef<label_t*> , size_t &idx_out) override { + void lookup(ConstArrayRef<const string_id*> ) override { first = true; } + bool next_result(ConstArrayRef<string_id*> , size_t &idx_out) override { if (first) { idx_out = 0; first = false; diff --git a/eval/src/vespa/eval/eval/value.h b/eval/src/vespa/eval/eval/value.h index 2efb7d7c1e4..88b394add9c 100644 --- a/eval/src/vespa/eval/eval/value.h +++ b/eval/src/vespa/eval/eval/value.h @@ -2,10 +2,10 @@ #pragma once -#include "label.h" #include "memory_usage_stuff.h" #include "value_type.h" #include "typed_cells.h" +#include <vespa/vespalib/util/string_id.h> #include <vespa/vespalib/stllike/string.h> #include <vespa/vespalib/util/traits.h> #include <vector> @@ -37,13 +37,13 @@ struct Value { // partial address for the dimensions given to // create_view. Results from the lookup is extracted using // the next_result function. - virtual void lookup(ConstArrayRef<const label_t*> addr) = 0; + virtual void lookup(ConstArrayRef<const string_id*> addr) = 0; // Extract the next result (if any) from the previous // lookup into the given partial address and index. Only // the labels for the dimensions NOT specified in // create_view will be extracted here. - virtual bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) = 0; + virtual bool next_result(ConstArrayRef<string_id*> addr_out, size_t &idx_out) = 0; virtual ~View() {} }; @@ -167,10 +167,10 @@ struct ValueBuilder : ValueBuilderBase { // add a dense subspace for the given address where labels are // specified by shared string repo ids. Note that the caller is // responsible for making sure the ids are valid 'long enough'. - virtual ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) = 0; + virtual ArrayRef<T> add_subspace(ConstArrayRef<string_id> addr) = 0; // convenience function to add a subspace with an empty address - ArrayRef<T> add_subspace() { return add_subspace(ConstArrayRef<label_t>()); } + ArrayRef<T> add_subspace() { return add_subspace(ConstArrayRef<string_id>()); } // Given the ownership of the builder itself, produce the newly // created value. This means that builders can only be used once, diff --git a/eval/src/vespa/eval/eval/value_codec.cpp b/eval/src/vespa/eval/eval/value_codec.cpp index 53131da86d8..0016dfc694f 100644 --- a/eval/src/vespa/eval/eval/value_codec.cpp +++ b/eval/src/vespa/eval/eval/value_codec.cpp @@ -129,7 +129,7 @@ size_t maybe_decode_num_blocks(nbostream &input, bool has_mapped_dims, const For return 1; } -void encode_mapped_labels(nbostream &output, size_t num_mapped_dims, const std::vector<label_t> &addr) { +void encode_mapped_labels(nbostream &output, size_t num_mapped_dims, const std::vector<string_id> &addr) { for (size_t i = 0; i < num_mapped_dims; ++i) { vespalib::string str = SharedStringRepo::Handle::string_from_id(addr[i]); output.writeSmallString(str); @@ -231,8 +231,8 @@ struct CreateTensorSpecFromValue { TensorSpec spec(value.type().to_spec()); size_t subspace_id = 0; size_t subspace_size = value.type().dense_subspace_size(); - std::vector<label_t> labels(value.type().count_mapped_dimensions()); - std::vector<label_t*> label_refs; + std::vector<string_id> labels(value.type().count_mapped_dimensions()); + std::vector<string_id*> label_refs; for (auto &label: labels) { label_refs.push_back(&label); } @@ -272,8 +272,8 @@ struct EncodeState { struct ContentEncoder { template<typename T> static void invoke(const Value &value, const EncodeState &state, nbostream &output) { - std::vector<label_t> address(state.num_mapped_dims); - std::vector<label_t*> a_refs(state.num_mapped_dims);; + std::vector<string_id> address(state.num_mapped_dims); + std::vector<string_id*> a_refs(state.num_mapped_dims);; for (size_t i = 0; i < state.num_mapped_dims; ++i) { a_refs[i] = &address[i]; } diff --git a/eval/src/vespa/eval/instruction/generic_create.cpp b/eval/src/vespa/eval/instruction/generic_create.cpp index 6e30da846e7..ca624ee0888 100644 --- a/eval/src/vespa/eval/instruction/generic_create.cpp +++ b/eval/src/vespa/eval/instruction/generic_create.cpp @@ -82,7 +82,7 @@ void my_generic_create_op(State &state, uint64_t param_in) { param.num_mapped_dims, param.dense_subspace_size, param.my_spec.size()); - std::vector<label_t> sparse_addr; + std::vector<string_id> sparse_addr; param.my_spec.each_entry([&](const auto &key, const auto &values) { sparse_addr.clear(); diff --git a/eval/src/vespa/eval/instruction/generic_join.h b/eval/src/vespa/eval/instruction/generic_join.h index 217f3195dec..aaee64f7b25 100644 --- a/eval/src/vespa/eval/instruction/generic_join.h +++ b/eval/src/vespa/eval/instruction/generic_join.h @@ -68,10 +68,10 @@ struct SparseJoinState { const Value::Index &first_index; const Value::Index &second_index; const std::vector<size_t> &second_view_dims; - std::vector<label_t> full_address; - std::vector<label_t*> first_address; - std::vector<const label_t*> address_overlap; - std::vector<label_t*> second_only_address; + std::vector<string_id> full_address; + std::vector<string_id*> first_address; + std::vector<const string_id*> address_overlap; + std::vector<string_id*> second_only_address; size_t lhs_subspace; size_t rhs_subspace; size_t &first_subspace; diff --git a/eval/src/vespa/eval/instruction/generic_merge.cpp b/eval/src/vespa/eval/instruction/generic_merge.cpp index 107cb805d74..b40388aa547 100644 --- a/eval/src/vespa/eval/instruction/generic_merge.cpp +++ b/eval/src/vespa/eval/instruction/generic_merge.cpp @@ -64,9 +64,9 @@ generic_mixed_merge(const Value &a, const Value &b, const size_t subspace_size = params.dense_subspace_size; size_t guess_subspaces = std::max(a.index().size(), b.index().size()); auto builder = params.factory.create_transient_value_builder<OCT>(params.res_type, num_mapped, subspace_size, guess_subspaces); - std::vector<label_t> address(num_mapped); - std::vector<const label_t *> addr_cref; - std::vector<label_t *> addr_ref; + std::vector<string_id> address(num_mapped); + std::vector<const string_id *> addr_cref; + std::vector<string_id *> addr_ref; for (auto & ref : address) { addr_cref.push_back(&ref); addr_ref.push_back(&ref); diff --git a/eval/src/vespa/eval/instruction/generic_peek.cpp b/eval/src/vespa/eval/instruction/generic_peek.cpp index d94742ae15c..4efd57d1775 100644 --- a/eval/src/vespa/eval/instruction/generic_peek.cpp +++ b/eval/src/vespa/eval/instruction/generic_peek.cpp @@ -65,7 +65,7 @@ struct DimSpec { assert(dim_type == DimType::CHILD_IDX); return idx; } - label_t get_label_name() const { + string_id get_label_name() const { assert(dim_type == DimType::LABEL_STR); return str.id(); } @@ -204,12 +204,12 @@ struct DensePlan { struct SparseState { std::vector<Handle> handles; - std::vector<label_t> view_addr; - std::vector<const label_t *> lookup_refs; - std::vector<label_t> output_addr; - std::vector<label_t *> fetch_addr; + std::vector<string_id> view_addr; + std::vector<const string_id *> lookup_refs; + std::vector<string_id> output_addr; + std::vector<string_id *> fetch_addr; - SparseState(std::vector<Handle> handles_in, std::vector<label_t> view_addr_in, size_t out_dims) + SparseState(std::vector<Handle> handles_in, std::vector<string_id> view_addr_in, size_t out_dims) : handles(std::move(handles_in)), view_addr(std::move(view_addr_in)), lookup_refs(view_addr.size()), @@ -258,7 +258,7 @@ struct SparsePlan { template <typename Getter> SparseState make_state(const Getter &get_child_value) const { std::vector<Handle> handles; - std::vector<label_t> view_addr; + std::vector<string_id> view_addr; for (const auto & dim : lookup_specs) { if (dim.has_child()) { int64_t child_value = get_child_value(dim.get_child_idx()); diff --git a/eval/src/vespa/eval/instruction/generic_reduce.cpp b/eval/src/vespa/eval/instruction/generic_reduce.cpp index d30186d3dd8..b6393d0d713 100644 --- a/eval/src/vespa/eval/instruction/generic_reduce.cpp +++ b/eval/src/vespa/eval/instruction/generic_reduce.cpp @@ -45,9 +45,9 @@ ReduceParam::~ReduceParam() = default; //----------------------------------------------------------------------------- struct SparseReduceState { - std::vector<label_t> full_address; - std::vector<label_t*> fetch_address; - std::vector<label_t*> keep_address; + std::vector<string_id> full_address; + std::vector<string_id*> fetch_address; + std::vector<string_id*> keep_address; size_t subspace; SparseReduceState(const SparseReducePlan &plan) @@ -71,13 +71,13 @@ template <typename ICT, typename OCT, typename AGGR> Value::UP generic_reduce(const Value &value, const ReduceParam ¶m) { auto cells = value.cells().typify<ICT>(); - ArrayArrayMap<label_t,AGGR> map(param.sparse_plan.keep_dims.size(), + ArrayArrayMap<string_id,AGGR> map(param.sparse_plan.keep_dims.size(), param.dense_plan.out_size, value.index().size()); SparseReduceState sparse(param.sparse_plan); auto full_view = value.index().create_view({}); full_view->lookup({}); - ConstArrayRef<label_t*> keep_addr(sparse.keep_address); + ConstArrayRef<string_id*> keep_addr(sparse.keep_address); while (full_view->next_result(sparse.fetch_address, sparse.subspace)) { auto [tag, ignore] = map.lookup_or_add_entry(keep_addr); AGGR *dst = map.get_values(tag).begin(); diff --git a/eval/src/vespa/eval/instruction/generic_rename.cpp b/eval/src/vespa/eval/instruction/generic_rename.cpp index 894ef37b678..b6f51a79fc3 100644 --- a/eval/src/vespa/eval/instruction/generic_rename.cpp +++ b/eval/src/vespa/eval/instruction/generic_rename.cpp @@ -69,8 +69,8 @@ generic_rename(const Value &a, const ValueType &res_type, const ValueBuilderFactory &factory) { auto cells = a.cells().typify<CT>(); - std::vector<label_t> output_address(sparse_plan.mapped_dims); - std::vector<label_t*> input_address; + std::vector<string_id> output_address(sparse_plan.mapped_dims); + std::vector<string_id*> input_address; for (size_t maps_to : sparse_plan.output_dimensions) { input_address.push_back(&output_address[maps_to]); } diff --git a/eval/src/vespa/eval/streamed/streamed_value.cpp b/eval/src/vespa/eval/streamed/streamed_value.cpp index 06162b2200d..c09e433b9b9 100644 --- a/eval/src/vespa/eval/streamed/streamed_value.cpp +++ b/eval/src/vespa/eval/streamed/streamed_value.cpp @@ -16,7 +16,7 @@ StreamedValue<T>::get_memory_usage() const { MemoryUsage usage = self_memory_usage<StreamedValue<T>>(); usage.merge(vector_extra_memory_usage(_my_cells)); - usage.merge(vector_extra_memory_usage(_my_labels.view().handles())); + usage.merge(vector_extra_memory_usage(_my_labels.view())); return usage; } diff --git a/eval/src/vespa/eval/streamed/streamed_value.h b/eval/src/vespa/eval/streamed/streamed_value.h index 94603d9d35e..ffd88a56cdd 100644 --- a/eval/src/vespa/eval/streamed/streamed_value.h +++ b/eval/src/vespa/eval/streamed/streamed_value.h @@ -20,22 +20,22 @@ template <typename T> class StreamedValue : public Value { private: - using StrongHandles = SharedStringRepo::StrongHandles; + using Handles = SharedStringRepo::Handles; ValueType _type; std::vector<T> _my_cells; - StrongHandles _my_labels; + Handles _my_labels; StreamedValueIndex _my_index; public: StreamedValue(ValueType type, size_t num_mapped_dimensions, - std::vector<T> cells, size_t num_subspaces, StrongHandles && handles) + std::vector<T> cells, size_t num_subspaces, Handles && handles) : _type(std::move(type)), _my_cells(std::move(cells)), _my_labels(std::move(handles)), _my_index(num_mapped_dimensions, num_subspaces, - _my_labels.view().handles()) + _my_labels.view()) { assert(num_subspaces * _type.dense_subspace_size() == _my_cells.size()); } diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder.h b/eval/src/vespa/eval/streamed/streamed_value_builder.h index 48a01f893de..7c6e7091c15 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_builder.h +++ b/eval/src/vespa/eval/streamed/streamed_value_builder.h @@ -14,14 +14,14 @@ template <typename T> class StreamedValueBuilder : public ValueBuilder<T> { private: - using StrongHandles = SharedStringRepo::StrongHandles; + using Handles = SharedStringRepo::Handles; ValueType _type; size_t _num_mapped_dimensions; size_t _dense_subspace_size; std::vector<T> _cells; size_t _num_subspaces; - StrongHandles _labels; + Handles _labels; public: StreamedValueBuilder(const ValueType &type, size_t num_mapped_in, @@ -32,9 +32,10 @@ public: _dense_subspace_size(subspace_size_in), _cells(), _num_subspaces(0), - _labels(num_mapped_in * expected_subspaces) + _labels() { _cells.reserve(subspace_size_in * expected_subspaces); + _labels.reserve(num_mapped_in * expected_subspaces); }; ~StreamedValueBuilder(); @@ -49,9 +50,9 @@ public: return ArrayRef<T>(&_cells[old_sz], _dense_subspace_size); } - ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) override { + ArrayRef<T> add_subspace(ConstArrayRef<string_id> addr) override { for (auto label : addr) { - _labels.add(label); + _labels.push_back(label); } size_t old_sz = _cells.size(); _cells.resize(old_sz + _dense_subspace_size); diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.cpp b/eval/src/vespa/eval/streamed/streamed_value_index.cpp index a014f2dcee9..d47f0138522 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_index.cpp +++ b/eval/src/vespa/eval/streamed/streamed_value_index.cpp @@ -18,7 +18,7 @@ struct StreamedFilterView : Value::Index::View { LabelBlockStream label_blocks; std::vector<size_t> view_dims; - std::vector<label_t> to_match; + std::vector<string_id> to_match; StreamedFilterView(LabelBlockStream labels, std::vector<size_t> view_dims_in) : label_blocks(std::move(labels)), @@ -28,7 +28,7 @@ struct StreamedFilterView : Value::Index::View to_match.reserve(view_dims.size()); } - void lookup(ConstArrayRef<const label_t*> addr) override { + void lookup(ConstArrayRef<const string_id*> addr) override { label_blocks.reset(); to_match.clear(); for (auto ptr : addr) { @@ -37,7 +37,7 @@ struct StreamedFilterView : Value::Index::View assert(view_dims.size() == to_match.size()); } - bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override { + bool next_result(ConstArrayRef<string_id*> addr_out, size_t &idx_out) override { while (const auto block = label_blocks.next_block()) { idx_out = block.subspace_index; bool matches = true; @@ -66,12 +66,12 @@ struct StreamedIterationView : Value::Index::View : label_blocks(std::move(labels)) {} - void lookup(ConstArrayRef<const label_t*> addr) override { + void lookup(ConstArrayRef<const string_id*> addr) override { label_blocks.reset(); assert(addr.size() == 0); } - bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override { + bool next_result(ConstArrayRef<string_id*> addr_out, size_t &idx_out) override { if (auto block = label_blocks.next_block()) { idx_out = block.subspace_index; size_t i = 0; diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.h b/eval/src/vespa/eval/streamed/streamed_value_index.h index aa1c9a0e201..e94462c89f4 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_index.h +++ b/eval/src/vespa/eval/streamed/streamed_value_index.h @@ -16,10 +16,10 @@ class StreamedValueIndex : public Value::Index private: uint32_t _num_mapped_dims; uint32_t _num_subspaces; - const std::vector<label_t> &_labels_ref; + const std::vector<string_id> &_labels_ref; public: - StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, const std::vector<label_t> &labels_ref) + StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, const std::vector<string_id> &labels_ref) : _num_mapped_dims(num_mapped_dims), _num_subspaces(num_subspaces), _labels_ref(labels_ref) diff --git a/eval/src/vespa/eval/streamed/streamed_value_utils.h b/eval/src/vespa/eval/streamed/streamed_value_utils.h index 6b44e052f0c..9eb2804367d 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_utils.h +++ b/eval/src/vespa/eval/streamed/streamed_value_utils.h @@ -13,10 +13,10 @@ namespace vespalib::eval { * Reading more labels than available will trigger an assert. **/ struct LabelStream { - const std::vector<label_t> &source; + const std::vector<string_id> &source; size_t pos; - LabelStream(const std::vector<label_t> &data) : source(data), pos(0) {} - label_t next_label() { + LabelStream(const std::vector<string_id> &data) : source(data), pos(0) {} + string_id next_label() { assert(pos < source.size()); return source[pos++]; } @@ -29,7 +29,7 @@ struct LabelStream { struct LabelBlock { static constexpr size_t npos = -1; size_t subspace_index; - ConstArrayRef<label_t> address; + ConstArrayRef<string_id> address; operator bool() const { return subspace_index != npos; } }; @@ -42,7 +42,7 @@ private: size_t _num_subspaces; LabelStream _labels; size_t _subspace_index; - std::vector<label_t> _current_address; + std::vector<string_id> _current_address; public: LabelBlock next_block() { if (_subspace_index < _num_subspaces) { @@ -61,7 +61,7 @@ public: } LabelBlockStream(uint32_t num_subspaces, - const std::vector<label_t> &labels, + const std::vector<string_id> &labels, uint32_t num_mapped_dims) : _num_subspaces(num_subspaces), _labels(labels), diff --git a/eval/src/vespa/eval/streamed/streamed_value_view.h b/eval/src/vespa/eval/streamed/streamed_value_view.h index 38eb8db786f..af2f707e0b1 100644 --- a/eval/src/vespa/eval/streamed/streamed_value_view.h +++ b/eval/src/vespa/eval/streamed/streamed_value_view.h @@ -24,7 +24,7 @@ private: public: StreamedValueView(const ValueType &type, size_t num_mapped_dimensions, TypedCells cells, size_t num_subspaces, - const std::vector<label_t> &labels) + const std::vector<string_id> &labels) : _type(type), _cells_ref(cells), _my_index(num_mapped_dimensions, num_subspaces, labels) |