aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--document/src/test/resources/tensor/multi_cell_tensor__cppbin107 -> 107 bytes
-rw-r--r--document/src/vespa/document/update/tensor_partial_update.cpp12
-rw-r--r--eval/src/tests/eval/fast_value/fast_value_test.cpp87
-rw-r--r--eval/src/tests/eval/simple_value/simple_value_test.cpp27
-rw-r--r--eval/src/tests/streamed/value/streamed_value_test.cpp27
-rw-r--r--eval/src/vespa/eval/eval/CMakeLists.txt1
-rw-r--r--eval/src/vespa/eval/eval/fast_addr_map.cpp9
-rw-r--r--eval/src/vespa/eval/eval/fast_addr_map.h152
-rw-r--r--eval/src/vespa/eval/eval/fast_value.cpp15
-rw-r--r--eval/src/vespa/eval/eval/fast_value.h2
-rw-r--r--eval/src/vespa/eval/eval/fast_value.hpp219
-rw-r--r--eval/src/vespa/eval/eval/label.h15
-rw-r--r--eval/src/vespa/eval/eval/simple_value.cpp59
-rw-r--r--eval/src/vespa/eval/eval/simple_value.h9
-rw-r--r--eval/src/vespa/eval/eval/value.cpp4
-rw-r--r--eval/src/vespa/eval/eval/value.h35
-rw-r--r--eval/src/vespa/eval/eval/value_codec.cpp18
-rw-r--r--eval/src/vespa/eval/instruction/generic_concat.cpp8
-rw-r--r--eval/src/vespa/eval/instruction/generic_create.cpp22
-rw-r--r--eval/src/vespa/eval/instruction/generic_join.cpp6
-rw-r--r--eval/src/vespa/eval/instruction/generic_join.h8
-rw-r--r--eval/src/vespa/eval/instruction/generic_merge.cpp8
-rw-r--r--eval/src/vespa/eval/instruction/generic_peek.cpp87
-rw-r--r--eval/src/vespa/eval/instruction/generic_reduce.cpp20
-rw-r--r--eval/src/vespa/eval/instruction/generic_rename.cpp12
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value.cpp3
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value.h12
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_builder.h24
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp2
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_builder_factory.h2
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_index.cpp12
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_index.h25
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_utils.h27
-rw-r--r--eval/src/vespa/eval/streamed/streamed_value_view.h5
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp128
-rw-r--r--searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h1
-rw-r--r--searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp314
-rw-r--r--searchlib/src/vespa/searchlib/tensor/streamed_value_store.h108
-rw-r--r--vespalib/src/vespa/vespalib/util/shared_string_repo.cpp26
-rw-r--r--vespalib/src/vespa/vespalib/util/shared_string_repo.h83
41 files changed, 962 insertions, 674 deletions
diff --git a/document/src/test/resources/tensor/multi_cell_tensor__cpp b/document/src/test/resources/tensor/multi_cell_tensor__cpp
index deb53463fb5..9adda236a4a 100644
--- a/document/src/test/resources/tensor/multi_cell_tensor__cpp
+++ b/document/src/test/resources/tensor/multi_cell_tensor__cpp
Binary files differ
diff --git a/document/src/vespa/document/update/tensor_partial_update.cpp b/document/src/vespa/document/update/tensor_partial_update.cpp
index fbc60cc09af..f763c92741c 100644
--- a/document/src/vespa/document/update/tensor_partial_update.cpp
+++ b/document/src/vespa/document/update/tensor_partial_update.cpp
@@ -5,6 +5,7 @@
#include <vespa/vespalib/util/overload.h>
#include <vespa/vespalib/util/typify.h>
#include <vespa/vespalib/util/visit_ranges.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
#include <cassert>
#include <set>
@@ -43,7 +44,8 @@ struct DenseCoords {
}
~DenseCoords();
void clear() { offset = 0; current = 0; }
- void convert_label(vespalib::stringref label) {
+ void convert_label(label_t label_id) {
+ vespalib::string label = SharedStringRepo::Handle::string_from_id(label_id);
uint32_t coord = 0;
for (char c : label) {
if (c < '0' || c > '9') { // bad char
@@ -71,9 +73,9 @@ struct DenseCoords {
DenseCoords::~DenseCoords() = default;
struct SparseCoords {
- std::vector<vespalib::stringref> addr;
- std::vector<vespalib::stringref *> next_result_refs;
- std::vector<const vespalib::stringref *> lookup_refs;
+ std::vector<label_t> addr;
+ std::vector<label_t *> next_result_refs;
+ std::vector<const label_t *> lookup_refs;
std::vector<size_t> lookup_view_dims;
SparseCoords(size_t sz)
: addr(sz), next_result_refs(sz), lookup_refs(sz), lookup_view_dims(sz)
@@ -327,7 +329,7 @@ calc_mapped_dimension_indexes(const ValueType& input_type,
struct ModifierCoords {
- std::vector<const vespalib::stringref *> lookup_refs;
+ std::vector<const label_t *> lookup_refs;
std::vector<size_t> lookup_view_dims;
ModifierCoords(const SparseCoords& input_coords,
diff --git a/eval/src/tests/eval/fast_value/fast_value_test.cpp b/eval/src/tests/eval/fast_value/fast_value_test.cpp
index 03658d8351b..e809fb1bcda 100644
--- a/eval/src/tests/eval/fast_value/fast_value_test.cpp
+++ b/eval/src/tests/eval/fast_value/fast_value_test.cpp
@@ -8,6 +8,8 @@
using namespace vespalib;
using namespace vespalib::eval;
+using Handle = SharedStringRepo::Handle;
+
TEST(FastCellsTest, push_back_fast_works) {
FastCells<float> cells(3);
EXPECT_EQ(cells.capacity, 4);
@@ -60,38 +62,37 @@ TEST(FastCellsTest, add_cells_works) {
using SA = std::vector<vespalib::stringref>;
-TEST(FastValueBuilderTest, dense_add_subspace_robustness) {
+TEST(FastValueBuilderTest, scalar_add_subspace_robustness) {
auto factory = FastValueBuilderFactory::get();
- ValueType type = ValueType::from_spec("tensor(x[2])");
+ ValueType type = ValueType::from_spec("double");
auto builder = factory.create_value_builder<double>(type);
- auto subspace = builder->add_subspace({});
+ auto subspace = builder->add_subspace();
subspace[0] = 17.0;
- subspace[1] = 666;
- auto other = builder->add_subspace({});
- other[1] = 42.0;
+ auto other = builder->add_subspace();
+ other[0] = 42.0;
auto value = builder->build(std::move(builder));
+ EXPECT_EQ(value->index().size(), 1);
auto actual = spec_from_value(*value);
- auto expected = TensorSpec("tensor(x[2])").
- add({{"x", 0}}, 17.0).
- add({{"x", 1}}, 42.0);
- EXPECT_EQ(actual, expected);
+ auto expected = TensorSpec("double").
+ add({}, 42.0);
+ EXPECT_EQ(actual, expected);
}
-TEST(FastValueBuilderTest, sparse_add_subspace_robustness) {
+TEST(FastValueBuilderTest, dense_add_subspace_robustness) {
auto factory = FastValueBuilderFactory::get();
- ValueType type = ValueType::from_spec("tensor(x{})");
+ ValueType type = ValueType::from_spec("tensor(x[2])");
auto builder = factory.create_value_builder<double>(type);
- auto subspace = builder->add_subspace(SA{"foo"});
+ auto subspace = builder->add_subspace();
subspace[0] = 17.0;
- subspace = builder->add_subspace(SA{"bar"});
- subspace[0] = 18.0;
- auto other = builder->add_subspace(SA{"foo"});
- other[0] = 42.0;
+ subspace[1] = 666;
+ auto other = builder->add_subspace();
+ other[1] = 42.0;
auto value = builder->build(std::move(builder));
+ EXPECT_EQ(value->index().size(), 1);
auto actual = spec_from_value(*value);
- auto expected = TensorSpec("tensor(x{})").
- add({{"x", "bar"}}, 18.0).
- add({{"x", "foo"}}, 42.0);
+ auto expected = TensorSpec("tensor(x[2])").
+ add({{"x", 0}}, 17.0).
+ add({{"x", 1}}, 42.0);
EXPECT_EQ(actual, expected);
}
@@ -100,21 +101,43 @@ TEST(FastValueBuilderTest, mixed_add_subspace_robustness) {
ValueType type = ValueType::from_spec("tensor(x{},y[2])");
auto builder = factory.create_value_builder<double>(type);
auto subspace = builder->add_subspace(SA{"foo"});
- subspace[0] = 17.0;
- subspace[1] = 666;
+ subspace[0] = 1.0;
+ subspace[1] = 5.0;
subspace = builder->add_subspace(SA{"bar"});
- subspace[0] = 18.0;
- subspace[1] = 19.0;
+ subspace[0] = 2.0;
+ subspace[1] = 10.0;
auto other = builder->add_subspace(SA{"foo"});
- other[1] = 42.0;
+ other[0] = 3.0;
+ other[1] = 15.0;
auto value = builder->build(std::move(builder));
- auto actual = spec_from_value(*value);
- auto expected = TensorSpec("tensor(x{},y[2])").
- add({{"x", "foo"}, {"y", 0}}, 17.0).
- add({{"x", "bar"}, {"y", 0}}, 18.0).
- add({{"x", "bar"}, {"y", 1}}, 19.0).
- add({{"x", "foo"}, {"y", 1}}, 42.0);
- EXPECT_EQ(actual, expected);
+ EXPECT_EQ(value->index().size(), 3);
+ Handle foo("foo");
+ Handle bar("bar");
+ label_t label;
+ label_t *label_ptr = &label;
+ size_t subspace_idx;
+ auto get_subspace = [&]() {
+ auto cells = value->cells().typify<double>();
+ return ConstArrayRef<double>(cells.begin() + subspace_idx * 2, 2);
+ };
+ auto view = value->index().create_view({});
+ view->lookup({});
+ while (view->next_result({&label_ptr, 1}, subspace_idx)) {
+ if (label == bar.id()) {
+ auto values = get_subspace();
+ EXPECT_EQ(values[0], 2.0);
+ EXPECT_EQ(values[1], 10.0);
+ } else {
+ EXPECT_EQ(label, foo.id());
+ auto values = get_subspace();
+ if (values[0] == 1) {
+ EXPECT_EQ(values[1], 5.0);
+ } else {
+ EXPECT_EQ(values[0], 3.0);
+ EXPECT_EQ(values[1], 15.0);
+ }
+ }
+ }
}
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/tests/eval/simple_value/simple_value_test.cpp b/eval/src/tests/eval/simple_value/simple_value_test.cpp
index c05f9976e1a..1691d5c263c 100644
--- a/eval/src/tests/eval/simple_value/simple_value_test.cpp
+++ b/eval/src/tests/eval/simple_value/simple_value_test.cpp
@@ -16,8 +16,12 @@ using namespace vespalib::eval::test;
using vespalib::make_string_short::fmt;
-using PA = std::vector<vespalib::stringref *>;
-using CPA = std::vector<const vespalib::stringref *>;
+using PA = std::vector<label_t *>;
+using CPA = std::vector<const label_t *>;
+
+using Handle = SharedStringRepo::Handle;
+
+vespalib::string as_str(label_t label) { return Handle::string_from_id(label); }
std::vector<Layout> layouts = {
{},
@@ -98,17 +102,18 @@ TEST(SimpleValueTest, simple_value_can_be_built_and_inspected) {
std::unique_ptr<Value> value = builder->build(std::move(builder));
EXPECT_EQ(value->index().size(), 6);
auto view = value->index().create_view({0});
- vespalib::stringref query = "b";
- vespalib::stringref label;
+ Handle query_handle("b");
+ label_t query = query_handle.id();
+ label_t label;
size_t subspace;
+ std::map<vespalib::string,size_t> result;
view->lookup(CPA{&query});
- EXPECT_TRUE(view->next_result(PA{&label}, subspace));
- EXPECT_EQ(label, "aa");
- EXPECT_EQ(subspace, 2);
- EXPECT_TRUE(view->next_result(PA{&label}, subspace));
- EXPECT_EQ(label, "bb");
- EXPECT_EQ(subspace, 3);
- EXPECT_FALSE(view->next_result(PA{&label}, subspace));
+ while (view->next_result(PA{&label}, subspace)) {
+ result[as_str(label)] = subspace;
+ }
+ EXPECT_EQ(result.size(), 2);
+ EXPECT_EQ(result["aa"], 2);
+ EXPECT_EQ(result["bb"], 3);
}
TEST(SimpleValueTest, new_generic_join_works_for_simple_values) {
diff --git a/eval/src/tests/streamed/value/streamed_value_test.cpp b/eval/src/tests/streamed/value/streamed_value_test.cpp
index 05d6e20451c..5221c4eda64 100644
--- a/eval/src/tests/streamed/value/streamed_value_test.cpp
+++ b/eval/src/tests/streamed/value/streamed_value_test.cpp
@@ -16,8 +16,12 @@ using namespace vespalib::eval::test;
using vespalib::make_string_short::fmt;
-using PA = std::vector<vespalib::stringref *>;
-using CPA = std::vector<const vespalib::stringref *>;
+using PA = std::vector<label_t *>;
+using CPA = std::vector<const label_t *>;
+
+using Handle = SharedStringRepo::Handle;
+
+vespalib::string as_str(label_t label) { return Handle::string_from_id(label); }
std::vector<Layout> layouts = {
{},
@@ -98,17 +102,18 @@ TEST(StreamedValueTest, streamed_value_can_be_built_and_inspected) {
std::unique_ptr<Value> value = builder->build(std::move(builder));
EXPECT_EQ(value->index().size(), 6);
auto view = value->index().create_view({0});
- vespalib::stringref query = "b";
- vespalib::stringref label;
+ Handle query_handle("b");
+ label_t query = query_handle.id();
+ label_t label;
size_t subspace;
+ std::map<vespalib::string,size_t> result;
view->lookup(CPA{&query});
- EXPECT_TRUE(view->next_result(PA{&label}, subspace));
- EXPECT_EQ(label, "aa");
- EXPECT_EQ(subspace, 2);
- EXPECT_TRUE(view->next_result(PA{&label}, subspace));
- EXPECT_EQ(label, "bb");
- EXPECT_EQ(subspace, 3);
- EXPECT_FALSE(view->next_result(PA{&label}, subspace));
+ while (view->next_result(PA{&label}, subspace)) {
+ result[as_str(label)] = subspace;
+ }
+ EXPECT_EQ(result.size(), 2);
+ EXPECT_EQ(result["aa"], 2);
+ EXPECT_EQ(result["bb"], 3);
}
TEST(StreamedValueTest, new_generic_join_works_for_streamed_values) {
diff --git a/eval/src/vespa/eval/eval/CMakeLists.txt b/eval/src/vespa/eval/eval/CMakeLists.txt
index 01eeff49662..5f8dd478a7b 100644
--- a/eval/src/vespa/eval/eval/CMakeLists.txt
+++ b/eval/src/vespa/eval/eval/CMakeLists.txt
@@ -10,6 +10,7 @@ vespa_add_library(eval_eval OBJECT
delete_node.cpp
dense_cells_value.cpp
double_value_builder.cpp
+ fast_addr_map.cpp
fast_forest.cpp
fast_sparse_map.cpp
fast_value.cpp
diff --git a/eval/src/vespa/eval/eval/fast_addr_map.cpp b/eval/src/vespa/eval/eval/fast_addr_map.cpp
new file mode 100644
index 00000000000..73163f411e6
--- /dev/null
+++ b/eval/src/vespa/eval/eval/fast_addr_map.cpp
@@ -0,0 +1,9 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "fast_addr_map.h"
+
+namespace vespalib::eval {
+
+FastAddrMap::~FastAddrMap() = default;
+
+}
diff --git a/eval/src/vespa/eval/eval/fast_addr_map.h b/eval/src/vespa/eval/eval/fast_addr_map.h
new file mode 100644
index 00000000000..a8a82718a28
--- /dev/null
+++ b/eval/src/vespa/eval/eval/fast_addr_map.h
@@ -0,0 +1,152 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "label.h"
+#include "memory_usage_stuff.h"
+#include <vespa/vespalib/util/arrayref.h>
+#include <vespa/vespalib/stllike/identity.h>
+#include <vespa/vespalib/stllike/hashtable.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
+#include <vector>
+
+namespace vespalib::eval {
+
+/**
+ * A wrapper around vespalib::hashtable, using it to map a list of
+ * labels (a sparse address) to an integer value (dense subspace
+ * index). Labels are represented by string enum values stored and
+ * handled outside this class.
+ **/
+class FastAddrMap
+{
+public:
+ // label hasing functions
+ static constexpr uint32_t hash_label(label_t label) { return label; }
+ static constexpr uint32_t hash_label(const label_t *label) { return *label; }
+ static constexpr uint32_t combine_label_hash(uint32_t full_hash, uint32_t next_hash) {
+ return ((full_hash * 31) + next_hash);
+ }
+ template <typename T>
+ static constexpr uint32_t hash_labels(ConstArrayRef<T> addr) {
+ uint32_t hash = 0;
+ for (const T &label: addr) {
+ hash = combine_label_hash(hash, hash_label(label));
+ }
+ return hash;
+ }
+
+ // typed uint32_t index used to identify sparse address/dense subspace
+ struct Tag {
+ uint32_t idx;
+ static constexpr uint32_t npos() { return uint32_t(-1); }
+ static constexpr Tag make_invalid() { return Tag{npos()}; }
+ constexpr bool valid() const { return (idx != npos()); }
+ };
+
+ // sparse hash set entry
+ struct Entry {
+ Tag tag;
+ uint32_t hash;
+ };
+
+ // alternative key(s) used for lookup in sparse hash set
+ template <typename T> struct AltKey {
+ ConstArrayRef<T> key;
+ uint32_t hash;
+ };
+
+ // view able to convert tags into sparse addresses
+ struct LabelView {
+ size_t addr_size;
+ const std::vector<label_t> &labels;
+ LabelView(size_t num_mapped_dims, SharedStringRepo::HandleView handle_view)
+ : addr_size(num_mapped_dims), labels(handle_view.handles()) {}
+ ConstArrayRef<label_t> get_addr(size_t idx) const {
+ return {&labels[idx * addr_size], addr_size};
+ }
+ };
+
+ // hashing functor for sparse hash set
+ struct Hash {
+ template <typename T>
+ constexpr uint32_t operator()(const AltKey<T> &key) const { return key.hash; }
+ constexpr uint32_t operator()(const Entry &entry) const { return entry.hash; }
+ };
+
+ // equality functor for sparse hash set
+ struct Equal {
+ const LabelView &label_view;
+ Equal(const LabelView &label_view_in) : label_view(label_view_in) {}
+ static constexpr bool eq_labels(label_t a, label_t b) { return (a == b); }
+ static constexpr bool eq_labels(label_t a, const label_t *b) { return (a == *b); }
+ template <typename T>
+ bool operator()(const Entry &a, const AltKey<T> &b) const {
+ if ((a.hash != b.hash) || (b.key.size() != label_view.addr_size)) {
+ return false;
+ }
+ auto a_key = label_view.get_addr(a.tag.idx);
+ for (size_t i = 0; i < a_key.size(); ++i) {
+ if (!eq_labels(a_key[i], b.key[i])) {
+ return false;
+ }
+ }
+ return true;
+ }
+ };
+
+ using HashType = hashtable<Entry, Entry, Hash, Equal, Identity, hashtable_base::and_modulator>;
+
+private:
+ LabelView _labels;
+ HashType _map;
+
+public:
+ FastAddrMap(size_t num_mapped_dims, SharedStringRepo::HandleView handle_view, size_t expected_subspaces)
+ : _labels(num_mapped_dims, handle_view),
+ _map(expected_subspaces * 2, Hash(), Equal(_labels)) {}
+ ~FastAddrMap();
+ FastAddrMap(const FastAddrMap &) = delete;
+ FastAddrMap &operator=(const FastAddrMap &) = delete;
+ FastAddrMap(FastAddrMap &&) = delete;
+ FastAddrMap &operator=(FastAddrMap &&) = delete;
+ static constexpr size_t npos() { return -1; }
+ ConstArrayRef<label_t> get_addr(size_t idx) const { return _labels.get_addr(idx); }
+ size_t size() const { return _map.size(); }
+ constexpr size_t addr_size() const { return _labels.addr_size; }
+ template <typename T>
+ size_t lookup(ConstArrayRef<T> addr, uint32_t hash) const {
+ AltKey<T> key{addr, hash};
+ auto pos = _map.find(key);
+ return (pos == _map.end()) ? npos() : pos->tag.idx;
+ }
+ template <typename T>
+ size_t lookup(ConstArrayRef<T> addr) const {
+ return lookup(addr, hash_labels(addr));
+ }
+ void add_mapping(uint32_t hash) {
+ uint32_t idx = _map.size();
+ _map.force_insert(Entry{{idx}, hash});
+ }
+ template <typename F>
+ void each_map_entry(F &&f) const {
+ _map.for_each([&](const auto &entry)
+ {
+ f(entry.tag.idx, entry.hash);
+ });
+ }
+ MemoryUsage estimate_extra_memory_usage() const {
+ MemoryUsage extra_usage;
+ size_t map_self_size = sizeof(_map);
+ size_t map_used = _map.getMemoryUsed();
+ size_t map_allocated = _map.getMemoryConsumption();
+ // avoid double-counting the map itself
+ map_used = std::min(map_used, map_used - map_self_size);
+ map_allocated = std::min(map_allocated, map_allocated - map_self_size);
+ extra_usage.incUsedBytes(map_used);
+ extra_usage.incAllocatedBytes(map_allocated);
+ return extra_usage;
+ }
+};
+
+}
diff --git a/eval/src/vespa/eval/eval/fast_value.cpp b/eval/src/vespa/eval/eval/fast_value.cpp
index 116e561a868..96d0fa84149 100644
--- a/eval/src/vespa/eval/eval/fast_value.cpp
+++ b/eval/src/vespa/eval/eval/fast_value.cpp
@@ -11,7 +11,7 @@ namespace vespalib::eval {
namespace {
struct CreateFastValueBuilderBase {
- template <typename T> static std::unique_ptr<ValueBuilderBase> invoke(const ValueType &type,
+ template <typename T, typename R2> static std::unique_ptr<ValueBuilderBase> invoke(const ValueType &type,
size_t num_mapped_dims, size_t subspace_size, size_t expected_subspaces)
{
assert(check_cell_type<T>(type.cell_type()));
@@ -20,7 +20,7 @@ struct CreateFastValueBuilderBase {
} else if (num_mapped_dims == 0) {
return std::make_unique<FastDenseValue<T>>(type, subspace_size);
} else {
- return std::make_unique<FastValue<T>>(type, num_mapped_dims, subspace_size, expected_subspaces);
+ return std::make_unique<FastValue<T,R2::value>>(type, num_mapped_dims, subspace_size, expected_subspaces);
}
}
};
@@ -32,11 +32,11 @@ struct CreateFastValueBuilderBase {
std::unique_ptr<Value::Index::View>
FastValueIndex::create_view(const std::vector<size_t> &dims) const
{
- if (map.num_dims() == 0) {
+ if (map.addr_size() == 0) {
return TrivialIndex::get().create_view(dims);
} else if (dims.empty()) {
return std::make_unique<FastIterateView>(map);
- } else if (dims.size() == map.num_dims()) {
+ } else if (dims.size() == map.addr_size()) {
return std::make_unique<FastLookupView>(map);
} else {
return std::make_unique<FastFilterView>(map, dims);
@@ -49,10 +49,11 @@ FastValueBuilderFactory::FastValueBuilderFactory() = default;
FastValueBuilderFactory FastValueBuilderFactory::_factory;
std::unique_ptr<ValueBuilderBase>
-FastValueBuilderFactory::create_value_builder_base(const ValueType &type, size_t num_mapped_dims, size_t subspace_size,
- size_t expected_subspaces) const
+FastValueBuilderFactory::create_value_builder_base(const ValueType &type, bool transient, size_t num_mapped_dims, size_t subspace_size,
+ size_t expected_subspaces) const
{
- return typify_invoke<1,TypifyCellType,CreateFastValueBuilderBase>(type.cell_type(), type, num_mapped_dims, subspace_size, expected_subspaces);
+ using MyTypify = TypifyValue<TypifyCellType,TypifyBool>;
+ return typify_invoke<2,MyTypify,CreateFastValueBuilderBase>(type.cell_type(), transient, type, num_mapped_dims, subspace_size, expected_subspaces);
}
//-----------------------------------------------------------------------------
diff --git a/eval/src/vespa/eval/eval/fast_value.h b/eval/src/vespa/eval/eval/fast_value.h
index ac924ecc6eb..c6280b492db 100644
--- a/eval/src/vespa/eval/eval/fast_value.h
+++ b/eval/src/vespa/eval/eval/fast_value.h
@@ -19,7 +19,7 @@ class FastValueBuilderFactory : public ValueBuilderFactory {
private:
FastValueBuilderFactory();
static FastValueBuilderFactory _factory;
- std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type,
+ std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type, bool transient,
size_t num_mapped_dims, size_t subspace_size, size_t expected_subspaces) const override;
public:
static const FastValueBuilderFactory &get() { return _factory; }
diff --git a/eval/src/vespa/eval/eval/fast_value.hpp b/eval/src/vespa/eval/eval/fast_value.hpp
index 9914378cc9e..972aa68b8bd 100644
--- a/eval/src/vespa/eval/eval/fast_value.hpp
+++ b/eval/src/vespa/eval/eval/fast_value.hpp
@@ -1,11 +1,10 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "value.h"
-#include "fast_sparse_map.h"
+#include "fast_addr_map.h"
#include "inline_operation.h"
#include <vespa/eval/instruction/generic_join.h>
-#include <vespa/vespalib/stllike/hash_map.hpp>
-#include <vespa/vespalib/util/alloc.h>
+#include <vespa/vespalib/stllike/hashtable.hpp>
namespace vespalib::eval {
@@ -18,22 +17,22 @@ namespace {
// look up a full address in the map directly
struct FastLookupView : public Value::Index::View {
- const FastSparseMap &map;
- size_t subspace;
+ const FastAddrMap &map;
+ size_t subspace;
- FastLookupView(const FastSparseMap &map_in)
- : map(map_in), subspace(FastSparseMap::npos()) {}
+ FastLookupView(const FastAddrMap &map_in)
+ : map(map_in), subspace(FastAddrMap::npos()) {}
- void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
+ void lookup(ConstArrayRef<const label_t*> addr) override {
subspace = map.lookup(addr);
}
- bool next_result(ConstArrayRef<vespalib::stringref*>, size_t &idx_out) override {
- if (subspace == FastSparseMap::npos()) {
+ bool next_result(ConstArrayRef<label_t*>, size_t &idx_out) override {
+ if (subspace == FastAddrMap::npos()) {
return false;
}
idx_out = subspace;
- subspace = FastSparseMap::npos();
+ subspace = FastAddrMap::npos();
return true;
}
};
@@ -43,30 +42,27 @@ struct FastLookupView : public Value::Index::View {
// find matching mappings for a partial address with brute force filtering
struct FastFilterView : public Value::Index::View {
- using Label = FastSparseMap::HashedLabel;
-
- size_t num_mapped_dims;
- const std::vector<Label> &labels;
+ const FastAddrMap &map;
std::vector<size_t> match_dims;
std::vector<size_t> extract_dims;
- std::vector<Label> query;
+ std::vector<label_t> query;
size_t pos;
- bool is_match() const {
+ bool is_match(ConstArrayRef<label_t> addr) const {
for (size_t i = 0; i < query.size(); ++i) {
- if (query[i].hash != labels[pos + match_dims[i]].hash) {
+ if (query[i] != addr[match_dims[i]]) {
return false;
}
}
return true;
}
- FastFilterView(const FastSparseMap &map, const std::vector<size_t> &match_dims_in)
- : num_mapped_dims(map.num_dims()), labels(map.labels()), match_dims(match_dims_in),
- extract_dims(), query(match_dims.size(), Label()), pos(labels.size())
+ FastFilterView(const FastAddrMap &map_in, const std::vector<size_t> &match_dims_in)
+ : map(map_in), match_dims(match_dims_in),
+ extract_dims(), query(match_dims.size()), pos(FastAddrMap::npos())
{
auto my_pos = match_dims.begin();
- for (size_t i = 0; i < num_mapped_dims; ++i) {
+ for (size_t i = 0; i < map.addr_size(); ++i) {
if ((my_pos == match_dims.end()) || (*my_pos != i)) {
extract_dims.push_back(i);
} else {
@@ -74,29 +70,29 @@ struct FastFilterView : public Value::Index::View {
}
}
assert(my_pos == match_dims.end());
- assert((match_dims.size() + extract_dims.size()) == num_mapped_dims);
+ assert((match_dims.size() + extract_dims.size()) == map.addr_size());
}
- void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
+ void lookup(ConstArrayRef<const label_t*> addr) override {
assert(addr.size() == query.size());
for (size_t i = 0; i < addr.size(); ++i) {
- query[i] = Label(*addr[i]);
+ query[i] = *addr[i];
}
pos = 0;
}
- bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
- while (pos < labels.size()) {
- if (is_match()) {
+ bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override {
+ while (pos < map.size()) {
+ auto addr = map.get_addr(pos);
+ if (is_match(addr)) {
assert(addr_out.size() == extract_dims.size());
for (size_t i = 0; i < extract_dims.size(); ++i) {
- *addr_out[i] = labels[pos + extract_dims[i]].label;
+ *addr_out[i] = addr[extract_dims[i]];
}
- idx_out = (pos / num_mapped_dims); // is this expensive?
- pos += num_mapped_dims;
+ idx_out = pos++;
return true;
}
- pos += num_mapped_dims;
+ ++pos;
}
return false;
}
@@ -107,29 +103,26 @@ struct FastFilterView : public Value::Index::View {
// iterate all mappings
struct FastIterateView : public Value::Index::View {
- using Labels = std::vector<FastSparseMap::HashedLabel>;
-
- size_t num_mapped_dims;
- const Labels &labels;
- size_t pos;
+ const FastAddrMap &map;
+ size_t pos;
- FastIterateView(const FastSparseMap &map)
- : num_mapped_dims(map.num_dims()), labels(map.labels()), pos(labels.size()) {}
+ FastIterateView(const FastAddrMap &map_in)
+ : map(map_in), pos(FastAddrMap::npos()) {}
- void lookup(ConstArrayRef<const vespalib::stringref*>) override {
+ void lookup(ConstArrayRef<const label_t*>) override {
pos = 0;
}
- bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
- if (pos >= labels.size()) {
+ bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override {
+ if (pos >= map.size()) {
return false;
}
- assert(addr_out.size() == num_mapped_dims);
- for (size_t i = 0; i < num_mapped_dims; ++i) {
- *addr_out[i] = labels[pos + i].label;
+ auto addr = map.get_addr(pos);
+ assert(addr.size() == addr_out.size());
+ for (size_t i = 0; i < addr.size(); ++i) {
+ *addr_out[i] = addr[i];
}
- idx_out = (pos / num_mapped_dims); // is this expensive?
- pos += num_mapped_dims;
+ idx_out = pos++;
return true;
}
};
@@ -145,9 +138,9 @@ using JoinAddrSource = instruction::SparseJoinPlan::Source;
// operations by calling inline functions directly.
struct FastValueIndex final : Value::Index {
- FastSparseMap map;
- FastValueIndex(size_t num_mapped_dims_in, size_t expected_subspaces_in)
- : map(num_mapped_dims_in, expected_subspaces_in) {}
+ FastAddrMap map;
+ FastValueIndex(size_t num_mapped_dims_in, SharedStringRepo::HandleView handle_view, size_t expected_subspaces_in)
+ : map(num_mapped_dims_in, handle_view, expected_subspaces_in) {}
template <typename LCT, typename RCT, typename OCT, typename Fun>
static const Value &sparse_full_overlap_join(const ValueType &res_type, const Fun &fun,
@@ -220,31 +213,64 @@ struct FastCells {
//-----------------------------------------------------------------------------
-template <typename T>
+template <typename T, bool transient>
struct FastValue final : Value, ValueBuilder<T> {
+ using Handles = std::conditional<transient,
+ SharedStringRepo::WeakHandles,
+ SharedStringRepo::StrongHandles>::type;
+
ValueType my_type;
size_t my_subspace_size;
+ Handles my_handles;
FastValueIndex my_index;
FastCells<T> my_cells;
FastValue(const ValueType &type_in, size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces_in)
: my_type(type_in), my_subspace_size(subspace_size_in),
- my_index(num_mapped_dims_in, expected_subspaces_in),
+ my_handles(expected_subspaces_in * num_mapped_dims_in),
+ my_index(num_mapped_dims_in, my_handles.view(), expected_subspaces_in),
my_cells(subspace_size_in * expected_subspaces_in) {}
~FastValue() override;
const ValueType &type() const override { return my_type; }
const Value::Index &index() const override { return my_index; }
TypedCells cells() const override { return TypedCells(my_cells.memory, get_cell_type<T>(), my_cells.size); }
+ void add_mapping(ConstArrayRef<vespalib::stringref> addr) {
+ if constexpr (transient) {
+ (void) addr;
+ abort(); // cannot use this for transient values
+ } else {
+ uint32_t hash = 0;
+ for (const auto &label: addr) {
+ hash = FastAddrMap::combine_label_hash(hash, FastAddrMap::hash_label(my_handles.add(label)));
+ }
+ my_index.map.add_mapping(hash);
+ }
+ }
+ void add_mapping(ConstArrayRef<label_t> addr) {
+ uint32_t hash = 0;
+ for (label_t label: addr) {
+ hash = FastAddrMap::combine_label_hash(hash, FastAddrMap::hash_label(label));
+ my_handles.add(label);
+ }
+ my_index.map.add_mapping(hash);
+ }
+ void add_mapping(ConstArrayRef<label_t> addr, uint32_t hash) {
+ for (label_t label: addr) {
+ my_handles.add(label);
+ }
+ my_index.map.add_mapping(hash);
+ }
ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) override {
- size_t idx = my_index.map.add_mapping(addr) * my_subspace_size;
- if (__builtin_expect((idx == my_cells.size), true)) {
- return my_cells.add_cells(my_subspace_size);
- }
- return ArrayRef<T>(my_cells.get(idx), my_subspace_size);
+ add_mapping(addr);
+ return my_cells.add_cells(my_subspace_size);
+ }
+ ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) override {
+ add_mapping(addr);
+ return my_cells.add_cells(my_subspace_size);
}
std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>> self) override {
- if (my_index.map.num_dims() == 0) {
+ if (my_index.map.addr_size() == 0) {
assert(my_index.map.size() == 1);
}
assert(my_cells.size == (my_index.map.size() * my_subspace_size));
@@ -254,13 +280,14 @@ struct FastValue final : Value, ValueBuilder<T> {
return std::unique_ptr<Value>(this);
}
MemoryUsage get_memory_usage() const override {
- MemoryUsage usage = self_memory_usage<FastValue<T>>();
+ MemoryUsage usage = self_memory_usage<FastValue<T,transient>>();
+ usage.merge(vector_extra_memory_usage(my_handles.view().handles()));
usage.merge(my_index.map.estimate_extra_memory_usage());
usage.merge(my_cells.estimate_extra_memory_usage());
return usage;
}
};
-template <typename T> FastValue<T>::~FastValue() = default;
+template <typename T,bool transient> FastValue<T,transient>::~FastValue() = default;
//-----------------------------------------------------------------------------
@@ -282,6 +309,9 @@ struct FastDenseValue final : Value, ValueBuilder<T> {
ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref>) override {
return ArrayRef<T>(my_cells.get(0), my_cells.size);
}
+ ArrayRef<T> add_subspace(ConstArrayRef<label_t>) override {
+ return ArrayRef<T>(my_cells.get(0), my_cells.size);
+ }
std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>> self) override {
ValueBuilder<T>* me = this;
assert(me == self.get());
@@ -289,7 +319,7 @@ struct FastDenseValue final : Value, ValueBuilder<T> {
return std::unique_ptr<Value>(this);
}
MemoryUsage get_memory_usage() const override {
- MemoryUsage usage = self_memory_usage<FastValue<T>>();
+ MemoryUsage usage = self_memory_usage<FastDenseValue<T>>();
usage.merge(my_cells.estimate_extra_memory_usage());
return usage;
}
@@ -302,6 +332,7 @@ template <typename T>
struct FastScalarBuilder final : ValueBuilder<T> {
T _value;
ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref>) final override { return ArrayRef<T>(&_value, 1); }
+    ArrayRef<T> add_subspace(ConstArrayRef<label_t>) final override { return ArrayRef<T>(&_value, 1); }
std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>>) final override { return std::make_unique<ScalarValue<T>>(_value); }
};
@@ -313,19 +344,16 @@ FastValueIndex::sparse_full_overlap_join(const ValueType &res_type, const Fun &f
const FastValueIndex &lhs, const FastValueIndex &rhs,
ConstArrayRef<LCT> lhs_cells, ConstArrayRef<RCT> rhs_cells, Stash &stash)
{
- auto &result = stash.create<FastValue<OCT>>(res_type, lhs.map.num_dims(), 1, lhs.map.size());
- auto &result_map = result.my_index.map;
- lhs.map.each_map_entry([&](auto lhs_subspace, auto hash)
- {
- auto rhs_subspace = rhs.map.lookup(hash);
- if (rhs_subspace != FastSparseMap::npos()) {
- auto idx = result_map.add_mapping(lhs.map.make_addr(lhs_subspace), hash);
- if (__builtin_expect((idx == result.my_cells.size), true)) {
- auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]);
- result.my_cells.push_back_fast(cell_value);
- }
- }
- });
+ auto &result = stash.create<FastValue<OCT,true>>(res_type, lhs.map.addr_size(), 1, lhs.map.size());
+ lhs.map.each_map_entry([&](auto lhs_subspace, auto hash) {
+ auto lhs_addr = lhs.map.get_addr(lhs_subspace);
+ auto rhs_subspace = rhs.map.lookup(lhs_addr, hash);
+ if (rhs_subspace != FastAddrMap::npos()) {
+ result.add_mapping(lhs_addr, hash);
+ auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]);
+ result.my_cells.push_back_fast(cell_value);
+ }
+ });
return result;
}
@@ -338,10 +366,9 @@ FastValueIndex::sparse_no_overlap_join(const ValueType &res_type, const Fun &fun
const std::vector<JoinAddrSource> &addr_sources,
ConstArrayRef<LCT> lhs_cells, ConstArrayRef<RCT> rhs_cells, Stash &stash)
{
- using HashedLabelRef = std::reference_wrapper<const FastSparseMap::HashedLabel>;
size_t num_mapped_dims = addr_sources.size();
- auto &result = stash.create<FastValue<OCT>>(res_type, num_mapped_dims, 1, lhs.map.size()*rhs.map.size());
- std::vector<HashedLabelRef> output_addr(num_mapped_dims, FastSparseMap::empty_label);
+ auto &result = stash.create<FastValue<OCT,true>>(res_type, num_mapped_dims, 1, lhs.map.size()*rhs.map.size());
+ std::vector<label_t> output_addr(num_mapped_dims);
std::vector<size_t> store_lhs_idx;
std::vector<size_t> store_rhs_idx;
size_t out_idx = 0;
@@ -359,24 +386,22 @@ FastValueIndex::sparse_no_overlap_join(const ValueType &res_type, const Fun &fun
}
assert(out_idx == output_addr.size());
for (size_t lhs_subspace = 0; lhs_subspace < lhs.map.size(); ++lhs_subspace) {
- auto l_addr = lhs.map.make_addr(lhs_subspace);
+ auto l_addr = lhs.map.get_addr(lhs_subspace);
assert(l_addr.size() == store_lhs_idx.size());
for (size_t i = 0; i < store_lhs_idx.size(); ++i) {
size_t addr_idx = store_lhs_idx[i];
output_addr[addr_idx] = l_addr[i];
}
for (size_t rhs_subspace = 0; rhs_subspace < rhs.map.size(); ++rhs_subspace) {
- auto r_addr = rhs.map.make_addr(rhs_subspace);
+ auto r_addr = rhs.map.get_addr(rhs_subspace);
assert(r_addr.size() == store_rhs_idx.size());
for (size_t i = 0; i < store_rhs_idx.size(); ++i) {
size_t addr_idx = store_rhs_idx[i];
output_addr[addr_idx] = r_addr[i];
}
- auto idx = result.my_index.map.add_mapping(ConstArrayRef(output_addr));
- if (__builtin_expect((idx == result.my_cells.size), true)) {
- auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]);
- result.my_cells.push_back_fast(cell_value);
- }
+ result.add_mapping(ConstArrayRef(output_addr));
+ auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]);
+ result.my_cells.push_back_fast(cell_value);
}
}
return result;
@@ -391,22 +416,22 @@ FastValueIndex::sparse_only_merge(const ValueType &res_type, const Fun &fun,
ConstArrayRef<LCT> lhs_cells, ConstArrayRef<RCT> rhs_cells, Stash &stash)
{
size_t guess_size = lhs.map.size() + rhs.map.size();
- auto &result = stash.create<FastValue<OCT>>(res_type, lhs.map.num_dims(), 1, guess_size);
- result.my_index = lhs;
- for (auto val : lhs_cells) {
- result.my_cells.push_back_fast(val);
- }
+ auto &result = stash.create<FastValue<OCT,true>>(res_type, lhs.map.addr_size(), 1, guess_size);
+ lhs.map.each_map_entry([&](auto lhs_subspace, auto hash)
+ {
+ result.add_mapping(lhs.map.get_addr(lhs_subspace), hash);
+ result.my_cells.push_back_fast(lhs_cells[lhs_subspace]);
+ });
rhs.map.each_map_entry([&](auto rhs_subspace, auto hash)
{
- auto lhs_subspace = lhs.map.lookup(hash);
- if (lhs_subspace == FastSparseMap::npos()) {
- auto idx = result.my_index.map.add_mapping(rhs.map.make_addr(rhs_subspace), hash);
- if (__builtin_expect((idx == result.my_cells.size), true)) {
- result.my_cells.push_back_fast(rhs_cells[rhs_subspace]);
- }
+ auto rhs_addr = rhs.map.get_addr(rhs_subspace);
+ auto result_subspace = result.my_index.map.lookup(rhs_addr, hash);
+ if (result_subspace == FastAddrMap::npos()) {
+ result.add_mapping(rhs_addr, hash);
+ result.my_cells.push_back_fast(rhs_cells[rhs_subspace]);
} else {
- auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]);
- *result.my_cells.get(lhs_subspace) = cell_value;
+ OCT &out_cell = *result.my_cells.get(result_subspace);
+ out_cell = fun(out_cell, rhs_cells[rhs_subspace]);
}
});
return result;
diff --git a/eval/src/vespa/eval/eval/label.h b/eval/src/vespa/eval/eval/label.h
new file mode 100644
index 00000000000..931f96a4f1a
--- /dev/null
+++ b/eval/src/vespa/eval/eval/label.h
@@ -0,0 +1,15 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <cstdint>
+
+namespace vespalib::eval {
+
+// We use string ids from SharedStringRepo as labels. Note that
+// label_t represents the lightweight reference type. Other structures
+// (Handle/StrongHandles) are needed to keep the id valid.
+
+using label_t = uint32_t;
+
+}
diff --git a/eval/src/vespa/eval/eval/simple_value.cpp b/eval/src/vespa/eval/eval/simple_value.cpp
index 113f89f77fb..0cbbb29ecf1 100644
--- a/eval/src/vespa/eval/eval/simple_value.cpp
+++ b/eval/src/vespa/eval/eval/simple_value.cpp
@@ -30,7 +30,8 @@ struct CreateSimpleValueBuilderBase {
// look up a full address in the map directly
struct SimpleLookupView : public Value::Index::View {
- using Labels = std::vector<vespalib::string>;
+ using Handle = SharedStringRepo::Handle;
+ using Labels = std::vector<Handle>;
using Map = std::map<Labels, size_t>;
const Map &map;
@@ -38,17 +39,17 @@ struct SimpleLookupView : public Value::Index::View {
Map::const_iterator pos;
SimpleLookupView(const Map &map_in, size_t num_dims)
- : map(map_in), my_addr(num_dims, ""), pos(map.end()) {}
+ : map(map_in), my_addr(num_dims), pos(map.end()) {}
- void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
+ void lookup(ConstArrayRef<const label_t*> addr) override {
assert(addr.size() == my_addr.size());
for (size_t i = 0; i < my_addr.size(); ++i) {
- my_addr[i] = *addr[i];
+ my_addr[i] = Handle::handle_from_id(*addr[i]);
}
pos = map.find(my_addr);
}
- bool next_result(ConstArrayRef<vespalib::stringref*>, size_t &idx_out) override {
+ bool next_result(ConstArrayRef<label_t*>, size_t &idx_out) override {
if (pos == map.end()) {
return false;
}
@@ -63,13 +64,14 @@ struct SimpleLookupView : public Value::Index::View {
// find matching mappings for a partial address with brute force filtering
struct SimpleFilterView : public Value::Index::View {
- using Labels = std::vector<vespalib::string>;
+ using Handle = SharedStringRepo::Handle;
+ using Labels = std::vector<Handle>;
using Map = std::map<Labels, size_t>;
const Map &map;
std::vector<size_t> match_dims;
std::vector<size_t> extract_dims;
- std::vector<vespalib::string> query;
+ std::vector<Handle> query;
Map::const_iterator pos;
bool is_match() const {
@@ -82,7 +84,7 @@ struct SimpleFilterView : public Value::Index::View {
}
SimpleFilterView(const Map &map_in, const std::vector<size_t> &match_dims_in, size_t num_dims)
- : map(map_in), match_dims(match_dims_in), extract_dims(), query(match_dims.size(), ""), pos(map.end())
+ : map(map_in), match_dims(match_dims_in), extract_dims(), query(match_dims.size()), pos(map.end())
{
auto my_pos = match_dims.begin();
for (size_t i = 0; i < num_dims; ++i) {
@@ -96,20 +98,20 @@ struct SimpleFilterView : public Value::Index::View {
assert((match_dims.size() + extract_dims.size()) == num_dims);
}
- void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
+ void lookup(ConstArrayRef<const label_t*> addr) override {
assert(addr.size() == query.size());
for (size_t i = 0; i < addr.size(); ++i) {
- query[i] = *addr[i];
+ query[i] = Handle::handle_from_id(*addr[i]);
}
pos = map.begin();
}
- bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
+ bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override {
while (pos != map.end()) {
if (is_match()) {
assert(addr_out.size() == extract_dims.size());
for (size_t i = 0; i < extract_dims.size(); ++i) {
- *addr_out[i] = pos->first[extract_dims[i]];
+ *addr_out[i] = pos->first[extract_dims[i]].id();
}
idx_out = pos->second;
++pos;
@@ -126,7 +128,8 @@ struct SimpleFilterView : public Value::Index::View {
// iterate all mappings
struct SimpleIterateView : public Value::Index::View {
- using Labels = std::vector<vespalib::string>;
+ using Handle = SharedStringRepo::Handle;
+ using Labels = std::vector<Handle>;
using Map = std::map<Labels, size_t>;
const Map &map;
@@ -135,17 +138,17 @@ struct SimpleIterateView : public Value::Index::View {
SimpleIterateView(const Map &map_in)
: map(map_in), pos(map.end()) {}
- void lookup(ConstArrayRef<const vespalib::stringref*>) override {
+ void lookup(ConstArrayRef<const label_t*>) override {
pos = map.begin();
}
- bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
+ bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override {
if (pos == map.end()) {
return false;
}
assert(addr_out.size() == pos->first.size());
for (size_t i = 0; i < addr_out.size(); ++i) {
- *addr_out[i] = pos->first[i];
+ *addr_out[i] = pos->first[i].id();
}
idx_out = pos->second;
++pos;
@@ -182,6 +185,17 @@ SimpleValue::add_mapping(ConstArrayRef<vespalib::stringref> addr)
assert(was_inserted);
}
+void
+SimpleValue::add_mapping(ConstArrayRef<label_t> addr)
+{
+ Labels my_addr;
+    for (label_t label : addr) {
+ my_addr.emplace_back(Handle::handle_from_id(label));
+ }
+ auto [ignore, was_inserted] = _index.emplace(my_addr, _index.size());
+ assert(was_inserted);
+}
+
MemoryUsage
SimpleValue::estimate_extra_memory_usage() const
{
@@ -246,15 +260,26 @@ SimpleValueT<T>::add_subspace(ConstArrayRef<vespalib::stringref> addr)
return ArrayRef<T>(&_cells[old_size], subspace_size());
}
+template <typename T>
+ArrayRef<T>
+SimpleValueT<T>::add_subspace(ConstArrayRef<label_t> addr)
+{
+ size_t old_size = _cells.size();
+ add_mapping(addr);
+ _cells.resize(old_size + subspace_size(), std::numeric_limits<T>::quiet_NaN());
+ return ArrayRef<T>(&_cells[old_size], subspace_size());
+}
+
//-----------------------------------------------------------------------------
SimpleValueBuilderFactory::SimpleValueBuilderFactory() = default;
SimpleValueBuilderFactory SimpleValueBuilderFactory::_factory;
std::unique_ptr<ValueBuilderBase>
-SimpleValueBuilderFactory::create_value_builder_base(const ValueType &type, size_t num_mapped_dims, size_t subspace_size,
+SimpleValueBuilderFactory::create_value_builder_base(const ValueType &type, bool transient, size_t num_mapped_dims, size_t subspace_size,
size_t expected_subspaces) const
{
+ (void) transient;
return typify_invoke<1,TypifyCellType,CreateSimpleValueBuilderBase>(type.cell_type(), type, num_mapped_dims, subspace_size, expected_subspaces);
}
diff --git a/eval/src/vespa/eval/eval/simple_value.h b/eval/src/vespa/eval/eval/simple_value.h
index 590c0b4ef16..1fd645b704c 100644
--- a/eval/src/vespa/eval/eval/simple_value.h
+++ b/eval/src/vespa/eval/eval/simple_value.h
@@ -3,7 +3,7 @@
#pragma once
#include "value.h"
-#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
#include <vector>
#include <map>
@@ -26,7 +26,8 @@ class TensorSpec;
class SimpleValue : public Value, public Value::Index
{
private:
- using Labels = std::vector<vespalib::string>;
+ using Handle = SharedStringRepo::Handle;
+ using Labels = std::vector<Handle>;
ValueType _type;
size_t _num_mapped_dims;
@@ -36,6 +37,7 @@ protected:
size_t num_mapped_dims() const { return _num_mapped_dims; }
size_t subspace_size() const { return _subspace_size; }
void add_mapping(ConstArrayRef<vespalib::stringref> addr);
+ void add_mapping(ConstArrayRef<label_t> addr);
MemoryUsage estimate_extra_memory_usage() const;
public:
SimpleValue(const ValueType &type, size_t num_mapped_dims_in, size_t subspace_size_in);
@@ -62,6 +64,7 @@ public:
~SimpleValueT() override;
TypedCells cells() const override { return TypedCells(ConstArrayRef<T>(_cells)); }
ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) override;
+ ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) override;
std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>> self) override {
if (num_mapped_dims() == 0) {
assert(size() == 1);
@@ -87,7 +90,7 @@ class SimpleValueBuilderFactory : public ValueBuilderFactory {
private:
SimpleValueBuilderFactory();
static SimpleValueBuilderFactory _factory;
- std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type,
+ std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type, bool transient,
size_t num_mapped_dims, size_t subspace_size, size_t expected_subspaces) const override;
public:
static const SimpleValueBuilderFactory &get() { return _factory; }
diff --git a/eval/src/vespa/eval/eval/value.cpp b/eval/src/vespa/eval/eval/value.cpp
index 7abc8d568cb..73c7c40636c 100644
--- a/eval/src/vespa/eval/eval/value.cpp
+++ b/eval/src/vespa/eval/eval/value.cpp
@@ -12,8 +12,8 @@ namespace {
struct TrivialView : Value::Index::View {
bool first = false;
- void lookup(ConstArrayRef<const vespalib::stringref*> ) override { first = true; }
- bool next_result(ConstArrayRef<vespalib::stringref*> , size_t &idx_out) override {
+ void lookup(ConstArrayRef<const label_t*> ) override { first = true; }
+ bool next_result(ConstArrayRef<label_t*> , size_t &idx_out) override {
if (first) {
idx_out = 0;
first = false;
diff --git a/eval/src/vespa/eval/eval/value.h b/eval/src/vespa/eval/eval/value.h
index 186c3698dcd..2efb7d7c1e4 100644
--- a/eval/src/vespa/eval/eval/value.h
+++ b/eval/src/vespa/eval/eval/value.h
@@ -2,6 +2,7 @@
#pragma once
+#include "label.h"
#include "memory_usage_stuff.h"
#include "value_type.h"
#include "typed_cells.h"
@@ -36,13 +37,13 @@ struct Value {
// partial address for the dimensions given to
// create_view. Results from the lookup is extracted using
// the next_result function.
- virtual void lookup(ConstArrayRef<const vespalib::stringref*> addr) = 0;
+ virtual void lookup(ConstArrayRef<const label_t*> addr) = 0;
// Extract the next result (if any) from the previous
// lookup into the given partial address and index. Only
// the labels for the dimensions NOT specified in
// create_view will be extracted here.
- virtual bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) = 0;
+ virtual bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) = 0;
virtual ~View() {}
};
@@ -163,6 +164,14 @@ struct ValueBuilder : ValueBuilderBase {
// is not allowed.
virtual ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) = 0;
+ // add a dense subspace for the given address where labels are
+ // specified by shared string repo ids. Note that the caller is
+ // responsible for making sure the ids are valid 'long enough'.
+ virtual ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) = 0;
+
+ // convenience function to add a subspace with an empty address
+ ArrayRef<T> add_subspace() { return add_subspace(ConstArrayRef<label_t>()); }
+
// Given the ownership of the builder itself, produce the newly
// created value. This means that builders can only be used once,
// it also means values can build themselves.
@@ -179,26 +188,40 @@ struct ValueBuilder : ValueBuilderBase {
* builder. With interoperability between all values.
**/
struct ValueBuilderFactory {
+private:
template <typename T>
- std::unique_ptr<ValueBuilder<T>> create_value_builder(const ValueType &type,
+ std::unique_ptr<ValueBuilder<T>> create_value_builder(const ValueType &type, bool transient,
size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces) const
{
assert(check_cell_type<T>(type.cell_type()));
- auto base = create_value_builder_base(type, num_mapped_dims_in, subspace_size_in, expected_subspaces);
+ auto base = create_value_builder_base(type, transient, num_mapped_dims_in, subspace_size_in, expected_subspaces);
ValueBuilder<T> *builder = dynamic_cast<ValueBuilder<T>*>(base.get());
assert(builder);
base.release();
return std::unique_ptr<ValueBuilder<T>>(builder);
}
+public:
+ template <typename T>
+ std::unique_ptr<ValueBuilder<T>> create_value_builder(const ValueType &type,
+ size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces) const
+ {
+ return create_value_builder<T>(type, false, num_mapped_dims_in, subspace_size_in, expected_subspaces);
+ }
+ template <typename T>
+ std::unique_ptr<ValueBuilder<T>> create_transient_value_builder(const ValueType &type,
+ size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces) const
+ {
+ return create_value_builder<T>(type, true, num_mapped_dims_in, subspace_size_in, expected_subspaces);
+ }
template <typename T>
std::unique_ptr<ValueBuilder<T>> create_value_builder(const ValueType &type) const
{
- return create_value_builder<T>(type, type.count_mapped_dimensions(), type.dense_subspace_size(), 1);
+ return create_value_builder<T>(type, false, type.count_mapped_dimensions(), type.dense_subspace_size(), 1);
}
std::unique_ptr<Value> copy(const Value &value) const;
virtual ~ValueBuilderFactory() {}
protected:
- virtual std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type,
+ virtual std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type, bool transient,
size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces) const = 0;
};
diff --git a/eval/src/vespa/eval/eval/value_codec.cpp b/eval/src/vespa/eval/eval/value_codec.cpp
index 923d3f29cd3..53131da86d8 100644
--- a/eval/src/vespa/eval/eval/value_codec.cpp
+++ b/eval/src/vespa/eval/eval/value_codec.cpp
@@ -7,6 +7,7 @@
#include <vespa/vespalib/util/exceptions.h>
#include <vespa/vespalib/util/typify.h>
#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
using vespalib::make_string_short::fmt;
@@ -128,9 +129,10 @@ size_t maybe_decode_num_blocks(nbostream &input, bool has_mapped_dims, const For
return 1;
}
-void encode_mapped_labels(nbostream &output, size_t num_mapped_dims, const std::vector<vespalib::stringref> &addr) {
+void encode_mapped_labels(nbostream &output, size_t num_mapped_dims, const std::vector<label_t> &addr) {
for (size_t i = 0; i < num_mapped_dims; ++i) {
- output.writeSmallString(addr[i]);
+ vespalib::string str = SharedStringRepo::Handle::string_from_id(addr[i]);
+ output.writeSmallString(str);
}
}
@@ -175,7 +177,7 @@ struct ContentDecoder {
}
// add implicit empty subspace
if ((state.num_mapped_dims == 0) && (state.num_blocks == 0)) {
- for (T &cell: builder->add_subspace({})) {
+ for (T &cell: builder->add_subspace()) {
cell = T{};
}
}
@@ -229,8 +231,8 @@ struct CreateTensorSpecFromValue {
TensorSpec spec(value.type().to_spec());
size_t subspace_id = 0;
size_t subspace_size = value.type().dense_subspace_size();
- std::vector<vespalib::stringref> labels(value.type().count_mapped_dimensions());
- std::vector<vespalib::stringref*> label_refs;
+ std::vector<label_t> labels(value.type().count_mapped_dimensions());
+ std::vector<label_t*> label_refs;
for (auto &label: labels) {
label_refs.push_back(&label);
}
@@ -241,7 +243,7 @@ struct CreateTensorSpecFromValue {
TensorSpec::Address addr;
for (const auto &dim: value.type().dimensions()) {
if (dim.is_mapped()) {
- addr.emplace(dim.name, labels[label_idx++]);
+ addr.emplace(dim.name, SharedStringRepo::Handle::string_from_id(labels[label_idx++]));
}
}
for (size_t i = 0; i < subspace_size; ++i) {
@@ -270,8 +272,8 @@ struct EncodeState {
struct ContentEncoder {
template<typename T>
static void invoke(const Value &value, const EncodeState &state, nbostream &output) {
- std::vector<vespalib::stringref> address(state.num_mapped_dims);
- std::vector<vespalib::stringref*> a_refs(state.num_mapped_dims);;
+ std::vector<label_t> address(state.num_mapped_dims);
+        std::vector<label_t*> a_refs(state.num_mapped_dims);
for (size_t i = 0; i < state.num_mapped_dims; ++i) {
a_refs[i] = &address[i];
}
diff --git a/eval/src/vespa/eval/instruction/generic_concat.cpp b/eval/src/vespa/eval/instruction/generic_concat.cpp
index fa9d2192b99..5d8ab7187c0 100644
--- a/eval/src/vespa/eval/instruction/generic_concat.cpp
+++ b/eval/src/vespa/eval/instruction/generic_concat.cpp
@@ -47,10 +47,10 @@ generic_concat(const Value &a, const Value &b,
auto a_cells = a.cells().typify<LCT>();
auto b_cells = b.cells().typify<RCT>();
SparseJoinState sparse(sparse_plan, a.index(), b.index());
- auto builder = factory.create_value_builder<OCT>(res_type,
- sparse_plan.sources.size(),
- dense_plan.output_size,
- sparse.first_index.size());
+ auto builder = factory.create_transient_value_builder<OCT>(res_type,
+ sparse_plan.sources.size(),
+ dense_plan.output_size,
+ sparse.first_index.size());
auto outer = sparse.first_index.create_view({});
auto inner = sparse.second_index.create_view(sparse.second_view_dims);
outer->lookup({});
diff --git a/eval/src/vespa/eval/instruction/generic_create.cpp b/eval/src/vespa/eval/instruction/generic_create.cpp
index 02c89e0b43f..6e30da846e7 100644
--- a/eval/src/vespa/eval/instruction/generic_create.cpp
+++ b/eval/src/vespa/eval/instruction/generic_create.cpp
@@ -5,6 +5,7 @@
#include <vespa/eval/eval/array_array_map.h>
#include <vespa/vespalib/util/stash.h>
#include <vespa/vespalib/util/typify.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
#include <cassert>
using namespace vespalib::eval::tensor_function;
@@ -13,6 +14,7 @@ namespace vespalib::eval::instruction {
using State = InterpretedFunction::State;
using Instruction = InterpretedFunction::Instruction;
+using Handle = SharedStringRepo::Handle;
namespace {
@@ -21,12 +23,12 @@ struct CreateParam {
size_t num_mapped_dims;
size_t dense_subspace_size;
size_t num_children;
- ArrayArrayMap<vespalib::string,size_t> my_spec;
+ ArrayArrayMap<Handle,size_t> my_spec;
const ValueBuilderFactory &factory;
static constexpr size_t npos = -1;
- ArrayRef<size_t> indexes(ConstArrayRef<vespalib::string> key) {
+ ArrayRef<size_t> indexes(ConstArrayRef<Handle> key) {
auto [tag, first_time] = my_spec.lookup_or_add_entry(key);
auto rv = my_spec.get_values(tag);
if (first_time) {
@@ -49,7 +51,7 @@ struct CreateParam {
{
size_t last_child = num_children - 1;
for (const auto & entry : spec_in) {
- std::vector<vespalib::string> sparse_key;
+ std::vector<Handle> sparse_key;
size_t dense_key = 0;
auto dim = res_type.dimensions().begin();
auto binding = entry.first.begin();
@@ -58,7 +60,7 @@ struct CreateParam {
assert(dim->name == binding->first);
assert(dim->is_mapped() == binding->second.is_mapped());
if (dim->is_mapped()) {
- sparse_key.push_back(binding->second.name);
+ sparse_key.push_back(Handle(binding->second.name));
} else {
assert(binding->second.index < dim->size);
dense_key = (dense_key * dim->size) + binding->second.index;
@@ -76,16 +78,16 @@ struct CreateParam {
template <typename T>
void my_generic_create_op(State &state, uint64_t param_in) {
const auto &param = unwrap_param<CreateParam>(param_in);
- auto builder = param.factory.create_value_builder<T>(param.res_type,
- param.num_mapped_dims,
- param.dense_subspace_size,
- param.my_spec.size());
- std::vector<vespalib::stringref> sparse_addr;
+ auto builder = param.factory.create_transient_value_builder<T>(param.res_type,
+ param.num_mapped_dims,
+ param.dense_subspace_size,
+ param.my_spec.size());
+ std::vector<label_t> sparse_addr;
param.my_spec.each_entry([&](const auto &key, const auto &values)
{
sparse_addr.clear();
for (const auto & label : key) {
- sparse_addr.push_back(label);
+ sparse_addr.push_back(label.id());
}
T *dst = builder->add_subspace(sparse_addr).begin();
for (size_t stack_idx : values) {
diff --git a/eval/src/vespa/eval/instruction/generic_join.cpp b/eval/src/vespa/eval/instruction/generic_join.cpp
index 026df5aa993..e0dc0feea28 100644
--- a/eval/src/vespa/eval/instruction/generic_join.cpp
+++ b/eval/src/vespa/eval/instruction/generic_join.cpp
@@ -41,7 +41,7 @@ generic_mixed_join(const Value &lhs, const Value &rhs, const JoinParam &param)
if (param.sparse_plan.lhs_overlap.empty() && param.sparse_plan.rhs_overlap.empty()) {
expected_subspaces = sparse.first_index.size() * sparse.second_index.size();
}
- auto builder = param.factory.create_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), param.dense_plan.out_size, expected_subspaces);
+ auto builder = param.factory.create_transient_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), param.dense_plan.out_size, expected_subspaces);
auto outer = sparse.first_index.create_view({});
auto inner = sparse.second_index.create_view(sparse.second_view_dims);
outer->lookup({});
@@ -92,7 +92,7 @@ void my_sparse_no_overlap_join_op(State &state, uint64_t param_in) {
SparseJoinState sparse(param.sparse_plan, lhs.index(), rhs.index());
auto guess = lhs.index().size() * rhs.index().size();
assert(param.dense_plan.out_size == 1);
- auto builder = param.factory.create_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), 1, guess);
+ auto builder = param.factory.create_transient_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), 1, guess);
auto outer = sparse.first_index.create_view({});
assert(sparse.second_view_dims.empty());
auto inner = sparse.second_index.create_view({});
@@ -131,7 +131,7 @@ void my_sparse_full_overlap_join_op(State &state, uint64_t param_in) {
}
Fun fun(param.function);
SparseJoinState sparse(param.sparse_plan, lhs_index, rhs_index);
- auto builder = param.factory.create_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), param.dense_plan.out_size, sparse.first_index.size());
+ auto builder = param.factory.create_transient_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), param.dense_plan.out_size, sparse.first_index.size());
auto outer = sparse.first_index.create_view({});
auto inner = sparse.second_index.create_view(sparse.second_view_dims);
outer->lookup({});
diff --git a/eval/src/vespa/eval/instruction/generic_join.h b/eval/src/vespa/eval/instruction/generic_join.h
index 988286be980..217f3195dec 100644
--- a/eval/src/vespa/eval/instruction/generic_join.h
+++ b/eval/src/vespa/eval/instruction/generic_join.h
@@ -68,10 +68,10 @@ struct SparseJoinState {
const Value::Index &first_index;
const Value::Index &second_index;
const std::vector<size_t> &second_view_dims;
- std::vector<vespalib::stringref> full_address;
- std::vector<vespalib::stringref*> first_address;
- std::vector<const vespalib::stringref*> address_overlap;
- std::vector<vespalib::stringref*> second_only_address;
+ std::vector<label_t> full_address;
+ std::vector<label_t*> first_address;
+ std::vector<const label_t*> address_overlap;
+ std::vector<label_t*> second_only_address;
size_t lhs_subspace;
size_t rhs_subspace;
size_t &first_subspace;
diff --git a/eval/src/vespa/eval/instruction/generic_merge.cpp b/eval/src/vespa/eval/instruction/generic_merge.cpp
index 02749a04eb9..107cb805d74 100644
--- a/eval/src/vespa/eval/instruction/generic_merge.cpp
+++ b/eval/src/vespa/eval/instruction/generic_merge.cpp
@@ -63,10 +63,10 @@ generic_mixed_merge(const Value &a, const Value &b,
const size_t num_mapped = params.num_mapped_dimensions;
const size_t subspace_size = params.dense_subspace_size;
size_t guess_subspaces = std::max(a.index().size(), b.index().size());
- auto builder = params.factory.create_value_builder<OCT>(params.res_type, num_mapped, subspace_size, guess_subspaces);
- std::vector<vespalib::stringref> address(num_mapped);
- std::vector<const vespalib::stringref *> addr_cref;
- std::vector<vespalib::stringref *> addr_ref;
+ auto builder = params.factory.create_transient_value_builder<OCT>(params.res_type, num_mapped, subspace_size, guess_subspaces);
+ std::vector<label_t> address(num_mapped);
+ std::vector<const label_t *> addr_cref;
+ std::vector<label_t *> addr_ref;
for (auto & ref : address) {
addr_cref.push_back(&ref);
addr_ref.push_back(&ref);
diff --git a/eval/src/vespa/eval/instruction/generic_peek.cpp b/eval/src/vespa/eval/instruction/generic_peek.cpp
index 66538911890..d94742ae15c 100644
--- a/eval/src/vespa/eval/instruction/generic_peek.cpp
+++ b/eval/src/vespa/eval/instruction/generic_peek.cpp
@@ -7,6 +7,7 @@
#include <vespa/vespalib/util/stash.h>
#include <vespa/vespalib/util/typify.h>
#include <vespa/vespalib/util/visit_ranges.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
#include <cassert>
using namespace vespalib::eval::tensor_function;
@@ -16,6 +17,8 @@ namespace vespalib::eval::instruction {
using State = InterpretedFunction::State;
using Instruction = InterpretedFunction::Instruction;
+using Handle = SharedStringRepo::Handle;
+
namespace {
static constexpr size_t npos = -1;
@@ -35,28 +38,43 @@ size_t count_children(const Spec &spec)
}
struct DimSpec {
- vespalib::stringref name;
- GenericPeek::SpecMap::mapped_type child_or_label;
+ enum class DimType { CHILD_IDX, LABEL_IDX, LABEL_STR };
+ vespalib::string name;
+ DimType dim_type;
+ size_t idx;
+ Handle str;
+ static DimSpec from_child(const vespalib::string &name_in, size_t child_idx) {
+ return {name_in, DimType::CHILD_IDX, child_idx, Handle()};
+ }
+ static DimSpec from_label(const vespalib::string &name_in, const TensorSpec::Label &label) {
+ if (label.is_mapped()) {
+ return {name_in, DimType::LABEL_STR, 0, Handle(label.name)};
+ } else {
+ assert(label.is_indexed());
+ return {name_in, DimType::LABEL_IDX, label.index, Handle()};
+ }
+ }
+ ~DimSpec();
bool has_child() const {
- return std::holds_alternative<size_t>(child_or_label);
+ return (dim_type == DimType::CHILD_IDX);
}
bool has_label() const {
- return std::holds_alternative<TensorSpec::Label>(child_or_label);
+ return (dim_type != DimType::CHILD_IDX);
}
size_t get_child_idx() const {
- return std::get<size_t>(child_or_label);
+ assert(dim_type == DimType::CHILD_IDX);
+ return idx;
}
- vespalib::stringref get_label_name() const {
- auto & label = std::get<TensorSpec::Label>(child_or_label);
- assert(label.is_mapped());
- return label.name;
+ label_t get_label_name() const {
+ assert(dim_type == DimType::LABEL_STR);
+ return str.id();
}
size_t get_label_index() const {
- auto & label = std::get<TensorSpec::Label>(child_or_label);
- assert(label.is_indexed());
- return label.index;
+ assert(dim_type == DimType::LABEL_IDX);
+ return idx;
}
};
+DimSpec::~DimSpec() = default;
struct ExtractedSpecs {
using Dimension = ValueType::Dimension;
@@ -85,7 +103,11 @@ struct ExtractedSpecs {
dimensions.push_back(a);
const auto & [spec_dim_name, child_or_label] = b;
assert(a.name == spec_dim_name);
- specs.emplace_back(DimSpec{a.name, child_or_label});
+ if (std::holds_alternative<size_t>(child_or_label)) {
+ specs.push_back(DimSpec::from_child(a.name, std::get<size_t>(child_or_label)));
+ } else {
+ specs.push_back(DimSpec::from_label(a.name, std::get<TensorSpec::Label>(child_or_label)));
+ }
}
}
};
@@ -181,22 +203,21 @@ struct DensePlan {
};
struct SparseState {
- std::vector<vespalib::string> view_addr;
- std::vector<vespalib::stringref> view_refs;
- std::vector<const vespalib::stringref *> lookup_refs;
- std::vector<vespalib::stringref> output_addr;
- std::vector<vespalib::stringref *> fetch_addr;
-
- SparseState(std::vector<vespalib::string> view_addr_in, size_t out_dims)
- : view_addr(std::move(view_addr_in)),
- view_refs(view_addr.size()),
+ std::vector<Handle> handles;
+ std::vector<label_t> view_addr;
+ std::vector<const label_t *> lookup_refs;
+ std::vector<label_t> output_addr;
+ std::vector<label_t *> fetch_addr;
+
+ SparseState(std::vector<Handle> handles_in, std::vector<label_t> view_addr_in, size_t out_dims)
+ : handles(std::move(handles_in)),
+ view_addr(std::move(view_addr_in)),
lookup_refs(view_addr.size()),
output_addr(out_dims),
fetch_addr(out_dims)
{
for (size_t i = 0; i < view_addr.size(); ++i) {
- view_refs[i] = view_addr[i];
- lookup_refs[i] = &view_refs[i];
+ lookup_refs[i] = &view_addr[i];
}
for (size_t i = 0; i < out_dims; ++i) {
fetch_addr[i] = &output_addr[i];
@@ -236,17 +257,19 @@ struct SparsePlan {
template <typename Getter>
SparseState make_state(const Getter &get_child_value) const {
- std::vector<vespalib::string> view_addr;
+ std::vector<Handle> handles;
+ std::vector<label_t> view_addr;
for (const auto & dim : lookup_specs) {
if (dim.has_child()) {
int64_t child_value = get_child_value(dim.get_child_idx());
- view_addr.push_back(vespalib::make_string("%" PRId64, child_value));
+ handles.emplace_back(vespalib::make_string("%" PRId64, child_value));
+ view_addr.push_back(handles.back().id());
} else {
view_addr.push_back(dim.get_label_name());
}
}
assert(view_addr.size() == view_dims.size());
- return SparseState(std::move(view_addr), out_mapped_dims);
+ return SparseState(std::move(handles), std::move(view_addr), out_mapped_dims);
}
};
SparsePlan::~SparsePlan() = default;
@@ -284,10 +307,10 @@ generic_mixed_peek(const ValueType &res_type,
{
auto input_cells = input_value.cells().typify<ICT>();
size_t bad_guess = 1;
- auto builder = factory.create_value_builder<OCT>(res_type,
- sparse_plan.out_mapped_dims,
- dense_plan.out_dense_size,
- bad_guess);
+ auto builder = factory.create_transient_value_builder<OCT>(res_type,
+ sparse_plan.out_mapped_dims,
+ dense_plan.out_dense_size,
+ bad_guess);
size_t filled_subspaces = 0;
size_t dense_offset = dense_plan.get_offset(get_child_value);
if (dense_offset != npos) {
@@ -304,7 +327,7 @@ generic_mixed_peek(const ValueType &res_type,
}
}
if ((sparse_plan.out_mapped_dims == 0) && (filled_subspaces == 0)) {
- for (auto & v : builder->add_subspace({})) {
+ for (auto & v : builder->add_subspace()) {
v = OCT{};
}
}
diff --git a/eval/src/vespa/eval/instruction/generic_reduce.cpp b/eval/src/vespa/eval/instruction/generic_reduce.cpp
index afc46e8ee7d..d30186d3dd8 100644
--- a/eval/src/vespa/eval/instruction/generic_reduce.cpp
+++ b/eval/src/vespa/eval/instruction/generic_reduce.cpp
@@ -45,10 +45,10 @@ ReduceParam::~ReduceParam() = default;
//-----------------------------------------------------------------------------
struct SparseReduceState {
- std::vector<vespalib::stringref> full_address;
- std::vector<vespalib::stringref*> fetch_address;
- std::vector<vespalib::stringref*> keep_address;
- size_t subspace;
+ std::vector<label_t> full_address;
+ std::vector<label_t*> fetch_address;
+ std::vector<label_t*> keep_address;
+ size_t subspace;
SparseReduceState(const SparseReducePlan &plan)
: full_address(plan.keep_dims.size() + plan.num_reduce_dims),
@@ -71,20 +71,20 @@ template <typename ICT, typename OCT, typename AGGR>
Value::UP
generic_reduce(const Value &value, const ReduceParam &param) {
auto cells = value.cells().typify<ICT>();
- ArrayArrayMap<vespalib::stringref,AGGR> map(param.sparse_plan.keep_dims.size(),
- param.dense_plan.out_size,
- value.index().size());
+ ArrayArrayMap<label_t,AGGR> map(param.sparse_plan.keep_dims.size(),
+ param.dense_plan.out_size,
+ value.index().size());
SparseReduceState sparse(param.sparse_plan);
auto full_view = value.index().create_view({});
full_view->lookup({});
- ConstArrayRef<vespalib::stringref*> keep_addr(sparse.keep_address);
+ ConstArrayRef<label_t*> keep_addr(sparse.keep_address);
while (full_view->next_result(sparse.fetch_address, sparse.subspace)) {
auto [tag, ignore] = map.lookup_or_add_entry(keep_addr);
AGGR *dst = map.get_values(tag).begin();
auto sample = [&](size_t src_idx, size_t dst_idx) { dst[dst_idx].sample(cells[src_idx]); };
param.dense_plan.execute(sparse.subspace * param.dense_plan.in_size, sample);
}
- auto builder = param.factory.create_value_builder<OCT>(param.res_type, param.sparse_plan.keep_dims.size(), param.dense_plan.out_size, map.size());
+ auto builder = param.factory.create_transient_value_builder<OCT>(param.res_type, param.sparse_plan.keep_dims.size(), param.dense_plan.out_size, map.size());
map.each_entry([&](const auto &keys, const auto &values)
{
OCT *dst = builder->add_subspace(keys).begin();
@@ -93,7 +93,7 @@ generic_reduce(const Value &value, const ReduceParam &param) {
}
});
if ((map.size() == 0) && param.sparse_plan.keep_dims.empty()) {
- auto zero = builder->add_subspace({});
+ auto zero = builder->add_subspace();
for (size_t i = 0; i < zero.size(); ++i) {
zero[i] = OCT{};
}
diff --git a/eval/src/vespa/eval/instruction/generic_rename.cpp b/eval/src/vespa/eval/instruction/generic_rename.cpp
index 1ce18597ec2..894ef37b678 100644
--- a/eval/src/vespa/eval/instruction/generic_rename.cpp
+++ b/eval/src/vespa/eval/instruction/generic_rename.cpp
@@ -69,15 +69,15 @@ generic_rename(const Value &a,
const ValueType &res_type, const ValueBuilderFactory &factory)
{
auto cells = a.cells().typify<CT>();
- std::vector<vespalib::stringref> output_address(sparse_plan.mapped_dims);
- std::vector<vespalib::stringref*> input_address;
+ std::vector<label_t> output_address(sparse_plan.mapped_dims);
+ std::vector<label_t*> input_address;
for (size_t maps_to : sparse_plan.output_dimensions) {
input_address.push_back(&output_address[maps_to]);
}
- auto builder = factory.create_value_builder<CT>(res_type,
- sparse_plan.mapped_dims,
- dense_plan.subspace_size,
- a.index().size());
+ auto builder = factory.create_transient_value_builder<CT>(res_type,
+ sparse_plan.mapped_dims,
+ dense_plan.subspace_size,
+ a.index().size());
auto view = a.index().create_view({});
view->lookup({});
size_t subspace;
diff --git a/eval/src/vespa/eval/streamed/streamed_value.cpp b/eval/src/vespa/eval/streamed/streamed_value.cpp
index bdfe5fd4e27..06162b2200d 100644
--- a/eval/src/vespa/eval/streamed/streamed_value.cpp
+++ b/eval/src/vespa/eval/streamed/streamed_value.cpp
@@ -16,8 +16,7 @@ StreamedValue<T>::get_memory_usage() const
{
MemoryUsage usage = self_memory_usage<StreamedValue<T>>();
usage.merge(vector_extra_memory_usage(_my_cells));
- usage.incUsedBytes(_label_buf.byteSize());
- usage.incAllocatedBytes(_label_buf.byteCapacity());
+ usage.merge(vector_extra_memory_usage(_my_labels.view().handles()));
return usage;
}
diff --git a/eval/src/vespa/eval/streamed/streamed_value.h b/eval/src/vespa/eval/streamed/streamed_value.h
index 258802a53e8..94603d9d35e 100644
--- a/eval/src/vespa/eval/streamed/streamed_value.h
+++ b/eval/src/vespa/eval/streamed/streamed_value.h
@@ -4,6 +4,7 @@
#include <vespa/eval/eval/value_type.h>
#include <vespa/eval/eval/value.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
#include "streamed_value_index.h"
#include <cassert>
@@ -19,20 +20,22 @@ template <typename T>
class StreamedValue : public Value
{
private:
+ using StrongHandles = SharedStringRepo::StrongHandles;
+
ValueType _type;
std::vector<T> _my_cells;
- Array<char> _label_buf;
+ StrongHandles _my_labels;
StreamedValueIndex _my_index;
public:
StreamedValue(ValueType type, size_t num_mapped_dimensions,
- std::vector<T> cells, size_t num_subspaces, Array<char> && label_buf)
+ std::vector<T> cells, size_t num_subspaces, StrongHandles && handles)
: _type(std::move(type)),
_my_cells(std::move(cells)),
- _label_buf(std::move(label_buf)),
+ _my_labels(std::move(handles)),
_my_index(num_mapped_dimensions,
num_subspaces,
- ConstArrayRef<char>(_label_buf.begin(), _label_buf.size()))
+ _my_labels.view().handles())
{
assert(num_subspaces * _type.dense_subspace_size() == _my_cells.size());
}
@@ -42,7 +45,6 @@ public:
TypedCells cells() const final override { return TypedCells(_my_cells); }
const Value::Index &index() const final override { return _my_index; }
MemoryUsage get_memory_usage() const final override;
- auto get_data_reference() const { return _my_index.get_data_reference(); }
};
} // namespace
diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder.h b/eval/src/vespa/eval/streamed/streamed_value_builder.h
index 5698c805756..48a01f893de 100644
--- a/eval/src/vespa/eval/streamed/streamed_value_builder.h
+++ b/eval/src/vespa/eval/streamed/streamed_value_builder.h
@@ -3,7 +3,7 @@
#pragma once
#include "streamed_value.h"
-#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
namespace vespalib::eval {
@@ -14,12 +14,14 @@ template <typename T>
class StreamedValueBuilder : public ValueBuilder<T>
{
private:
+ using StrongHandles = SharedStringRepo::StrongHandles;
+
ValueType _type;
size_t _num_mapped_dimensions;
size_t _dense_subspace_size;
std::vector<T> _cells;
size_t _num_subspaces;
- nbostream _labels;
+ StrongHandles _labels;
public:
StreamedValueBuilder(const ValueType &type,
size_t num_mapped_in,
@@ -30,18 +32,26 @@ public:
_dense_subspace_size(subspace_size_in),
_cells(),
_num_subspaces(0),
- _labels()
+ _labels(num_mapped_in * expected_subspaces)
{
_cells.reserve(subspace_size_in * expected_subspaces);
- // assume small sized label strings:
- _labels.reserve(num_mapped_in * expected_subspaces * 3);
};
~StreamedValueBuilder();
ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) override {
for (auto label : addr) {
- _labels.writeSmallString(label);
+ _labels.add(label);
+ }
+ size_t old_sz = _cells.size();
+ _cells.resize(old_sz + _dense_subspace_size);
+ _num_subspaces++;
+ return ArrayRef<T>(&_cells[old_sz], _dense_subspace_size);
+ }
+
+ ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) override {
+ for (auto label : addr) {
+ _labels.add(label);
}
size_t old_sz = _cells.size();
_cells.resize(old_sz + _dense_subspace_size);
@@ -58,7 +68,7 @@ public:
_num_mapped_dimensions,
std::move(_cells),
_num_subspaces,
- _labels.extract_buffer());
+ std::move(_labels));
}
};
diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp
index aa6347a2c51..5111ba8a71e 100644
--- a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp
+++ b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp
@@ -19,10 +19,12 @@ struct SelectStreamedValueBuilder {
std::unique_ptr<ValueBuilderBase>
StreamedValueBuilderFactory::create_value_builder_base(const ValueType &type,
+ bool transient,
size_t num_mapped,
size_t subspace_size,
size_t expected_subspaces) const
{
+ (void) transient;
return typify_invoke<1,TypifyCellType,SelectStreamedValueBuilder>(
type.cell_type(),
type, num_mapped, subspace_size, expected_subspaces);
diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h
index 3f81981f429..58072aa31dc 100644
--- a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h
+++ b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h
@@ -14,7 +14,7 @@ private:
StreamedValueBuilderFactory() {}
static StreamedValueBuilderFactory _factory;
std::unique_ptr<ValueBuilderBase> create_value_builder_base(
- const ValueType &type, size_t num_mapped_in,
+ const ValueType &type, bool transient, size_t num_mapped_in,
size_t subspace_size_in, size_t expected_subspaces) const override;
public:
static const StreamedValueBuilderFactory &get() { return _factory; }
diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.cpp b/eval/src/vespa/eval/streamed/streamed_value_index.cpp
index 17cf7316554..a014f2dcee9 100644
--- a/eval/src/vespa/eval/streamed/streamed_value_index.cpp
+++ b/eval/src/vespa/eval/streamed/streamed_value_index.cpp
@@ -18,7 +18,7 @@ struct StreamedFilterView : Value::Index::View
{
LabelBlockStream label_blocks;
std::vector<size_t> view_dims;
- std::vector<vespalib::stringref> to_match;
+ std::vector<label_t> to_match;
StreamedFilterView(LabelBlockStream labels, std::vector<size_t> view_dims_in)
: label_blocks(std::move(labels)),
@@ -28,7 +28,7 @@ struct StreamedFilterView : Value::Index::View
to_match.reserve(view_dims.size());
}
- void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
+ void lookup(ConstArrayRef<const label_t*> addr) override {
label_blocks.reset();
to_match.clear();
for (auto ptr : addr) {
@@ -37,7 +37,7 @@ struct StreamedFilterView : Value::Index::View
assert(view_dims.size() == to_match.size());
}
- bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
+ bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override {
while (const auto block = label_blocks.next_block()) {
idx_out = block.subspace_index;
bool matches = true;
@@ -66,12 +66,12 @@ struct StreamedIterationView : Value::Index::View
: label_blocks(std::move(labels))
{}
- void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
+ void lookup(ConstArrayRef<const label_t*> addr) override {
label_blocks.reset();
assert(addr.size() == 0);
}
- bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
+ bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override {
if (auto block = label_blocks.next_block()) {
idx_out = block.subspace_index;
size_t i = 0;
@@ -90,7 +90,7 @@ struct StreamedIterationView : Value::Index::View
std::unique_ptr<Value::Index::View>
StreamedValueIndex::create_view(const std::vector<size_t> &dims) const
{
- LabelBlockStream label_stream(_data.num_subspaces, _data.labels_buffer, _data.num_mapped_dims);
+ LabelBlockStream label_stream(_num_subspaces, _labels_ref, _num_mapped_dims);
if (dims.empty()) {
return std::make_unique<StreamedIterationView>(std::move(label_stream));
}
diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.h b/eval/src/vespa/eval/streamed/streamed_value_index.h
index 8fd561200c3..aa1c9a0e201 100644
--- a/eval/src/vespa/eval/streamed/streamed_value_index.h
+++ b/eval/src/vespa/eval/streamed/streamed_value_index.h
@@ -3,6 +3,7 @@
#pragma once
#include <vespa/eval/eval/value.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
namespace vespalib::eval {
@@ -12,25 +13,21 @@ namespace vespalib::eval {
**/
class StreamedValueIndex : public Value::Index
{
+private:
+ uint32_t _num_mapped_dims;
+ uint32_t _num_subspaces;
+ const std::vector<label_t> &_labels_ref;
+
public:
- struct SerializedDataRef {
- uint32_t num_mapped_dims;
- uint32_t num_subspaces;
- ConstArrayRef<char> labels_buffer;
- };
- StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, ConstArrayRef<char> labels_buf)
- : _data{num_mapped_dims, num_subspaces, labels_buf}
+ StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, const std::vector<label_t> &labels_ref)
+ : _num_mapped_dims(num_mapped_dims),
+ _num_subspaces(num_subspaces),
+ _labels_ref(labels_ref)
{}
// index API:
- size_t size() const override { return _data.num_subspaces; }
+ size_t size() const override { return _num_subspaces; }
std::unique_ptr<View> create_view(const std::vector<size_t> &dims) const override;
-
- SerializedDataRef get_data_reference() const { return _data; }
-
-private:
- SerializedDataRef _data;
};
} // namespace
-
diff --git a/eval/src/vespa/eval/streamed/streamed_value_utils.h b/eval/src/vespa/eval/streamed/streamed_value_utils.h
index b88d4df8581..6b44e052f0c 100644
--- a/eval/src/vespa/eval/streamed/streamed_value_utils.h
+++ b/eval/src/vespa/eval/streamed/streamed_value_utils.h
@@ -4,24 +4,23 @@
#include <vespa/eval/eval/value.h>
#include <vespa/vespalib/objects/nbostream.h>
+#include <cassert>
namespace vespalib::eval {
/**
* Reads a stream of serialized labels.
- * Reading more labels than available will
- * throw an exception.
+ * Reading more labels than available will trigger an assert.
**/
struct LabelStream {
- nbostream source;
- LabelStream(ConstArrayRef<char> data) : source(data.begin(), data.size()) {}
- vespalib::stringref next_label() {
- size_t str_size = source.getInt1_4Bytes();
- vespalib::stringref label(source.peek(), str_size);
- source.adjustReadPos(str_size);
- return label;
+ const std::vector<label_t> &source;
+ size_t pos;
+ LabelStream(const std::vector<label_t> &data) : source(data), pos(0) {}
+ label_t next_label() {
+ assert(pos < source.size());
+ return source[pos++];
}
- void reset() { source.rp(0); }
+ void reset() { pos = 0; }
};
/**
@@ -30,7 +29,7 @@ struct LabelStream {
struct LabelBlock {
static constexpr size_t npos = -1;
size_t subspace_index;
- ConstArrayRef<vespalib::stringref> address;
+ ConstArrayRef<label_t> address;
operator bool() const { return subspace_index != npos; }
};
@@ -43,7 +42,7 @@ private:
size_t _num_subspaces;
LabelStream _labels;
size_t _subspace_index;
- std::vector<vespalib::stringref> _current_address;
+ std::vector<label_t> _current_address;
public:
LabelBlock next_block() {
if (_subspace_index < _num_subspaces) {
@@ -62,10 +61,10 @@ public:
}
LabelBlockStream(uint32_t num_subspaces,
- ConstArrayRef<char> label_buf,
+ const std::vector<label_t> &labels,
uint32_t num_mapped_dims)
: _num_subspaces(num_subspaces),
- _labels(label_buf),
+ _labels(labels),
_subspace_index(num_subspaces),
_current_address(num_mapped_dims)
{}
diff --git a/eval/src/vespa/eval/streamed/streamed_value_view.h b/eval/src/vespa/eval/streamed/streamed_value_view.h
index e37f442dd9a..38eb8db786f 100644
--- a/eval/src/vespa/eval/streamed/streamed_value_view.h
+++ b/eval/src/vespa/eval/streamed/streamed_value_view.h
@@ -24,10 +24,10 @@ private:
public:
StreamedValueView(const ValueType &type, size_t num_mapped_dimensions,
TypedCells cells, size_t num_subspaces,
- ConstArrayRef<char> labels_buf)
+ const std::vector<label_t> &labels)
: _type(type),
_cells_ref(cells),
- _my_index(num_mapped_dimensions, num_subspaces, labels_buf)
+ _my_index(num_mapped_dimensions, num_subspaces, labels)
{
assert(num_subspaces * _type.dense_subspace_size() == _cells_ref.size);
}
@@ -39,7 +39,6 @@ public:
MemoryUsage get_memory_usage() const final override {
return self_memory_usage<StreamedValueView>();
}
- auto get_data_reference() const { return _my_index.get_data_reference(); }
};
} // namespace
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index e1bd47af358..7b597af417d 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -563,7 +563,7 @@ void
Fixture::testCompaction()
{
if ((_traits.use_dense_tensor_attribute && _denseTensors) ||
- _traits.use_direct_tensor_attribute)
+ ! _traits.use_dense_tensor_attribute)
{
LOG(info, "Skipping compaction test for tensor '%s' which is using free-lists", _cfg.tensorType().to_spec().c_str());
return;
diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp
index 6e1fb1a0a2f..260ffa1a388 100644
--- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.cpp
@@ -3,8 +3,7 @@
#include "serialized_fast_value_attribute.h"
#include "streamed_value_saver.h"
#include <vespa/eval/eval/value.h>
-#include <vespa/eval/eval/fast_value.hpp>
-#include <vespa/eval/streamed/streamed_value_utils.h>
+#include <vespa/eval/eval/fast_value.h>
#include <vespa/fastlib/io/bufferedfile.h>
#include <vespa/searchlib/attribute/readerbase.h>
#include <vespa/searchlib/util/fileutil.h>
@@ -21,127 +20,10 @@ using namespace vespalib::eval;
namespace search::tensor {
-namespace {
-
-struct ValueBlock : LabelBlock {
- TypedCells cells;
-};
-
-class ValueBlockStream {
-private:
- const StreamedValueStore::DataFromType &_from_type;
- LabelBlockStream _label_block_stream;
- const char *_cells_ptr;
-
- size_t dsss() const { return _from_type.dense_subspace_size; }
- auto cell_type() const { return _from_type.cell_type; }
-public:
- ValueBlock next_block() {
- auto labels = _label_block_stream.next_block();
- if (labels) {
- TypedCells subspace_cells(_cells_ptr, cell_type(), dsss());
- _cells_ptr += CellTypeUtils::mem_size(cell_type(), dsss());
- return ValueBlock{labels, subspace_cells};
- } else {
- TypedCells none(nullptr, cell_type(), 0);
- return ValueBlock{labels, none};
- }
- }
-
- ValueBlockStream(const StreamedValueStore::DataFromType &from_type,
- const StreamedValueStore::StreamedValueData &from_store)
- : _from_type(from_type),
- _label_block_stream(from_store.num_subspaces,
- from_store.labels_buffer,
- from_type.num_mapped_dimensions),
- _cells_ptr((const char *)from_store.cells_ref.data)
- {
- _label_block_stream.reset();
- }
-
- ~ValueBlockStream();
-};
-
-ValueBlockStream::~ValueBlockStream() = default;
-
-void report_problematic_subspace(size_t idx,
- const StreamedValueStore::DataFromType &from_type,
- const StreamedValueStore::StreamedValueData &from_store)
-{
- LOG(error, "PROBLEM: add_mapping returned same index=%zu twice", idx);
- FastValueIndex temp_index(from_type.num_mapped_dimensions,
- from_store.num_subspaces);
- auto from_start = ValueBlockStream(from_type, from_store);
- while (auto redo_block = from_start.next_block()) {
- if (idx == temp_index.map.add_mapping(redo_block.address)) {
- vespalib::string msg = "Block with address[ ";
- for (vespalib::stringref ref : redo_block.address) {
- msg.append("'").append(ref).append("' ");
- }
- msg.append("]");
- LOG(error, "%s maps to subspace %zu", msg.c_str(), idx);
- }
- }
-}
-
-/**
- * This Value implementation is almost exactly like FastValue, but
- * instead of owning its type and cells it just has a reference to
- * data stored elsewhere.
- * XXX: we should find a better name for this, and move it
- * (together with the helper classes above) to its own file,
- * and add associated unit tests.
- **/
-class OnlyFastValueIndex : public Value {
-private:
- const ValueType &_type;
- TypedCells _cells;
- FastValueIndex my_index;
-public:
- OnlyFastValueIndex(const ValueType &type,
- const StreamedValueStore::DataFromType &from_type,
- const StreamedValueStore::StreamedValueData &from_store)
- : _type(type),
- _cells(from_store.cells_ref),
- my_index(from_type.num_mapped_dimensions,
- from_store.num_subspaces)
- {
- assert(_type.cell_type() == _cells.type);
- std::vector<vespalib::stringref> address(from_type.num_mapped_dimensions);
- auto block_stream = ValueBlockStream(from_type, from_store);
- size_t ss = 0;
- while (auto block = block_stream.next_block()) {
- size_t idx = my_index.map.add_mapping(block.address);
- if (idx != ss) {
- report_problematic_subspace(idx, from_type, from_store);
- }
- ++ss;
- }
- assert(ss == from_store.num_subspaces);
- }
-
-
- ~OnlyFastValueIndex();
-
- const ValueType &type() const final override { return _type; }
- TypedCells cells() const final override { return _cells; }
- const Index &index() const final override { return my_index; }
- vespalib::MemoryUsage get_memory_usage() const final override {
- auto usage = self_memory_usage<OnlyFastValueIndex>();
- usage.merge(my_index.map.estimate_extra_memory_usage());
- return usage;
- }
-};
-
-OnlyFastValueIndex::~OnlyFastValueIndex() = default;
-
-}
-
SerializedFastValueAttribute::SerializedFastValueAttribute(stringref name, const Config &cfg)
: TensorAttribute(name, cfg, _streamedValueStore),
_tensor_type(cfg.tensorType()),
- _streamedValueStore(_tensor_type),
- _data_from_type(_tensor_type)
+ _streamedValueStore(_tensor_type)
{
}
@@ -171,10 +53,8 @@ SerializedFastValueAttribute::getTensor(DocId docId) const
if (!ref.valid()) {
return {};
}
- if (auto data_from_store = _streamedValueStore.get_tensor_data(ref)) {
- return std::make_unique<OnlyFastValueIndex>(_tensor_type,
- _data_from_type,
- data_from_store);
+ if (const auto * ptr = _streamedValueStore.get_tensor_entry(ref)) {
+ return ptr->create_fast_value_view(_tensor_type);
}
return {};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h
index a8c1df4913a..cc559d9b758 100644
--- a/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h
+++ b/searchlib/src/vespa/searchlib/tensor/serialized_fast_value_attribute.h
@@ -19,7 +19,6 @@ namespace search::tensor {
class SerializedFastValueAttribute : public TensorAttribute {
vespalib::eval::ValueType _tensor_type;
StreamedValueStore _streamedValueStore; // data store for serialized tensors
- const StreamedValueStore::DataFromType _data_from_type;
public:
SerializedFastValueAttribute(vespalib::stringref baseFileName, const Config &cfg);
virtual ~SerializedFastValueAttribute();
diff --git a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp
index c4579880409..ef4b711b86f 100644
--- a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.cpp
@@ -1,99 +1,204 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "streamed_value_store.h"
-#include "tensor_deserialize.h"
#include <vespa/eval/eval/value.h>
#include <vespa/eval/eval/value_codec.h>
+#include <vespa/eval/eval/fast_value.hpp>
#include <vespa/eval/streamed/streamed_value_builder_factory.h>
#include <vespa/eval/streamed/streamed_value_view.h>
#include <vespa/vespalib/datastore/datastore.hpp>
#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/util/typify.h>
#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/log/log.h>
LOG_SETUP(".searchlib.tensor.streamed_value_store");
using vespalib::datastore::Handle;
+using vespalib::datastore::EntryRef;
using namespace vespalib::eval;
+using vespalib::ConstArrayRef;
+using vespalib::MemoryUsage;
namespace search::tensor {
+//-----------------------------------------------------------------------------
+
namespace {
-constexpr size_t MIN_BUFFER_ARRAYS = 1024;
-
-struct CellsMemBlock {
- uint32_t num;
- uint32_t total_sz;
- const char *ptr;
- CellsMemBlock(TypedCells cells)
- : num(cells.size),
- total_sz(CellTypeUtils::mem_size(cells.type, num)),
- ptr((const char *)cells.data)
- {}
+template <typename CT, typename F>
+void each_subspace(const Value &value, size_t num_mapped, size_t dense_size, F f) {
+ size_t subspace;
+ std::vector<label_t> addr(num_mapped);
+ std::vector<label_t*> refs;
+ refs.reserve(addr.size());
+ for (label_t &label: addr) {
+ refs.push_back(&label);
+ }
+ auto cells = value.cells().typify<CT>();
+ auto view = value.index().create_view({});
+ view->lookup({});
+ while (view->next_result(refs, subspace)) {
+ size_t offset = subspace * dense_size;
+ f(ConstArrayRef<label_t>(addr), ConstArrayRef<CT>(cells.begin() + offset, dense_size));
+ }
+}
+
+using TensorEntry = StreamedValueStore::TensorEntry;
+
+struct CreateTensorEntry {
+ template <typename CT>
+ static TensorEntry::SP invoke(const Value &value, size_t num_mapped, size_t dense_size) {
+ using EntryImpl = StreamedValueStore::TensorEntryImpl<CT>;
+ return std::make_shared<EntryImpl>(value, num_mapped, dense_size);
+ }
};
-template<typename T>
-void check_alignment(T *ptr, size_t align)
+using HandleView = vespalib::SharedStringRepo::HandleView;
+
+struct MyFastValueView final : Value {
+ const ValueType &my_type;
+ FastValueIndex my_index;
+ TypedCells my_cells;
+ MyFastValueView(const ValueType &type_ref, HandleView handle_view, TypedCells cells, size_t num_mapped, size_t num_spaces)
+ : my_type(type_ref),
+ my_index(num_mapped, handle_view, num_spaces),
+ my_cells(cells)
+ {
+ const std::vector<label_t> &labels = handle_view.handles();
+ for (size_t i = 0; i < num_spaces; ++i) {
+ ConstArrayRef<label_t> addr(&labels[i * num_mapped], num_mapped);
+ my_index.map.add_mapping(FastAddrMap::hash_labels(addr));
+ }
+ assert(my_index.map.size() == num_spaces);
+ }
+ const ValueType &type() const override { return my_type; }
+ const Value::Index &index() const override { return my_index; }
+ TypedCells cells() const override { return my_cells; }
+ MemoryUsage get_memory_usage() const override {
+ MemoryUsage usage = self_memory_usage<MyFastValueView>();
+ usage.merge(my_index.map.estimate_extra_memory_usage());
+ return usage;
+ }
+};
+
+} // <unnamed>
+
+//-----------------------------------------------------------------------------
+
+StreamedValueStore::TensorEntry::~TensorEntry() = default;
+
+StreamedValueStore::TensorEntry::SP
+StreamedValueStore::TensorEntry::create_shared_entry(const Value &value)
{
- static_assert(sizeof(T) == 1);
- size_t ptr_val = (size_t)ptr;
- size_t unalign = ptr_val & (align - 1);
- assert(unalign == 0);
+ size_t num_mapped = value.type().count_mapped_dimensions();
+ size_t dense_size = value.type().dense_subspace_size();
+ return vespalib::typify_invoke<1,TypifyCellType,CreateTensorEntry>(value.type().cell_type(), value, num_mapped, dense_size);
}
-} // namespace <unnamed>
+template <typename CT>
+StreamedValueStore::TensorEntryImpl<CT>::TensorEntryImpl(const Value &value, size_t num_mapped, size_t dense_size)
+ : handles(num_mapped * value.index().size()),
+ cells()
+{
+ cells.reserve(dense_size * value.index().size());
+ auto store_subspace = [&](auto addr, auto data) {
+ for (label_t label: addr) {
+ handles.add(label);
+ }
+ for (CT entry: data) {
+ cells.push_back(entry);
+ }
+ };
+ each_subspace<CT>(value, num_mapped, dense_size, store_subspace);
+}
-StreamedValueStore::StreamedValueStore(const ValueType &tensor_type)
- : TensorStore(_concreteStore),
- _concreteStore(),
- _bufferType(RefType::align(1),
- MIN_BUFFER_ARRAYS,
- RefType::offsetSize() / RefType::align(1)),
- _tensor_type(tensor_type),
- _data_from_type(_tensor_type)
+template <typename CT>
+Value::UP
+StreamedValueStore::TensorEntryImpl<CT>::create_fast_value_view(const ValueType &type_ref) const
{
- _store.addType(&_bufferType);
- _store.initActiveBuffers();
- size_t align = CellTypeUtils::alignment(_data_from_type.cell_type);
- // max alignment we can handle is 8:
- assert(align <= 8);
- // alignment must be a power of two:
- assert((align & (align-1)) == 0);
+ size_t num_mapped = type_ref.count_mapped_dimensions();
+ size_t dense_size = type_ref.dense_subspace_size();
+ size_t num_spaces = cells.size() / dense_size;
+ assert(dense_size * num_spaces == cells.size());
+ assert(num_mapped * num_spaces == handles.view().handles().size());
+ return std::make_unique<MyFastValueView>(type_ref, handles.view(), TypedCells(cells), num_mapped, num_spaces);
}
-StreamedValueStore::~StreamedValueStore()
+template <typename CT>
+void
+StreamedValueStore::TensorEntryImpl<CT>::encode_value(const ValueType &type, vespalib::nbostream &target) const
{
- _store.dropBuffers();
+ size_t num_mapped = type.count_mapped_dimensions();
+ size_t dense_size = type.dense_subspace_size();
+ size_t num_spaces = cells.size() / dense_size;
+ assert(dense_size * num_spaces == cells.size());
+ assert(num_mapped * num_spaces == handles.view().handles().size());
+ StreamedValueView my_value(type, num_mapped, TypedCells(cells), num_spaces, handles.view().handles());
+ ::vespalib::eval::encode_value(my_value, target);
}
-std::pair<const char *, uint32_t>
-StreamedValueStore::getRawBuffer(RefType ref) const
+template <typename CT>
+MemoryUsage
+StreamedValueStore::TensorEntryImpl<CT>::get_memory_usage() const
+{
+ MemoryUsage usage = self_memory_usage<TensorEntryImpl<CT>>();
+ usage.merge(vector_extra_memory_usage(handles.view().handles()));
+ usage.merge(vector_extra_memory_usage(cells));
+ return usage;
+}
+
+template <typename CT>
+StreamedValueStore::TensorEntryImpl<CT>::~TensorEntryImpl() = default;
+
+//-----------------------------------------------------------------------------
+
+constexpr size_t MIN_BUFFER_ARRAYS = 8192;
+
+StreamedValueStore::TensorBufferType::TensorBufferType()
+ : ParentType(1, MIN_BUFFER_ARRAYS, TensorStoreType::RefType::offsetSize())
{
- if (!ref.valid()) {
- return std::make_pair(nullptr, 0u);
- }
- const char *buf = _store.getEntry<char>(ref);
- uint32_t len = *reinterpret_cast<const uint32_t *>(buf);
- return std::make_pair(buf + sizeof(uint32_t), len);
}
-Handle<char>
-StreamedValueStore::allocRawBuffer(uint32_t size)
+void
+StreamedValueStore::TensorBufferType::cleanHold(void* buffer, size_t offset, size_t num_elems, CleanContext clean_ctx)
{
- if (size == 0) {
- return Handle<char>();
+ TensorEntry::SP* elem = static_cast<TensorEntry::SP*>(buffer) + offset;
+ for (size_t i = 0; i < num_elems; ++i) {
+ clean_ctx.extraBytesCleaned((*elem)->get_memory_usage().allocatedBytes());
+ *elem = _emptyEntry;
+ ++elem;
}
- size_t extSize = size + sizeof(uint32_t);
- size_t bufSize = RefType::align(extSize);
- auto result = _concreteStore.rawAllocator<char>(_typeId).alloc(bufSize);
- *reinterpret_cast<uint32_t *>(result.data) = size;
- char *padWritePtr = result.data + extSize;
- for (size_t i = extSize; i < bufSize; ++i) {
- *padWritePtr++ = 0;
+}
+
+StreamedValueStore::StreamedValueStore(const ValueType &tensor_type)
+ : TensorStore(_concrete_store),
+ _concrete_store(),
+ _tensor_type(tensor_type)
+{
+ _concrete_store.enableFreeLists();
+}
+
+StreamedValueStore::~StreamedValueStore() = default;
+
+EntryRef
+StreamedValueStore::add_entry(TensorEntry::SP tensor)
+{
+ auto ref = _concrete_store.addEntry(tensor);
+ auto& state = _concrete_store.getBufferState(RefType(ref).bufferId());
+ state.incExtraUsedBytes(tensor->get_memory_usage().allocatedBytes());
+ return ref;
+}
+
+const StreamedValueStore::TensorEntry *
+StreamedValueStore::get_tensor_entry(EntryRef ref) const
+{
+ if (!ref.valid()) {
+ return nullptr;
}
- // Hide length of buffer (first 4 bytes) from users of the buffer.
- return Handle<char>(result.ref, result.data + sizeof(uint32_t));
+ const auto& entry = _concrete_store.getEntry(ref);
+ assert(entry);
+ return entry.get();
}
void
@@ -102,111 +207,40 @@ StreamedValueStore::holdTensor(EntryRef ref)
if (!ref.valid()) {
return;
}
- RefType iRef(ref);
- const char *buf = _store.getEntry<char>(iRef);
- uint32_t len = *reinterpret_cast<const uint32_t *>(buf);
- _concreteStore.holdElem(ref, len + sizeof(uint32_t));
+ const auto& tensor = _concrete_store.getEntry(ref);
+ assert(tensor);
+ _concrete_store.holdElem(ref, 1, tensor->get_memory_usage().allocatedBytes());
}
TensorStore::EntryRef
StreamedValueStore::move(EntryRef ref)
{
if (!ref.valid()) {
- return RefType();
+ return EntryRef();
}
- auto oldraw = getRawBuffer(ref);
- auto newraw = allocRawBuffer(oldraw.second);
- memcpy(newraw.data, oldraw.first, oldraw.second);
- _concreteStore.holdElem(ref, oldraw.second + sizeof(uint32_t));
- return newraw.ref;
-}
-
-StreamedValueStore::StreamedValueData
-StreamedValueStore::get_tensor_data(EntryRef ref) const
-{
- StreamedValueData retval;
- retval.valid = false;
- auto raw = getRawBuffer(ref);
- if (raw.second == 0u) {
- return retval;
- }
- vespalib::nbostream source(raw.first, raw.second);
- uint32_t num_cells = source.readValue<uint32_t>();
- check_alignment(source.peek(), CellTypeUtils::alignment(_data_from_type.cell_type));
- retval.cells_ref = TypedCells(source.peek(), _data_from_type.cell_type, num_cells);
- source.adjustReadPos(CellTypeUtils::mem_size(_data_from_type.cell_type, num_cells));
- assert((num_cells % _data_from_type.dense_subspace_size) == 0);
- retval.num_subspaces = num_cells / _data_from_type.dense_subspace_size;
- retval.labels_buffer = vespalib::ConstArrayRef<char>(source.peek(), source.size());
- retval.valid = true;
- return retval;
+ const auto& old_tensor = _concrete_store.getEntry(ref);
+ assert(old_tensor);
+ auto new_ref = add_entry(old_tensor);
+ _concrete_store.holdElem(ref, 1, old_tensor->get_memory_usage().allocatedBytes());
+ return new_ref;
}
bool
StreamedValueStore::encode_tensor(EntryRef ref, vespalib::nbostream &target) const
{
- if (auto data = get_tensor_data(ref)) {
- StreamedValueView value(
- _tensor_type, _data_from_type.num_mapped_dimensions,
- data.cells_ref, data.num_subspaces, data.labels_buffer);
- vespalib::eval::encode_value(value, target);
+ if (const auto * entry = get_tensor_entry(ref)) {
+ entry->encode_value(_tensor_type, target);
return true;
} else {
return false;
}
}
-void
-StreamedValueStore::serialize_labels(const Value::Index &index,
- vespalib::nbostream &target) const
-{
- uint32_t num_subspaces = index.size();
- uint32_t num_mapped_dims = _data_from_type.num_mapped_dimensions;
- std::vector<vespalib::stringref> labels(num_mapped_dims * num_subspaces);
- auto view = index.create_view({});
- view->lookup({});
- std::vector<vespalib::stringref> addr(num_mapped_dims);
- std::vector<vespalib::stringref *> addr_refs;
- for (auto & label : addr) {
- addr_refs.push_back(&label);
- }
- size_t subspace;
- for (size_t ss = 0; ss < num_subspaces; ++ss) {
- bool ok = view->next_result(addr_refs, subspace);
- assert(ok);
- size_t idx = subspace * num_mapped_dims;
- for (auto label : addr) {
- labels[idx++] = label;
- }
- }
- bool ok = view->next_result(addr_refs, subspace);
- assert(!ok);
- for (auto label : labels) {
- target.writeSmallString(label);
- }
-}
-
TensorStore::EntryRef
StreamedValueStore::store_tensor(const Value &tensor)
{
assert(tensor.type() == _tensor_type);
- CellsMemBlock cells_mem(tensor.cells());
- vespalib::nbostream stream;
- stream << uint32_t(cells_mem.num);
- serialize_labels(tensor.index(), stream);
- size_t mem_size = stream.size() + cells_mem.total_sz;
- auto raw = allocRawBuffer(mem_size);
- char *target = raw.data;
- memcpy(target, stream.peek(), sizeof(uint32_t));
- stream.adjustReadPos(sizeof(uint32_t));
- target += sizeof(uint32_t);
- check_alignment(target, CellTypeUtils::alignment(_data_from_type.cell_type));
- memcpy(target, cells_mem.ptr, cells_mem.total_sz);
- target += cells_mem.total_sz;
- memcpy(target, stream.peek(), stream.size());
- target += stream.size();
- assert(target <= raw.data + mem_size);
- return raw.ref;
+ return add_entry(TensorEntry::create_shared_entry(tensor));
}
TensorStore::EntryRef
diff --git a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h
index de94dc043d3..3a9d9a0b7b4 100644
--- a/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h
+++ b/searchlib/src/vespa/searchlib/tensor/streamed_value_store.h
@@ -5,87 +5,71 @@
#include "tensor_store.h"
#include <vespa/eval/eval/value_type.h>
#include <vespa/eval/eval/value.h>
+#include <vespa/eval/streamed/streamed_value.h>
#include <vespa/vespalib/objects/nbostream.h>
-#include <vespa/vespalib/util/typify.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
namespace search::tensor {
/**
- * Class for storing tensors in memory, with a special serialization
- * format that can be used directly to make a StreamedValueView.
- *
- * The tensor type is owned by the store itself and will not be
- * serialized at all.
- *
- * The parameters for serialization (see DataFromType) are:
- * - number of mapped dimensions [MD]
- * - dense subspace size [DS]
- * - size of each cell [CS] - currently 4 (float) or 8 (double)
- * - alignment for cells - currently 4 (float) or 8 (double)
- * While the tensor value to be serialized has:
- * - number of dense subspaces [ND]
- * - labels for dense subspaces, ND * MD strings
- * - cell values, ND * DS cells (each either float or double)
- * The serialization format looks like:
- *
- * [bytes] : [format] : [description]
- * 4 : n.b.o. uint32_ t : num cells = ND * DS
- * CS * ND * DS : native float or double : cells
- * (depends) : n.b.o. strings : ND * MD label strings
- *
- * Here, n.b.o. means network byte order, or more precisely
- * it's the format vespalib::nbostream uses for the given data type,
- * including strings (where exact format depends on the string length).
- * Note that the only unpredictably-sized data (the labels) are kept
- * last.
- * If we ever make a "hbostream" which uses host byte order, we
- * could switch to that instead since these data are only kept in
- * memory.
+ * Class for StreamedValue tensors in memory.
*/
class StreamedValueStore : public TensorStore {
public:
- using RefType = vespalib::datastore::AlignedEntryRefT<22, 3>;
- using DataStoreType = vespalib::datastore::DataStoreT<RefType>;
+ using Value = vespalib::eval::Value;
+ using ValueType = vespalib::eval::ValueType;
+ using Handles = vespalib::SharedStringRepo::StrongHandles;
+ using MemoryUsage = vespalib::MemoryUsage;
- struct StreamedValueData {
- bool valid;
- vespalib::eval::TypedCells cells_ref;
- size_t num_subspaces;
- vespalib::ConstArrayRef<char> labels_buffer;
- operator bool() const { return valid; }
+ // interface for tensor entries
+ struct TensorEntry {
+ using SP = std::shared_ptr<TensorEntry>;
+ virtual Value::UP create_fast_value_view(const ValueType &type_ref) const = 0;
+ virtual void encode_value(const ValueType &type, vespalib::nbostream &target) const = 0;
+ virtual MemoryUsage get_memory_usage() const = 0;
+ virtual ~TensorEntry();
+ static TensorEntry::SP create_shared_entry(const Value &value);
};
- struct DataFromType {
- uint32_t num_mapped_dimensions;
- uint32_t dense_subspace_size;
- vespalib::eval::CellType cell_type;
-
- DataFromType(const vespalib::eval::ValueType& type)
- : num_mapped_dimensions(type.count_mapped_dimensions()),
- dense_subspace_size(type.dense_subspace_size()),
- cell_type(type.cell_type())
- {}
+ // implementation of tensor entries
+ template <typename CT>
+ struct TensorEntryImpl : public TensorEntry {
+ Handles handles;
+ std::vector<CT> cells;
+ TensorEntryImpl(const Value &value, size_t num_mapped, size_t dense_size);
+ Value::UP create_fast_value_view(const ValueType &type_ref) const override;
+ void encode_value(const ValueType &type, vespalib::nbostream &target) const override;
+ MemoryUsage get_memory_usage() const override;
+ ~TensorEntryImpl() override;
};
private:
- DataStoreType _concreteStore;
- vespalib::datastore::BufferType<char> _bufferType;
- vespalib::eval::ValueType _tensor_type;
- DataFromType _data_from_type;
-
- void serialize_labels(const vespalib::eval::Value::Index &index,
- vespalib::nbostream &target) const;
+ // Note: Must use SP (instead of UP) because of fallbackCopy() and initializeReservedElements() in BufferType,
+ // and implementation of move().
+ using TensorStoreType = vespalib::datastore::DataStore<TensorEntry::SP>;
- std::pair<const char *, uint32_t> getRawBuffer(RefType ref) const;
- vespalib::datastore::Handle<char> allocRawBuffer(uint32_t size);
+ class TensorBufferType : public vespalib::datastore::BufferType<TensorEntry::SP> {
+ private:
+ using ParentType = BufferType<TensorEntry::SP>;
+ using ParentType::_emptyEntry;
+ using CleanContext = typename ParentType::CleanContext;
+ public:
+ TensorBufferType();
+ virtual void cleanHold(void* buffer, size_t offset, size_t num_elems, CleanContext clean_ctx) override;
+ };
+ TensorStoreType _concrete_store;
+ const vespalib::eval::ValueType _tensor_type;
+ EntryRef add_entry(TensorEntry::SP tensor);
public:
StreamedValueStore(const vespalib::eval::ValueType &tensor_type);
- virtual ~StreamedValueStore();
+ ~StreamedValueStore() override;
+
+ using RefType = TensorStoreType::RefType;
- virtual void holdTensor(EntryRef ref) override;
- virtual EntryRef move(EntryRef ref) override;
+ void holdTensor(EntryRef ref) override;
+ EntryRef move(EntryRef ref) override;
- StreamedValueData get_tensor_data(EntryRef ref) const;
+ const TensorEntry * get_tensor_entry(EntryRef ref) const;
bool encode_tensor(EntryRef ref, vespalib::nbostream &target) const;
EntryRef store_tensor(const vespalib::eval::Value &tensor);
diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp
index a5ec9540a1b..e529b1190d9 100644
--- a/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp
+++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.cpp
@@ -7,6 +7,18 @@ namespace vespalib {
SharedStringRepo::Partition::~Partition() = default;
void
+SharedStringRepo::Partition::find_leaked_entries(size_t my_idx) const
+{
+ for (size_t i = 0; i < _entries.size(); ++i) {
+ if (!_entries[i].is_free()) {
+ size_t id = (((i << PART_BITS) | my_idx) + 1);
+ fprintf(stderr, "WARNING: shared_string_repo: leaked string id: %zu ('%s')\n",
+ id, _entries[i].str().c_str());
+ }
+ }
+}
+
+void
SharedStringRepo::Partition::make_entries(size_t hint)
{
hint = std::max(hint, _entries.size() + 1);
@@ -20,7 +32,12 @@ SharedStringRepo::Partition::make_entries(size_t hint)
}
SharedStringRepo::SharedStringRepo() = default;
-SharedStringRepo::~SharedStringRepo() = default;
+SharedStringRepo::~SharedStringRepo()
+{
+ for (size_t p = 0; p < _partitions.size(); ++p) {
+ _partitions[p].find_leaked_entries(p);
+ }
+}
SharedStringRepo &
SharedStringRepo::get()
@@ -44,6 +61,13 @@ SharedStringRepo::StrongHandles::StrongHandles(size_t expect_size)
_handles.reserve(expect_size);
}
+SharedStringRepo::StrongHandles::StrongHandles(StrongHandles &&rhs)
+ : _repo(rhs._repo),
+ _handles(std::move(rhs._handles))
+{
+ assert(rhs._handles.empty());
+}
+
SharedStringRepo::StrongHandles::~StrongHandles()
{
for (uint32_t handle: _handles) {
diff --git a/vespalib/src/vespa/vespalib/util/shared_string_repo.h b/vespalib/src/vespa/vespalib/util/shared_string_repo.h
index afdd3a289f9..f7137984caa 100644
--- a/vespalib/src/vespa/vespalib/util/shared_string_repo.h
+++ b/vespalib/src/vespa/vespalib/util/shared_string_repo.h
@@ -10,6 +10,7 @@
#include <mutex>
#include <vector>
#include <array>
+#include <cassert>
namespace vespalib {
@@ -34,21 +35,43 @@ private:
class alignas(64) Partition {
public:
- struct Entry {
+ class Entry {
+ public:
static constexpr uint32_t npos = -1;
- uint32_t hash;
- uint32_t ref_cnt;
- vespalib::string str;
- explicit Entry(uint32_t next) noexcept : hash(), ref_cnt(next), str() {}
+ private:
+ uint32_t _hash;
+ uint32_t _ref_cnt;
+ vespalib::string _str;
+ public:
+ explicit Entry(uint32_t next) noexcept
+ : _hash(next), _ref_cnt(npos), _str() {}
+ constexpr uint32_t hash() const noexcept { return _hash; }
+ constexpr const vespalib::string &str() const noexcept { return _str; }
+ constexpr bool is_free() const noexcept { return (_ref_cnt == npos); }
uint32_t init(const AltKey &key) {
- uint32_t next = ref_cnt;
- hash = key.hash;
- ref_cnt = 1;
- str = key.str;
+ uint32_t next = _hash;
+ _hash = key.hash;
+ _ref_cnt = 1;
+ _str = key.str;
return next;
}
void fini(uint32_t next) {
- ref_cnt = next;
+ _hash = next;
+ _ref_cnt = npos;
+ // to reset or not to reset...
+ // _str.reset();
+ }
+ vespalib::string as_string() const {
+ assert(!is_free());
+ return _str;
+ }
+ void add_ref() {
+ assert(!is_free());
+ ++_ref_cnt;
+ }
+ bool sub_ref() {
+ assert(!is_free());
+ return (--_ref_cnt == 0);
}
};
struct Key {
@@ -64,7 +87,7 @@ private:
Equal(const std::vector<Entry> &entries_in) : entries(entries_in) {}
Equal(const Equal &rhs) = default;
bool operator()(const Key &a, const Key &b) const { return (a.idx == b.idx); }
- bool operator()(const Key &a, const AltKey &b) const { return ((a.hash == b.hash) && (entries[a.idx].str == b.str)); }
+ bool operator()(const Key &a, const AltKey &b) const { return ((a.hash == b.hash) && (entries[a.idx].str() == b.str)); }
};
using HashType = hashtable<Key,Key,Hash,Equal,Identity,hashtable_base::and_modulator>;
@@ -92,12 +115,13 @@ private:
make_entries(64);
}
~Partition();
+ void find_leaked_entries(size_t my_idx) const;
uint32_t resolve(const AltKey &alt_key) {
std::lock_guard guard(_lock);
auto pos = _hash.find(alt_key);
if (pos != _hash.end()) {
- ++_entries[pos->idx].ref_cnt;
+ _entries[pos->idx].add_ref();
return pos->idx;
} else {
uint32_t idx = make_entry(alt_key);
@@ -108,19 +132,19 @@ private:
vespalib::string as_string(uint32_t idx) {
std::lock_guard guard(_lock);
- return _entries[idx].str;
+ return _entries[idx].as_string();
}
void copy(uint32_t idx) {
std::lock_guard guard(_lock);
- ++_entries[idx].ref_cnt;
+ _entries[idx].add_ref();
}
void reclaim(uint32_t idx) {
std::lock_guard guard(_lock);
Entry &entry = _entries[idx];
- if (--entry.ref_cnt == 0) {
- _hash.erase(Key{idx, entry.hash});
+ if (entry.sub_ref()) {
+ _hash.erase(Key{idx, entry.hash()});
entry.fini(_free);
_free = idx;
}
@@ -178,8 +202,9 @@ public:
class Handle {
private:
uint32_t _id;
+ Handle(uint32_t weak_id) : _id(get().copy(weak_id)) {}
public:
- Handle() : _id(0) {}
+ Handle() noexcept : _id(0) {}
Handle(vespalib::stringref str) : _id(get().resolve(str)) {}
Handle(const Handle &rhs) : _id(get().copy(rhs._id)) {}
Handle &operator=(const Handle &rhs) {
@@ -196,9 +221,15 @@ public:
rhs._id = 0;
return *this;
}
- bool operator==(const Handle &rhs) const { return (_id == rhs._id); }
- uint32_t id() const { return _id; }
+ // NB: not lexical sorting order, but can be used in maps
+ bool operator<(const Handle &rhs) const noexcept { return (_id < rhs._id); }
+ bool operator==(const Handle &rhs) const noexcept { return (_id == rhs._id); }
+ bool operator!=(const Handle &rhs) const noexcept { return (_id != rhs._id); }
+ uint32_t id() const noexcept { return _id; }
+ uint32_t hash() const noexcept { return _id; }
vespalib::string as_string() const { return get().as_string(_id); }
+ static Handle handle_from_id(uint32_t weak_id) { return Handle(weak_id); }
+ static vespalib::string string_from_id(uint32_t weak_id) { return get().as_string(weak_id); }
~Handle() { get().reclaim(_id); }
};
@@ -229,8 +260,20 @@ public:
std::vector<uint32_t> _handles;
public:
StrongHandles(size_t expect_size);
+ StrongHandles(StrongHandles &&rhs);
+ StrongHandles(const StrongHandles &) = delete;
+ StrongHandles &operator=(const StrongHandles &) = delete;
+ StrongHandles &operator=(StrongHandles &&) = delete;
~StrongHandles();
- void add(vespalib::stringref str) { _handles.push_back(_repo.resolve(str)); }
+ uint32_t add(vespalib::stringref str) {
+ uint32_t id = _repo.resolve(str);
+ _handles.push_back(id);
+ return id;
+ }
+ void add(uint32_t handle) {
+ uint32_t id = _repo.copy(handle);
+ _handles.push_back(id);
+ }
HandleView view() const { return HandleView(_handles); }
};
};