author     Håvard Pettersen <havardpe@oath.com>    2020-12-10 15:31:35 +0000
committer  Håvard Pettersen <havardpe@oath.com>    2021-01-05 12:18:35 +0000
commit     74665956e649cb28682ba25d4f2089a7fae5087e (patch)
tree       b89c61226566a11e13adb38430ba841d5a20ae0e /eval/src
parent     6438e2f64460178525a42fcfdaf207e264a020ef (diff)
use string ids as tensor labels
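
The change in a nutshell: sparse tensor labels are no longer passed around as vespalib::stringref, but as 32-bit string ids (label_t) interned in the shared string repo. A minimal sketch, using only the SharedStringRepo::Handle API exercised by the tests in this diff:

    using vespalib::eval::label_t;                      // uint32_t string id (see label.h below)
    using Handle = vespalib::SharedStringRepo::Handle;

    // before: a sparse address was a list of string references
    std::vector<vespalib::stringref> old_addr = {"foo", "bar"};

    // after: a sparse address is a list of ids; Handles keep the ids valid
    Handle foo("foo");
    Handle bar("bar");
    std::vector<label_t> new_addr = {foo.id(), bar.id()};

    // mapping an id back to its string (e.g. when encoding to the binary format)
    vespalib::string name = Handle::string_from_id(new_addr[0]);   // "foo"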
Diffstat (limited to 'eval/src')
-rw-r--r--  eval/src/tests/eval/fast_value/fast_value_test.cpp | 87
-rw-r--r--  eval/src/tests/eval/simple_value/simple_value_test.cpp | 27
-rw-r--r--  eval/src/tests/streamed/value/streamed_value_test.cpp | 27
-rw-r--r--  eval/src/vespa/eval/eval/CMakeLists.txt | 1
-rw-r--r--  eval/src/vespa/eval/eval/fast_addr_map.cpp | 9
-rw-r--r--  eval/src/vespa/eval/eval/fast_addr_map.h | 152
-rw-r--r--  eval/src/vespa/eval/eval/fast_value.cpp | 15
-rw-r--r--  eval/src/vespa/eval/eval/fast_value.h | 2
-rw-r--r--  eval/src/vespa/eval/eval/fast_value.hpp | 219
-rw-r--r--  eval/src/vespa/eval/eval/label.h | 15
-rw-r--r--  eval/src/vespa/eval/eval/simple_value.cpp | 59
-rw-r--r--  eval/src/vespa/eval/eval/simple_value.h | 9
-rw-r--r--  eval/src/vespa/eval/eval/value.cpp | 4
-rw-r--r--  eval/src/vespa/eval/eval/value.h | 35
-rw-r--r--  eval/src/vespa/eval/eval/value_codec.cpp | 18
-rw-r--r--  eval/src/vespa/eval/instruction/generic_concat.cpp | 8
-rw-r--r--  eval/src/vespa/eval/instruction/generic_create.cpp | 22
-rw-r--r--  eval/src/vespa/eval/instruction/generic_join.cpp | 6
-rw-r--r--  eval/src/vespa/eval/instruction/generic_join.h | 8
-rw-r--r--  eval/src/vespa/eval/instruction/generic_merge.cpp | 8
-rw-r--r--  eval/src/vespa/eval/instruction/generic_peek.cpp | 87
-rw-r--r--  eval/src/vespa/eval/instruction/generic_reduce.cpp | 20
-rw-r--r--  eval/src/vespa/eval/instruction/generic_rename.cpp | 12
-rw-r--r--  eval/src/vespa/eval/streamed/streamed_value.cpp | 3
-rw-r--r--  eval/src/vespa/eval/streamed/streamed_value.h | 11
-rw-r--r--  eval/src/vespa/eval/streamed/streamed_value_builder.h | 24
-rw-r--r--  eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp | 2
-rw-r--r--  eval/src/vespa/eval/streamed/streamed_value_builder_factory.h | 2
-rw-r--r--  eval/src/vespa/eval/streamed/streamed_value_index.cpp | 12
-rw-r--r--  eval/src/vespa/eval/streamed/streamed_value_index.h | 11
-rw-r--r--  eval/src/vespa/eval/streamed/streamed_value_utils.h | 27
-rw-r--r--  eval/src/vespa/eval/streamed/streamed_value_view.h | 4
32 files changed, 639 insertions, 307 deletions
diff --git a/eval/src/tests/eval/fast_value/fast_value_test.cpp b/eval/src/tests/eval/fast_value/fast_value_test.cpp
index 03658d8351b..e809fb1bcda 100644
--- a/eval/src/tests/eval/fast_value/fast_value_test.cpp
+++ b/eval/src/tests/eval/fast_value/fast_value_test.cpp
@@ -8,6 +8,8 @@
using namespace vespalib;
using namespace vespalib::eval;
+using Handle = SharedStringRepo::Handle;
+
TEST(FastCellsTest, push_back_fast_works) {
FastCells<float> cells(3);
EXPECT_EQ(cells.capacity, 4);
@@ -60,38 +62,37 @@ TEST(FastCellsTest, add_cells_works) {
using SA = std::vector<vespalib::stringref>;
-TEST(FastValueBuilderTest, dense_add_subspace_robustness) {
+TEST(FastValueBuilderTest, scalar_add_subspace_robustness) {
auto factory = FastValueBuilderFactory::get();
- ValueType type = ValueType::from_spec("tensor(x[2])");
+ ValueType type = ValueType::from_spec("double");
auto builder = factory.create_value_builder<double>(type);
- auto subspace = builder->add_subspace({});
+ auto subspace = builder->add_subspace();
subspace[0] = 17.0;
- subspace[1] = 666;
- auto other = builder->add_subspace({});
- other[1] = 42.0;
+ auto other = builder->add_subspace();
+ other[0] = 42.0;
auto value = builder->build(std::move(builder));
+ EXPECT_EQ(value->index().size(), 1);
auto actual = spec_from_value(*value);
- auto expected = TensorSpec("tensor(x[2])").
- add({{"x", 0}}, 17.0).
- add({{"x", 1}}, 42.0);
- EXPECT_EQ(actual, expected);
+ auto expected = TensorSpec("double").
+ add({}, 42.0);
+ EXPECT_EQ(actual, expected);
}
-TEST(FastValueBuilderTest, sparse_add_subspace_robustness) {
+TEST(FastValueBuilderTest, dense_add_subspace_robustness) {
auto factory = FastValueBuilderFactory::get();
- ValueType type = ValueType::from_spec("tensor(x{})");
+ ValueType type = ValueType::from_spec("tensor(x[2])");
auto builder = factory.create_value_builder<double>(type);
- auto subspace = builder->add_subspace(SA{"foo"});
+ auto subspace = builder->add_subspace();
subspace[0] = 17.0;
- subspace = builder->add_subspace(SA{"bar"});
- subspace[0] = 18.0;
- auto other = builder->add_subspace(SA{"foo"});
- other[0] = 42.0;
+ subspace[1] = 666;
+ auto other = builder->add_subspace();
+ other[1] = 42.0;
auto value = builder->build(std::move(builder));
+ EXPECT_EQ(value->index().size(), 1);
auto actual = spec_from_value(*value);
- auto expected = TensorSpec("tensor(x{})").
- add({{"x", "bar"}}, 18.0).
- add({{"x", "foo"}}, 42.0);
+ auto expected = TensorSpec("tensor(x[2])").
+ add({{"x", 0}}, 17.0).
+ add({{"x", 1}}, 42.0);
EXPECT_EQ(actual, expected);
}
@@ -100,21 +101,43 @@ TEST(FastValueBuilderTest, mixed_add_subspace_robustness) {
ValueType type = ValueType::from_spec("tensor(x{},y[2])");
auto builder = factory.create_value_builder<double>(type);
auto subspace = builder->add_subspace(SA{"foo"});
- subspace[0] = 17.0;
- subspace[1] = 666;
+ subspace[0] = 1.0;
+ subspace[1] = 5.0;
subspace = builder->add_subspace(SA{"bar"});
- subspace[0] = 18.0;
- subspace[1] = 19.0;
+ subspace[0] = 2.0;
+ subspace[1] = 10.0;
auto other = builder->add_subspace(SA{"foo"});
- other[1] = 42.0;
+ other[0] = 3.0;
+ other[1] = 15.0;
auto value = builder->build(std::move(builder));
- auto actual = spec_from_value(*value);
- auto expected = TensorSpec("tensor(x{},y[2])").
- add({{"x", "foo"}, {"y", 0}}, 17.0).
- add({{"x", "bar"}, {"y", 0}}, 18.0).
- add({{"x", "bar"}, {"y", 1}}, 19.0).
- add({{"x", "foo"}, {"y", 1}}, 42.0);
- EXPECT_EQ(actual, expected);
+ EXPECT_EQ(value->index().size(), 3);
+ Handle foo("foo");
+ Handle bar("bar");
+ label_t label;
+ label_t *label_ptr = &label;
+ size_t subspace_idx;
+ auto get_subspace = [&]() {
+ auto cells = value->cells().typify<double>();
+ return ConstArrayRef<double>(cells.begin() + subspace_idx * 2, 2);
+ };
+ auto view = value->index().create_view({});
+ view->lookup({});
+ while (view->next_result({&label_ptr, 1}, subspace_idx)) {
+ if (label == bar.id()) {
+ auto values = get_subspace();
+ EXPECT_EQ(values[0], 2.0);
+ EXPECT_EQ(values[1], 10.0);
+ } else {
+ EXPECT_EQ(label, foo.id());
+ auto values = get_subspace();
+ if (values[0] == 1) {
+ EXPECT_EQ(values[1], 5.0);
+ } else {
+ EXPECT_EQ(values[0], 3.0);
+ EXPECT_EQ(values[1], 15.0);
+ }
+ }
+ }
}
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/tests/eval/simple_value/simple_value_test.cpp b/eval/src/tests/eval/simple_value/simple_value_test.cpp
index c05f9976e1a..1691d5c263c 100644
--- a/eval/src/tests/eval/simple_value/simple_value_test.cpp
+++ b/eval/src/tests/eval/simple_value/simple_value_test.cpp
@@ -16,8 +16,12 @@ using namespace vespalib::eval::test;
using vespalib::make_string_short::fmt;
-using PA = std::vector<vespalib::stringref *>;
-using CPA = std::vector<const vespalib::stringref *>;
+using PA = std::vector<label_t *>;
+using CPA = std::vector<const label_t *>;
+
+using Handle = SharedStringRepo::Handle;
+
+vespalib::string as_str(label_t label) { return Handle::string_from_id(label); }
std::vector<Layout> layouts = {
{},
@@ -98,17 +102,18 @@ TEST(SimpleValueTest, simple_value_can_be_built_and_inspected) {
std::unique_ptr<Value> value = builder->build(std::move(builder));
EXPECT_EQ(value->index().size(), 6);
auto view = value->index().create_view({0});
- vespalib::stringref query = "b";
- vespalib::stringref label;
+ Handle query_handle("b");
+ label_t query = query_handle.id();
+ label_t label;
size_t subspace;
+ std::map<vespalib::string,size_t> result;
view->lookup(CPA{&query});
- EXPECT_TRUE(view->next_result(PA{&label}, subspace));
- EXPECT_EQ(label, "aa");
- EXPECT_EQ(subspace, 2);
- EXPECT_TRUE(view->next_result(PA{&label}, subspace));
- EXPECT_EQ(label, "bb");
- EXPECT_EQ(subspace, 3);
- EXPECT_FALSE(view->next_result(PA{&label}, subspace));
+ while (view->next_result(PA{&label}, subspace)) {
+ result[as_str(label)] = subspace;
+ }
+ EXPECT_EQ(result.size(), 2);
+ EXPECT_EQ(result["aa"], 2);
+ EXPECT_EQ(result["bb"], 3);
}
TEST(SimpleValueTest, new_generic_join_works_for_simple_values) {
diff --git a/eval/src/tests/streamed/value/streamed_value_test.cpp b/eval/src/tests/streamed/value/streamed_value_test.cpp
index 05d6e20451c..5221c4eda64 100644
--- a/eval/src/tests/streamed/value/streamed_value_test.cpp
+++ b/eval/src/tests/streamed/value/streamed_value_test.cpp
@@ -16,8 +16,12 @@ using namespace vespalib::eval::test;
using vespalib::make_string_short::fmt;
-using PA = std::vector<vespalib::stringref *>;
-using CPA = std::vector<const vespalib::stringref *>;
+using PA = std::vector<label_t *>;
+using CPA = std::vector<const label_t *>;
+
+using Handle = SharedStringRepo::Handle;
+
+vespalib::string as_str(label_t label) { return Handle::string_from_id(label); }
std::vector<Layout> layouts = {
{},
@@ -98,17 +102,18 @@ TEST(StreamedValueTest, streamed_value_can_be_built_and_inspected) {
std::unique_ptr<Value> value = builder->build(std::move(builder));
EXPECT_EQ(value->index().size(), 6);
auto view = value->index().create_view({0});
- vespalib::stringref query = "b";
- vespalib::stringref label;
+ Handle query_handle("b");
+ label_t query = query_handle.id();
+ label_t label;
size_t subspace;
+ std::map<vespalib::string,size_t> result;
view->lookup(CPA{&query});
- EXPECT_TRUE(view->next_result(PA{&label}, subspace));
- EXPECT_EQ(label, "aa");
- EXPECT_EQ(subspace, 2);
- EXPECT_TRUE(view->next_result(PA{&label}, subspace));
- EXPECT_EQ(label, "bb");
- EXPECT_EQ(subspace, 3);
- EXPECT_FALSE(view->next_result(PA{&label}, subspace));
+ while (view->next_result(PA{&label}, subspace)) {
+ result[as_str(label)] = subspace;
+ }
+ EXPECT_EQ(result.size(), 2);
+ EXPECT_EQ(result["aa"], 2);
+ EXPECT_EQ(result["bb"], 3);
}
TEST(StreamedValueTest, new_generic_join_works_for_streamed_values) {
diff --git a/eval/src/vespa/eval/eval/CMakeLists.txt b/eval/src/vespa/eval/eval/CMakeLists.txt
index 01eeff49662..5f8dd478a7b 100644
--- a/eval/src/vespa/eval/eval/CMakeLists.txt
+++ b/eval/src/vespa/eval/eval/CMakeLists.txt
@@ -10,6 +10,7 @@ vespa_add_library(eval_eval OBJECT
delete_node.cpp
dense_cells_value.cpp
double_value_builder.cpp
+ fast_addr_map.cpp
fast_forest.cpp
fast_sparse_map.cpp
fast_value.cpp
diff --git a/eval/src/vespa/eval/eval/fast_addr_map.cpp b/eval/src/vespa/eval/eval/fast_addr_map.cpp
new file mode 100644
index 00000000000..73163f411e6
--- /dev/null
+++ b/eval/src/vespa/eval/eval/fast_addr_map.cpp
@@ -0,0 +1,9 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "fast_addr_map.h"
+
+namespace vespalib::eval {
+
+FastAddrMap::~FastAddrMap() = default;
+
+}
diff --git a/eval/src/vespa/eval/eval/fast_addr_map.h b/eval/src/vespa/eval/eval/fast_addr_map.h
new file mode 100644
index 00000000000..a8a82718a28
--- /dev/null
+++ b/eval/src/vespa/eval/eval/fast_addr_map.h
@@ -0,0 +1,152 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "label.h"
+#include "memory_usage_stuff.h"
+#include <vespa/vespalib/util/arrayref.h>
+#include <vespa/vespalib/stllike/identity.h>
+#include <vespa/vespalib/stllike/hashtable.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
+#include <vector>
+
+namespace vespalib::eval {
+
+/**
+ * A wrapper around vespalib::hashtable, using it to map a list of
+ * labels (a sparse address) to an integer value (dense subspace
+ * index). Labels are represented by string enum values stored and
+ * handled outside this class.
+ **/
+class FastAddrMap
+{
+public:
+ // label hashing functions
+ static constexpr uint32_t hash_label(label_t label) { return label; }
+ static constexpr uint32_t hash_label(const label_t *label) { return *label; }
+ static constexpr uint32_t combine_label_hash(uint32_t full_hash, uint32_t next_hash) {
+ return ((full_hash * 31) + next_hash);
+ }
+ template <typename T>
+ static constexpr uint32_t hash_labels(ConstArrayRef<T> addr) {
+ uint32_t hash = 0;
+ for (const T &label: addr) {
+ hash = combine_label_hash(hash, hash_label(label));
+ }
+ return hash;
+ }
+
+ // typed uint32_t index used to identify sparse address/dense subspace
+ struct Tag {
+ uint32_t idx;
+ static constexpr uint32_t npos() { return uint32_t(-1); }
+ static constexpr Tag make_invalid() { return Tag{npos()}; }
+ constexpr bool valid() const { return (idx != npos()); }
+ };
+
+ // sparse hash set entry
+ struct Entry {
+ Tag tag;
+ uint32_t hash;
+ };
+
+ // alternative key(s) used for lookup in sparse hash set
+ template <typename T> struct AltKey {
+ ConstArrayRef<T> key;
+ uint32_t hash;
+ };
+
+ // view able to convert tags into sparse addresses
+ struct LabelView {
+ size_t addr_size;
+ const std::vector<label_t> &labels;
+ LabelView(size_t num_mapped_dims, SharedStringRepo::HandleView handle_view)
+ : addr_size(num_mapped_dims), labels(handle_view.handles()) {}
+ ConstArrayRef<label_t> get_addr(size_t idx) const {
+ return {&labels[idx * addr_size], addr_size};
+ }
+ };
+
+ // hashing functor for sparse hash set
+ struct Hash {
+ template <typename T>
+ constexpr uint32_t operator()(const AltKey<T> &key) const { return key.hash; }
+ constexpr uint32_t operator()(const Entry &entry) const { return entry.hash; }
+ };
+
+ // equality functor for sparse hash set
+ struct Equal {
+ const LabelView &label_view;
+ Equal(const LabelView &label_view_in) : label_view(label_view_in) {}
+ static constexpr bool eq_labels(label_t a, label_t b) { return (a == b); }
+ static constexpr bool eq_labels(label_t a, const label_t *b) { return (a == *b); }
+ template <typename T>
+ bool operator()(const Entry &a, const AltKey<T> &b) const {
+ if ((a.hash != b.hash) || (b.key.size() != label_view.addr_size)) {
+ return false;
+ }
+ auto a_key = label_view.get_addr(a.tag.idx);
+ for (size_t i = 0; i < a_key.size(); ++i) {
+ if (!eq_labels(a_key[i], b.key[i])) {
+ return false;
+ }
+ }
+ return true;
+ }
+ };
+
+ using HashType = hashtable<Entry, Entry, Hash, Equal, Identity, hashtable_base::and_modulator>;
+
+private:
+ LabelView _labels;
+ HashType _map;
+
+public:
+ FastAddrMap(size_t num_mapped_dims, SharedStringRepo::HandleView handle_view, size_t expected_subspaces)
+ : _labels(num_mapped_dims, handle_view),
+ _map(expected_subspaces * 2, Hash(), Equal(_labels)) {}
+ ~FastAddrMap();
+ FastAddrMap(const FastAddrMap &) = delete;
+ FastAddrMap &operator=(const FastAddrMap &) = delete;
+ FastAddrMap(FastAddrMap &&) = delete;
+ FastAddrMap &operator=(FastAddrMap &&) = delete;
+ static constexpr size_t npos() { return -1; }
+ ConstArrayRef<label_t> get_addr(size_t idx) const { return _labels.get_addr(idx); }
+ size_t size() const { return _map.size(); }
+ constexpr size_t addr_size() const { return _labels.addr_size; }
+ template <typename T>
+ size_t lookup(ConstArrayRef<T> addr, uint32_t hash) const {
+ AltKey<T> key{addr, hash};
+ auto pos = _map.find(key);
+ return (pos == _map.end()) ? npos() : pos->tag.idx;
+ }
+ template <typename T>
+ size_t lookup(ConstArrayRef<T> addr) const {
+ return lookup(addr, hash_labels(addr));
+ }
+ void add_mapping(uint32_t hash) {
+ uint32_t idx = _map.size();
+ _map.force_insert(Entry{{idx}, hash});
+ }
+ template <typename F>
+ void each_map_entry(F &&f) const {
+ _map.for_each([&](const auto &entry)
+ {
+ f(entry.tag.idx, entry.hash);
+ });
+ }
+ MemoryUsage estimate_extra_memory_usage() const {
+ MemoryUsage extra_usage;
+ size_t map_self_size = sizeof(_map);
+ size_t map_used = _map.getMemoryUsed();
+ size_t map_allocated = _map.getMemoryConsumption();
+ // avoid double-counting the map itself
+ map_used = std::min(map_used, map_used - map_self_size);
+ map_allocated = std::min(map_allocated, map_allocated - map_self_size);
+ extra_usage.incUsedBytes(map_used);
+ extra_usage.incAllocatedBytes(map_allocated);
+ return extra_usage;
+ }
+};
+
+}
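
For reference, a small sketch of how the hash helpers above combine per-label hashes (hash = hash * 31 + id, label by label) and how a full-address lookup behaves; the map instance itself is assumed to come from a FastValueIndex as in fast_value.hpp below:

    #include <vespa/eval/eval/fast_addr_map.h>

    using vespalib::ConstArrayRef;
    using vespalib::eval::FastAddrMap;
    using vespalib::eval::label_t;

    label_t addr[2] = {7, 42};                                   // two label ids
    uint32_t h = FastAddrMap::hash_labels(ConstArrayRef<label_t>(addr, 2));
    // h == ((0 * 31) + 7) * 31 + 42 == 259

    // lookup with a precomputed hash, or let the map hash for you;
    // both return FastAddrMap::npos() when the address is not mapped:
    // size_t subspace = map.lookup(ConstArrayRef<label_t>(addr, 2), h);
    // size_t subspace = map.lookup(ConstArrayRef<label_t>(addr, 2));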
diff --git a/eval/src/vespa/eval/eval/fast_value.cpp b/eval/src/vespa/eval/eval/fast_value.cpp
index 116e561a868..96d0fa84149 100644
--- a/eval/src/vespa/eval/eval/fast_value.cpp
+++ b/eval/src/vespa/eval/eval/fast_value.cpp
@@ -11,7 +11,7 @@ namespace vespalib::eval {
namespace {
struct CreateFastValueBuilderBase {
- template <typename T> static std::unique_ptr<ValueBuilderBase> invoke(const ValueType &type,
+ template <typename T, typename R2> static std::unique_ptr<ValueBuilderBase> invoke(const ValueType &type,
size_t num_mapped_dims, size_t subspace_size, size_t expected_subspaces)
{
assert(check_cell_type<T>(type.cell_type()));
@@ -20,7 +20,7 @@ struct CreateFastValueBuilderBase {
} else if (num_mapped_dims == 0) {
return std::make_unique<FastDenseValue<T>>(type, subspace_size);
} else {
- return std::make_unique<FastValue<T>>(type, num_mapped_dims, subspace_size, expected_subspaces);
+ return std::make_unique<FastValue<T,R2::value>>(type, num_mapped_dims, subspace_size, expected_subspaces);
}
}
};
@@ -32,11 +32,11 @@ struct CreateFastValueBuilderBase {
std::unique_ptr<Value::Index::View>
FastValueIndex::create_view(const std::vector<size_t> &dims) const
{
- if (map.num_dims() == 0) {
+ if (map.addr_size() == 0) {
return TrivialIndex::get().create_view(dims);
} else if (dims.empty()) {
return std::make_unique<FastIterateView>(map);
- } else if (dims.size() == map.num_dims()) {
+ } else if (dims.size() == map.addr_size()) {
return std::make_unique<FastLookupView>(map);
} else {
return std::make_unique<FastFilterView>(map, dims);
@@ -49,10 +49,11 @@ FastValueBuilderFactory::FastValueBuilderFactory() = default;
FastValueBuilderFactory FastValueBuilderFactory::_factory;
std::unique_ptr<ValueBuilderBase>
-FastValueBuilderFactory::create_value_builder_base(const ValueType &type, size_t num_mapped_dims, size_t subspace_size,
- size_t expected_subspaces) const
+FastValueBuilderFactory::create_value_builder_base(const ValueType &type, bool transient, size_t num_mapped_dims, size_t subspace_size,
+ size_t expected_subspaces) const
{
- return typify_invoke<1,TypifyCellType,CreateFastValueBuilderBase>(type.cell_type(), type, num_mapped_dims, subspace_size, expected_subspaces);
+ using MyTypify = TypifyValue<TypifyCellType,TypifyBool>;
+ return typify_invoke<2,MyTypify,CreateFastValueBuilderBase>(type.cell_type(), transient, type, num_mapped_dims, subspace_size, expected_subspaces);
}
//-----------------------------------------------------------------------------
diff --git a/eval/src/vespa/eval/eval/fast_value.h b/eval/src/vespa/eval/eval/fast_value.h
index ac924ecc6eb..c6280b492db 100644
--- a/eval/src/vespa/eval/eval/fast_value.h
+++ b/eval/src/vespa/eval/eval/fast_value.h
@@ -19,7 +19,7 @@ class FastValueBuilderFactory : public ValueBuilderFactory {
private:
FastValueBuilderFactory();
static FastValueBuilderFactory _factory;
- std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type,
+ std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type, bool transient,
size_t num_mapped_dims, size_t subspace_size, size_t expected_subspaces) const override;
public:
static const FastValueBuilderFactory &get() { return _factory; }
diff --git a/eval/src/vespa/eval/eval/fast_value.hpp b/eval/src/vespa/eval/eval/fast_value.hpp
index 9914378cc9e..972aa68b8bd 100644
--- a/eval/src/vespa/eval/eval/fast_value.hpp
+++ b/eval/src/vespa/eval/eval/fast_value.hpp
@@ -1,11 +1,10 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "value.h"
-#include "fast_sparse_map.h"
+#include "fast_addr_map.h"
#include "inline_operation.h"
#include <vespa/eval/instruction/generic_join.h>
-#include <vespa/vespalib/stllike/hash_map.hpp>
-#include <vespa/vespalib/util/alloc.h>
+#include <vespa/vespalib/stllike/hashtable.hpp>
namespace vespalib::eval {
@@ -18,22 +17,22 @@ namespace {
// look up a full address in the map directly
struct FastLookupView : public Value::Index::View {
- const FastSparseMap &map;
- size_t subspace;
+ const FastAddrMap &map;
+ size_t subspace;
- FastLookupView(const FastSparseMap &map_in)
- : map(map_in), subspace(FastSparseMap::npos()) {}
+ FastLookupView(const FastAddrMap &map_in)
+ : map(map_in), subspace(FastAddrMap::npos()) {}
- void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
+ void lookup(ConstArrayRef<const label_t*> addr) override {
subspace = map.lookup(addr);
}
- bool next_result(ConstArrayRef<vespalib::stringref*>, size_t &idx_out) override {
- if (subspace == FastSparseMap::npos()) {
+ bool next_result(ConstArrayRef<label_t*>, size_t &idx_out) override {
+ if (subspace == FastAddrMap::npos()) {
return false;
}
idx_out = subspace;
- subspace = FastSparseMap::npos();
+ subspace = FastAddrMap::npos();
return true;
}
};
@@ -43,30 +42,27 @@ struct FastLookupView : public Value::Index::View {
// find matching mappings for a partial address with brute force filtering
struct FastFilterView : public Value::Index::View {
- using Label = FastSparseMap::HashedLabel;
-
- size_t num_mapped_dims;
- const std::vector<Label> &labels;
+ const FastAddrMap &map;
std::vector<size_t> match_dims;
std::vector<size_t> extract_dims;
- std::vector<Label> query;
+ std::vector<label_t> query;
size_t pos;
- bool is_match() const {
+ bool is_match(ConstArrayRef<label_t> addr) const {
for (size_t i = 0; i < query.size(); ++i) {
- if (query[i].hash != labels[pos + match_dims[i]].hash) {
+ if (query[i] != addr[match_dims[i]]) {
return false;
}
}
return true;
}
- FastFilterView(const FastSparseMap &map, const std::vector<size_t> &match_dims_in)
- : num_mapped_dims(map.num_dims()), labels(map.labels()), match_dims(match_dims_in),
- extract_dims(), query(match_dims.size(), Label()), pos(labels.size())
+ FastFilterView(const FastAddrMap &map_in, const std::vector<size_t> &match_dims_in)
+ : map(map_in), match_dims(match_dims_in),
+ extract_dims(), query(match_dims.size()), pos(FastAddrMap::npos())
{
auto my_pos = match_dims.begin();
- for (size_t i = 0; i < num_mapped_dims; ++i) {
+ for (size_t i = 0; i < map.addr_size(); ++i) {
if ((my_pos == match_dims.end()) || (*my_pos != i)) {
extract_dims.push_back(i);
} else {
@@ -74,29 +70,29 @@ struct FastFilterView : public Value::Index::View {
}
}
assert(my_pos == match_dims.end());
- assert((match_dims.size() + extract_dims.size()) == num_mapped_dims);
+ assert((match_dims.size() + extract_dims.size()) == map.addr_size());
}
- void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
+ void lookup(ConstArrayRef<const label_t*> addr) override {
assert(addr.size() == query.size());
for (size_t i = 0; i < addr.size(); ++i) {
- query[i] = Label(*addr[i]);
+ query[i] = *addr[i];
}
pos = 0;
}
- bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
- while (pos < labels.size()) {
- if (is_match()) {
+ bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override {
+ while (pos < map.size()) {
+ auto addr = map.get_addr(pos);
+ if (is_match(addr)) {
assert(addr_out.size() == extract_dims.size());
for (size_t i = 0; i < extract_dims.size(); ++i) {
- *addr_out[i] = labels[pos + extract_dims[i]].label;
+ *addr_out[i] = addr[extract_dims[i]];
}
- idx_out = (pos / num_mapped_dims); // is this expensive?
- pos += num_mapped_dims;
+ idx_out = pos++;
return true;
}
- pos += num_mapped_dims;
+ ++pos;
}
return false;
}
@@ -107,29 +103,26 @@ struct FastFilterView : public Value::Index::View {
// iterate all mappings
struct FastIterateView : public Value::Index::View {
- using Labels = std::vector<FastSparseMap::HashedLabel>;
-
- size_t num_mapped_dims;
- const Labels &labels;
- size_t pos;
+ const FastAddrMap &map;
+ size_t pos;
- FastIterateView(const FastSparseMap &map)
- : num_mapped_dims(map.num_dims()), labels(map.labels()), pos(labels.size()) {}
+ FastIterateView(const FastAddrMap &map_in)
+ : map(map_in), pos(FastAddrMap::npos()) {}
- void lookup(ConstArrayRef<const vespalib::stringref*>) override {
+ void lookup(ConstArrayRef<const label_t*>) override {
pos = 0;
}
- bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
- if (pos >= labels.size()) {
+ bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override {
+ if (pos >= map.size()) {
return false;
}
- assert(addr_out.size() == num_mapped_dims);
- for (size_t i = 0; i < num_mapped_dims; ++i) {
- *addr_out[i] = labels[pos + i].label;
+ auto addr = map.get_addr(pos);
+ assert(addr.size() == addr_out.size());
+ for (size_t i = 0; i < addr.size(); ++i) {
+ *addr_out[i] = addr[i];
}
- idx_out = (pos / num_mapped_dims); // is this expensive?
- pos += num_mapped_dims;
+ idx_out = pos++;
return true;
}
};
@@ -145,9 +138,9 @@ using JoinAddrSource = instruction::SparseJoinPlan::Source;
// operations by calling inline functions directly.
struct FastValueIndex final : Value::Index {
- FastSparseMap map;
- FastValueIndex(size_t num_mapped_dims_in, size_t expected_subspaces_in)
- : map(num_mapped_dims_in, expected_subspaces_in) {}
+ FastAddrMap map;
+ FastValueIndex(size_t num_mapped_dims_in, SharedStringRepo::HandleView handle_view, size_t expected_subspaces_in)
+ : map(num_mapped_dims_in, handle_view, expected_subspaces_in) {}
template <typename LCT, typename RCT, typename OCT, typename Fun>
static const Value &sparse_full_overlap_join(const ValueType &res_type, const Fun &fun,
@@ -220,31 +213,64 @@ struct FastCells {
//-----------------------------------------------------------------------------
-template <typename T>
+template <typename T, bool transient>
struct FastValue final : Value, ValueBuilder<T> {
+ using Handles = std::conditional<transient,
+ SharedStringRepo::WeakHandles,
+ SharedStringRepo::StrongHandles>::type;
+
ValueType my_type;
size_t my_subspace_size;
+ Handles my_handles;
FastValueIndex my_index;
FastCells<T> my_cells;
FastValue(const ValueType &type_in, size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces_in)
: my_type(type_in), my_subspace_size(subspace_size_in),
- my_index(num_mapped_dims_in, expected_subspaces_in),
+ my_handles(expected_subspaces_in * num_mapped_dims_in),
+ my_index(num_mapped_dims_in, my_handles.view(), expected_subspaces_in),
my_cells(subspace_size_in * expected_subspaces_in) {}
~FastValue() override;
const ValueType &type() const override { return my_type; }
const Value::Index &index() const override { return my_index; }
TypedCells cells() const override { return TypedCells(my_cells.memory, get_cell_type<T>(), my_cells.size); }
+ void add_mapping(ConstArrayRef<vespalib::stringref> addr) {
+ if constexpr (transient) {
+ (void) addr;
+ abort(); // cannot use this for transient values
+ } else {
+ uint32_t hash = 0;
+ for (const auto &label: addr) {
+ hash = FastAddrMap::combine_label_hash(hash, FastAddrMap::hash_label(my_handles.add(label)));
+ }
+ my_index.map.add_mapping(hash);
+ }
+ }
+ void add_mapping(ConstArrayRef<label_t> addr) {
+ uint32_t hash = 0;
+ for (label_t label: addr) {
+ hash = FastAddrMap::combine_label_hash(hash, FastAddrMap::hash_label(label));
+ my_handles.add(label);
+ }
+ my_index.map.add_mapping(hash);
+ }
+ void add_mapping(ConstArrayRef<label_t> addr, uint32_t hash) {
+ for (label_t label: addr) {
+ my_handles.add(label);
+ }
+ my_index.map.add_mapping(hash);
+ }
ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) override {
- size_t idx = my_index.map.add_mapping(addr) * my_subspace_size;
- if (__builtin_expect((idx == my_cells.size), true)) {
- return my_cells.add_cells(my_subspace_size);
- }
- return ArrayRef<T>(my_cells.get(idx), my_subspace_size);
+ add_mapping(addr);
+ return my_cells.add_cells(my_subspace_size);
+ }
+ ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) override {
+ add_mapping(addr);
+ return my_cells.add_cells(my_subspace_size);
}
std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>> self) override {
- if (my_index.map.num_dims() == 0) {
+ if (my_index.map.addr_size() == 0) {
assert(my_index.map.size() == 1);
}
assert(my_cells.size == (my_index.map.size() * my_subspace_size));
@@ -254,13 +280,14 @@ struct FastValue final : Value, ValueBuilder<T> {
return std::unique_ptr<Value>(this);
}
MemoryUsage get_memory_usage() const override {
- MemoryUsage usage = self_memory_usage<FastValue<T>>();
+ MemoryUsage usage = self_memory_usage<FastValue<T,transient>>();
+ usage.merge(vector_extra_memory_usage(my_handles.view().handles()));
usage.merge(my_index.map.estimate_extra_memory_usage());
usage.merge(my_cells.estimate_extra_memory_usage());
return usage;
}
};
-template <typename T> FastValue<T>::~FastValue() = default;
+template <typename T,bool transient> FastValue<T,transient>::~FastValue() = default;
//-----------------------------------------------------------------------------
@@ -282,6 +309,9 @@ struct FastDenseValue final : Value, ValueBuilder<T> {
ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref>) override {
return ArrayRef<T>(my_cells.get(0), my_cells.size);
}
+ ArrayRef<T> add_subspace(ConstArrayRef<label_t>) override {
+ return ArrayRef<T>(my_cells.get(0), my_cells.size);
+ }
std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>> self) override {
ValueBuilder<T>* me = this;
assert(me == self.get());
@@ -289,7 +319,7 @@ struct FastDenseValue final : Value, ValueBuilder<T> {
return std::unique_ptr<Value>(this);
}
MemoryUsage get_memory_usage() const override {
- MemoryUsage usage = self_memory_usage<FastValue<T>>();
+ MemoryUsage usage = self_memory_usage<FastDenseValue<T>>();
usage.merge(my_cells.estimate_extra_memory_usage());
return usage;
}
@@ -302,6 +332,7 @@ template <typename T>
struct FastScalarBuilder final : ValueBuilder<T> {
T _value;
ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref>) final override { return ArrayRef<T>(&_value, 1); }
+ ArrayRef<T> add_subspace(ConstArrayRef<label_t>) final override { return ArrayRef<T>(&_value, 1); };
std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>>) final override { return std::make_unique<ScalarValue<T>>(_value); }
};
@@ -313,19 +344,16 @@ FastValueIndex::sparse_full_overlap_join(const ValueType &res_type, const Fun &f
const FastValueIndex &lhs, const FastValueIndex &rhs,
ConstArrayRef<LCT> lhs_cells, ConstArrayRef<RCT> rhs_cells, Stash &stash)
{
- auto &result = stash.create<FastValue<OCT>>(res_type, lhs.map.num_dims(), 1, lhs.map.size());
- auto &result_map = result.my_index.map;
- lhs.map.each_map_entry([&](auto lhs_subspace, auto hash)
- {
- auto rhs_subspace = rhs.map.lookup(hash);
- if (rhs_subspace != FastSparseMap::npos()) {
- auto idx = result_map.add_mapping(lhs.map.make_addr(lhs_subspace), hash);
- if (__builtin_expect((idx == result.my_cells.size), true)) {
- auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]);
- result.my_cells.push_back_fast(cell_value);
- }
- }
- });
+ auto &result = stash.create<FastValue<OCT,true>>(res_type, lhs.map.addr_size(), 1, lhs.map.size());
+ lhs.map.each_map_entry([&](auto lhs_subspace, auto hash) {
+ auto lhs_addr = lhs.map.get_addr(lhs_subspace);
+ auto rhs_subspace = rhs.map.lookup(lhs_addr, hash);
+ if (rhs_subspace != FastAddrMap::npos()) {
+ result.add_mapping(lhs_addr, hash);
+ auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]);
+ result.my_cells.push_back_fast(cell_value);
+ }
+ });
return result;
}
@@ -338,10 +366,9 @@ FastValueIndex::sparse_no_overlap_join(const ValueType &res_type, const Fun &fun
const std::vector<JoinAddrSource> &addr_sources,
ConstArrayRef<LCT> lhs_cells, ConstArrayRef<RCT> rhs_cells, Stash &stash)
{
- using HashedLabelRef = std::reference_wrapper<const FastSparseMap::HashedLabel>;
size_t num_mapped_dims = addr_sources.size();
- auto &result = stash.create<FastValue<OCT>>(res_type, num_mapped_dims, 1, lhs.map.size()*rhs.map.size());
- std::vector<HashedLabelRef> output_addr(num_mapped_dims, FastSparseMap::empty_label);
+ auto &result = stash.create<FastValue<OCT,true>>(res_type, num_mapped_dims, 1, lhs.map.size()*rhs.map.size());
+ std::vector<label_t> output_addr(num_mapped_dims);
std::vector<size_t> store_lhs_idx;
std::vector<size_t> store_rhs_idx;
size_t out_idx = 0;
@@ -359,24 +386,22 @@ FastValueIndex::sparse_no_overlap_join(const ValueType &res_type, const Fun &fun
}
assert(out_idx == output_addr.size());
for (size_t lhs_subspace = 0; lhs_subspace < lhs.map.size(); ++lhs_subspace) {
- auto l_addr = lhs.map.make_addr(lhs_subspace);
+ auto l_addr = lhs.map.get_addr(lhs_subspace);
assert(l_addr.size() == store_lhs_idx.size());
for (size_t i = 0; i < store_lhs_idx.size(); ++i) {
size_t addr_idx = store_lhs_idx[i];
output_addr[addr_idx] = l_addr[i];
}
for (size_t rhs_subspace = 0; rhs_subspace < rhs.map.size(); ++rhs_subspace) {
- auto r_addr = rhs.map.make_addr(rhs_subspace);
+ auto r_addr = rhs.map.get_addr(rhs_subspace);
assert(r_addr.size() == store_rhs_idx.size());
for (size_t i = 0; i < store_rhs_idx.size(); ++i) {
size_t addr_idx = store_rhs_idx[i];
output_addr[addr_idx] = r_addr[i];
}
- auto idx = result.my_index.map.add_mapping(ConstArrayRef(output_addr));
- if (__builtin_expect((idx == result.my_cells.size), true)) {
- auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]);
- result.my_cells.push_back_fast(cell_value);
- }
+ result.add_mapping(ConstArrayRef(output_addr));
+ auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]);
+ result.my_cells.push_back_fast(cell_value);
}
}
return result;
@@ -391,22 +416,22 @@ FastValueIndex::sparse_only_merge(const ValueType &res_type, const Fun &fun,
ConstArrayRef<LCT> lhs_cells, ConstArrayRef<RCT> rhs_cells, Stash &stash)
{
size_t guess_size = lhs.map.size() + rhs.map.size();
- auto &result = stash.create<FastValue<OCT>>(res_type, lhs.map.num_dims(), 1, guess_size);
- result.my_index = lhs;
- for (auto val : lhs_cells) {
- result.my_cells.push_back_fast(val);
- }
+ auto &result = stash.create<FastValue<OCT,true>>(res_type, lhs.map.addr_size(), 1, guess_size);
+ lhs.map.each_map_entry([&](auto lhs_subspace, auto hash)
+ {
+ result.add_mapping(lhs.map.get_addr(lhs_subspace), hash);
+ result.my_cells.push_back_fast(lhs_cells[lhs_subspace]);
+ });
rhs.map.each_map_entry([&](auto rhs_subspace, auto hash)
{
- auto lhs_subspace = lhs.map.lookup(hash);
- if (lhs_subspace == FastSparseMap::npos()) {
- auto idx = result.my_index.map.add_mapping(rhs.map.make_addr(rhs_subspace), hash);
- if (__builtin_expect((idx == result.my_cells.size), true)) {
- result.my_cells.push_back_fast(rhs_cells[rhs_subspace]);
- }
+ auto rhs_addr = rhs.map.get_addr(rhs_subspace);
+ auto result_subspace = result.my_index.map.lookup(rhs_addr, hash);
+ if (result_subspace == FastAddrMap::npos()) {
+ result.add_mapping(rhs_addr, hash);
+ result.my_cells.push_back_fast(rhs_cells[rhs_subspace]);
} else {
- auto cell_value = fun(lhs_cells[lhs_subspace], rhs_cells[rhs_subspace]);
- *result.my_cells.get(lhs_subspace) = cell_value;
+ OCT &out_cell = *result.my_cells.get(result_subspace);
+ out_cell = fun(out_cell, rhs_cells[rhs_subspace]);
}
});
return result;
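
The bool template parameter threaded through FastValue here selects between strong and weak string-repo handles. A minimal sketch of how the two builder entry points (declared in value.h below) are meant to be used; factory, res_type and the size arguments stand in for whatever the caller has at hand:

    // persistent value: the value keeps StrongHandles, so its labels stay
    // valid for the value's own lifetime
    auto builder = factory.create_value_builder<double>(
        res_type, num_mapped_dims, subspace_size, expected_subspaces);

    // transient value (e.g. a short-lived intermediate inside join/merge):
    // only WeakHandles are kept, so the caller must guarantee that the
    // label ids outlive the produced value
    auto tmp_builder = factory.create_transient_value_builder<double>(
        res_type, num_mapped_dims, subspace_size, expected_subspaces);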
diff --git a/eval/src/vespa/eval/eval/label.h b/eval/src/vespa/eval/eval/label.h
new file mode 100644
index 00000000000..931f96a4f1a
--- /dev/null
+++ b/eval/src/vespa/eval/eval/label.h
@@ -0,0 +1,15 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <cstdint>
+
+namespace vespalib::eval {
+
+// We use string ids from SharedStringRepo as labels. Note that
+// label_t represents the lightweight reference type. Other structures
+// (Handle/StrongHandles) are needed to keep the id valid.
+
+using label_t = uint32_t;
+
+}
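
A tiny sketch of the lifetime rule described in the comment above, using only the Handle API shown elsewhere in this diff:

    #include <vespa/eval/eval/label.h>
    #include <vespa/vespalib/util/shared_string_repo.h>

    using vespalib::eval::label_t;
    using Handle = vespalib::SharedStringRepo::Handle;

    Handle foo("foo");                                   // interns "foo" and keeps it alive
    label_t id = foo.id();                               // lightweight reference to the string
    vespalib::string str = Handle::string_from_id(id);   // "foo"
    Handle again = Handle::handle_from_id(id);           // obtain another handle from the id
    // once every handle referring to "foo" is gone, 'id' must no longer be used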
diff --git a/eval/src/vespa/eval/eval/simple_value.cpp b/eval/src/vespa/eval/eval/simple_value.cpp
index 113f89f77fb..0cbbb29ecf1 100644
--- a/eval/src/vespa/eval/eval/simple_value.cpp
+++ b/eval/src/vespa/eval/eval/simple_value.cpp
@@ -30,7 +30,8 @@ struct CreateSimpleValueBuilderBase {
// look up a full address in the map directly
struct SimpleLookupView : public Value::Index::View {
- using Labels = std::vector<vespalib::string>;
+ using Handle = SharedStringRepo::Handle;
+ using Labels = std::vector<Handle>;
using Map = std::map<Labels, size_t>;
const Map &map;
@@ -38,17 +39,17 @@ struct SimpleLookupView : public Value::Index::View {
Map::const_iterator pos;
SimpleLookupView(const Map &map_in, size_t num_dims)
- : map(map_in), my_addr(num_dims, ""), pos(map.end()) {}
+ : map(map_in), my_addr(num_dims), pos(map.end()) {}
- void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
+ void lookup(ConstArrayRef<const label_t*> addr) override {
assert(addr.size() == my_addr.size());
for (size_t i = 0; i < my_addr.size(); ++i) {
- my_addr[i] = *addr[i];
+ my_addr[i] = Handle::handle_from_id(*addr[i]);
}
pos = map.find(my_addr);
}
- bool next_result(ConstArrayRef<vespalib::stringref*>, size_t &idx_out) override {
+ bool next_result(ConstArrayRef<label_t*>, size_t &idx_out) override {
if (pos == map.end()) {
return false;
}
@@ -63,13 +64,14 @@ struct SimpleLookupView : public Value::Index::View {
// find matching mappings for a partial address with brute force filtering
struct SimpleFilterView : public Value::Index::View {
- using Labels = std::vector<vespalib::string>;
+ using Handle = SharedStringRepo::Handle;
+ using Labels = std::vector<Handle>;
using Map = std::map<Labels, size_t>;
const Map &map;
std::vector<size_t> match_dims;
std::vector<size_t> extract_dims;
- std::vector<vespalib::string> query;
+ std::vector<Handle> query;
Map::const_iterator pos;
bool is_match() const {
@@ -82,7 +84,7 @@ struct SimpleFilterView : public Value::Index::View {
}
SimpleFilterView(const Map &map_in, const std::vector<size_t> &match_dims_in, size_t num_dims)
- : map(map_in), match_dims(match_dims_in), extract_dims(), query(match_dims.size(), ""), pos(map.end())
+ : map(map_in), match_dims(match_dims_in), extract_dims(), query(match_dims.size()), pos(map.end())
{
auto my_pos = match_dims.begin();
for (size_t i = 0; i < num_dims; ++i) {
@@ -96,20 +98,20 @@ struct SimpleFilterView : public Value::Index::View {
assert((match_dims.size() + extract_dims.size()) == num_dims);
}
- void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
+ void lookup(ConstArrayRef<const label_t*> addr) override {
assert(addr.size() == query.size());
for (size_t i = 0; i < addr.size(); ++i) {
- query[i] = *addr[i];
+ query[i] = Handle::handle_from_id(*addr[i]);
}
pos = map.begin();
}
- bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
+ bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override {
while (pos != map.end()) {
if (is_match()) {
assert(addr_out.size() == extract_dims.size());
for (size_t i = 0; i < extract_dims.size(); ++i) {
- *addr_out[i] = pos->first[extract_dims[i]];
+ *addr_out[i] = pos->first[extract_dims[i]].id();
}
idx_out = pos->second;
++pos;
@@ -126,7 +128,8 @@ struct SimpleFilterView : public Value::Index::View {
// iterate all mappings
struct SimpleIterateView : public Value::Index::View {
- using Labels = std::vector<vespalib::string>;
+ using Handle = SharedStringRepo::Handle;
+ using Labels = std::vector<Handle>;
using Map = std::map<Labels, size_t>;
const Map &map;
@@ -135,17 +138,17 @@ struct SimpleIterateView : public Value::Index::View {
SimpleIterateView(const Map &map_in)
: map(map_in), pos(map.end()) {}
- void lookup(ConstArrayRef<const vespalib::stringref*>) override {
+ void lookup(ConstArrayRef<const label_t*>) override {
pos = map.begin();
}
- bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
+ bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override {
if (pos == map.end()) {
return false;
}
assert(addr_out.size() == pos->first.size());
for (size_t i = 0; i < addr_out.size(); ++i) {
- *addr_out[i] = pos->first[i];
+ *addr_out[i] = pos->first[i].id();
}
idx_out = pos->second;
++pos;
@@ -182,6 +185,17 @@ SimpleValue::add_mapping(ConstArrayRef<vespalib::stringref> addr)
assert(was_inserted);
}
+void
+SimpleValue::add_mapping(ConstArrayRef<label_t> addr)
+{
+ Labels my_addr;
+ for(label_t label: addr) {
+ my_addr.emplace_back(Handle::handle_from_id(label));
+ }
+ auto [ignore, was_inserted] = _index.emplace(my_addr, _index.size());
+ assert(was_inserted);
+}
+
MemoryUsage
SimpleValue::estimate_extra_memory_usage() const
{
@@ -246,15 +260,26 @@ SimpleValueT<T>::add_subspace(ConstArrayRef<vespalib::stringref> addr)
return ArrayRef<T>(&_cells[old_size], subspace_size());
}
+template <typename T>
+ArrayRef<T>
+SimpleValueT<T>::add_subspace(ConstArrayRef<label_t> addr)
+{
+ size_t old_size = _cells.size();
+ add_mapping(addr);
+ _cells.resize(old_size + subspace_size(), std::numeric_limits<T>::quiet_NaN());
+ return ArrayRef<T>(&_cells[old_size], subspace_size());
+}
+
//-----------------------------------------------------------------------------
SimpleValueBuilderFactory::SimpleValueBuilderFactory() = default;
SimpleValueBuilderFactory SimpleValueBuilderFactory::_factory;
std::unique_ptr<ValueBuilderBase>
-SimpleValueBuilderFactory::create_value_builder_base(const ValueType &type, size_t num_mapped_dims, size_t subspace_size,
+SimpleValueBuilderFactory::create_value_builder_base(const ValueType &type, bool transient, size_t num_mapped_dims, size_t subspace_size,
size_t expected_subspaces) const
{
+ (void) transient;
return typify_invoke<1,TypifyCellType,CreateSimpleValueBuilderBase>(type.cell_type(), type, num_mapped_dims, subspace_size, expected_subspaces);
}
diff --git a/eval/src/vespa/eval/eval/simple_value.h b/eval/src/vespa/eval/eval/simple_value.h
index 590c0b4ef16..1fd645b704c 100644
--- a/eval/src/vespa/eval/eval/simple_value.h
+++ b/eval/src/vespa/eval/eval/simple_value.h
@@ -3,7 +3,7 @@
#pragma once
#include "value.h"
-#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
#include <vector>
#include <map>
@@ -26,7 +26,8 @@ class TensorSpec;
class SimpleValue : public Value, public Value::Index
{
private:
- using Labels = std::vector<vespalib::string>;
+ using Handle = SharedStringRepo::Handle;
+ using Labels = std::vector<Handle>;
ValueType _type;
size_t _num_mapped_dims;
@@ -36,6 +37,7 @@ protected:
size_t num_mapped_dims() const { return _num_mapped_dims; }
size_t subspace_size() const { return _subspace_size; }
void add_mapping(ConstArrayRef<vespalib::stringref> addr);
+ void add_mapping(ConstArrayRef<label_t> addr);
MemoryUsage estimate_extra_memory_usage() const;
public:
SimpleValue(const ValueType &type, size_t num_mapped_dims_in, size_t subspace_size_in);
@@ -62,6 +64,7 @@ public:
~SimpleValueT() override;
TypedCells cells() const override { return TypedCells(ConstArrayRef<T>(_cells)); }
ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) override;
+ ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) override;
std::unique_ptr<Value> build(std::unique_ptr<ValueBuilder<T>> self) override {
if (num_mapped_dims() == 0) {
assert(size() == 1);
@@ -87,7 +90,7 @@ class SimpleValueBuilderFactory : public ValueBuilderFactory {
private:
SimpleValueBuilderFactory();
static SimpleValueBuilderFactory _factory;
- std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type,
+ std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type, bool transient,
size_t num_mapped_dims, size_t subspace_size, size_t expected_subspaces) const override;
public:
static const SimpleValueBuilderFactory &get() { return _factory; }
diff --git a/eval/src/vespa/eval/eval/value.cpp b/eval/src/vespa/eval/eval/value.cpp
index 7abc8d568cb..73c7c40636c 100644
--- a/eval/src/vespa/eval/eval/value.cpp
+++ b/eval/src/vespa/eval/eval/value.cpp
@@ -12,8 +12,8 @@ namespace {
struct TrivialView : Value::Index::View {
bool first = false;
- void lookup(ConstArrayRef<const vespalib::stringref*> ) override { first = true; }
- bool next_result(ConstArrayRef<vespalib::stringref*> , size_t &idx_out) override {
+ void lookup(ConstArrayRef<const label_t*> ) override { first = true; }
+ bool next_result(ConstArrayRef<label_t*> , size_t &idx_out) override {
if (first) {
idx_out = 0;
first = false;
diff --git a/eval/src/vespa/eval/eval/value.h b/eval/src/vespa/eval/eval/value.h
index 186c3698dcd..2efb7d7c1e4 100644
--- a/eval/src/vespa/eval/eval/value.h
+++ b/eval/src/vespa/eval/eval/value.h
@@ -2,6 +2,7 @@
#pragma once
+#include "label.h"
#include "memory_usage_stuff.h"
#include "value_type.h"
#include "typed_cells.h"
@@ -36,13 +37,13 @@ struct Value {
// partial address for the dimensions given to
// create_view. Results from the lookup is extracted using
// the next_result function.
- virtual void lookup(ConstArrayRef<const vespalib::stringref*> addr) = 0;
+ virtual void lookup(ConstArrayRef<const label_t*> addr) = 0;
// Extract the next result (if any) from the previous
// lookup into the given partial address and index. Only
// the labels for the dimensions NOT specified in
// create_view will be extracted here.
- virtual bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) = 0;
+ virtual bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) = 0;
virtual ~View() {}
};
@@ -163,6 +164,14 @@ struct ValueBuilder : ValueBuilderBase {
// is not allowed.
virtual ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) = 0;
+ // add a dense subspace for the given address where labels are
+ // specified by shared string repo ids. Note that the caller is
+ // responsible for making sure the ids are valid 'long enough'.
+ virtual ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) = 0;
+
+ // convenience function to add a subspace with an empty address
+ ArrayRef<T> add_subspace() { return add_subspace(ConstArrayRef<label_t>()); }
+
// Given the ownership of the builder itself, produce the newly
// created value. This means that builders can only be used once,
// it also means values can build themselves.
@@ -179,26 +188,40 @@ struct ValueBuilder : ValueBuilderBase {
* builder. With interoperability between all values.
**/
struct ValueBuilderFactory {
+private:
template <typename T>
- std::unique_ptr<ValueBuilder<T>> create_value_builder(const ValueType &type,
+ std::unique_ptr<ValueBuilder<T>> create_value_builder(const ValueType &type, bool transient,
size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces) const
{
assert(check_cell_type<T>(type.cell_type()));
- auto base = create_value_builder_base(type, num_mapped_dims_in, subspace_size_in, expected_subspaces);
+ auto base = create_value_builder_base(type, transient, num_mapped_dims_in, subspace_size_in, expected_subspaces);
ValueBuilder<T> *builder = dynamic_cast<ValueBuilder<T>*>(base.get());
assert(builder);
base.release();
return std::unique_ptr<ValueBuilder<T>>(builder);
}
+public:
+ template <typename T>
+ std::unique_ptr<ValueBuilder<T>> create_value_builder(const ValueType &type,
+ size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces) const
+ {
+ return create_value_builder<T>(type, false, num_mapped_dims_in, subspace_size_in, expected_subspaces);
+ }
+ template <typename T>
+ std::unique_ptr<ValueBuilder<T>> create_transient_value_builder(const ValueType &type,
+ size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces) const
+ {
+ return create_value_builder<T>(type, true, num_mapped_dims_in, subspace_size_in, expected_subspaces);
+ }
template <typename T>
std::unique_ptr<ValueBuilder<T>> create_value_builder(const ValueType &type) const
{
- return create_value_builder<T>(type, type.count_mapped_dimensions(), type.dense_subspace_size(), 1);
+ return create_value_builder<T>(type, false, type.count_mapped_dimensions(), type.dense_subspace_size(), 1);
}
std::unique_ptr<Value> copy(const Value &value) const;
virtual ~ValueBuilderFactory() {}
protected:
- virtual std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type,
+ virtual std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type, bool transient,
size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces) const = 0;
};
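
A compact end-to-end sketch of the updated index API: building a small sparse value with label ids and then iterating its mappings, mirroring the simple_value/streamed_value tests earlier in this diff:

    #include <vespa/eval/eval/simple_value.h>
    #include <vespa/vespalib/util/shared_string_repo.h>

    using namespace vespalib;
    using namespace vespalib::eval;
    using Handle = SharedStringRepo::Handle;

    const auto &factory = SimpleValueBuilderFactory::get();
    auto builder = factory.create_value_builder<double>(ValueType::from_spec("tensor(x{})"));
    Handle aa("aa");
    Handle bb("bb");
    builder->add_subspace(std::vector<label_t>{aa.id()})[0] = 1.0;
    builder->add_subspace(std::vector<label_t>{bb.id()})[0] = 2.0;
    auto value = builder->build(std::move(builder));

    label_t out_label;
    size_t subspace;
    std::vector<label_t*> out_refs = {&out_label};
    auto view = value->index().create_view({});          // no bound dimensions: iterate all
    view->lookup({});
    while (view->next_result(out_refs, subspace)) {
        // Handle::string_from_id(out_label) is "aa" or "bb"; subspace is its dense index
    }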
diff --git a/eval/src/vespa/eval/eval/value_codec.cpp b/eval/src/vespa/eval/eval/value_codec.cpp
index 923d3f29cd3..53131da86d8 100644
--- a/eval/src/vespa/eval/eval/value_codec.cpp
+++ b/eval/src/vespa/eval/eval/value_codec.cpp
@@ -7,6 +7,7 @@
#include <vespa/vespalib/util/exceptions.h>
#include <vespa/vespalib/util/typify.h>
#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
using vespalib::make_string_short::fmt;
@@ -128,9 +129,10 @@ size_t maybe_decode_num_blocks(nbostream &input, bool has_mapped_dims, const For
return 1;
}
-void encode_mapped_labels(nbostream &output, size_t num_mapped_dims, const std::vector<vespalib::stringref> &addr) {
+void encode_mapped_labels(nbostream &output, size_t num_mapped_dims, const std::vector<label_t> &addr) {
for (size_t i = 0; i < num_mapped_dims; ++i) {
- output.writeSmallString(addr[i]);
+ vespalib::string str = SharedStringRepo::Handle::string_from_id(addr[i]);
+ output.writeSmallString(str);
}
}
@@ -175,7 +177,7 @@ struct ContentDecoder {
}
// add implicit empty subspace
if ((state.num_mapped_dims == 0) && (state.num_blocks == 0)) {
- for (T &cell: builder->add_subspace({})) {
+ for (T &cell: builder->add_subspace()) {
cell = T{};
}
}
@@ -229,8 +231,8 @@ struct CreateTensorSpecFromValue {
TensorSpec spec(value.type().to_spec());
size_t subspace_id = 0;
size_t subspace_size = value.type().dense_subspace_size();
- std::vector<vespalib::stringref> labels(value.type().count_mapped_dimensions());
- std::vector<vespalib::stringref*> label_refs;
+ std::vector<label_t> labels(value.type().count_mapped_dimensions());
+ std::vector<label_t*> label_refs;
for (auto &label: labels) {
label_refs.push_back(&label);
}
@@ -241,7 +243,7 @@ struct CreateTensorSpecFromValue {
TensorSpec::Address addr;
for (const auto &dim: value.type().dimensions()) {
if (dim.is_mapped()) {
- addr.emplace(dim.name, labels[label_idx++]);
+ addr.emplace(dim.name, SharedStringRepo::Handle::string_from_id(labels[label_idx++]));
}
}
for (size_t i = 0; i < subspace_size; ++i) {
@@ -270,8 +272,8 @@ struct EncodeState {
struct ContentEncoder {
template<typename T>
static void invoke(const Value &value, const EncodeState &state, nbostream &output) {
- std::vector<vespalib::stringref> address(state.num_mapped_dims);
- std::vector<vespalib::stringref*> a_refs(state.num_mapped_dims);;
+ std::vector<label_t> address(state.num_mapped_dims);
+ std::vector<label_t*> a_refs(state.num_mapped_dims);;
for (size_t i = 0; i < state.num_mapped_dims; ++i) {
a_refs[i] = &address[i];
}
diff --git a/eval/src/vespa/eval/instruction/generic_concat.cpp b/eval/src/vespa/eval/instruction/generic_concat.cpp
index fa9d2192b99..5d8ab7187c0 100644
--- a/eval/src/vespa/eval/instruction/generic_concat.cpp
+++ b/eval/src/vespa/eval/instruction/generic_concat.cpp
@@ -47,10 +47,10 @@ generic_concat(const Value &a, const Value &b,
auto a_cells = a.cells().typify<LCT>();
auto b_cells = b.cells().typify<RCT>();
SparseJoinState sparse(sparse_plan, a.index(), b.index());
- auto builder = factory.create_value_builder<OCT>(res_type,
- sparse_plan.sources.size(),
- dense_plan.output_size,
- sparse.first_index.size());
+ auto builder = factory.create_transient_value_builder<OCT>(res_type,
+ sparse_plan.sources.size(),
+ dense_plan.output_size,
+ sparse.first_index.size());
auto outer = sparse.first_index.create_view({});
auto inner = sparse.second_index.create_view(sparse.second_view_dims);
outer->lookup({});
diff --git a/eval/src/vespa/eval/instruction/generic_create.cpp b/eval/src/vespa/eval/instruction/generic_create.cpp
index 02c89e0b43f..6e30da846e7 100644
--- a/eval/src/vespa/eval/instruction/generic_create.cpp
+++ b/eval/src/vespa/eval/instruction/generic_create.cpp
@@ -5,6 +5,7 @@
#include <vespa/eval/eval/array_array_map.h>
#include <vespa/vespalib/util/stash.h>
#include <vespa/vespalib/util/typify.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
#include <cassert>
using namespace vespalib::eval::tensor_function;
@@ -13,6 +14,7 @@ namespace vespalib::eval::instruction {
using State = InterpretedFunction::State;
using Instruction = InterpretedFunction::Instruction;
+using Handle = SharedStringRepo::Handle;
namespace {
@@ -21,12 +23,12 @@ struct CreateParam {
size_t num_mapped_dims;
size_t dense_subspace_size;
size_t num_children;
- ArrayArrayMap<vespalib::string,size_t> my_spec;
+ ArrayArrayMap<Handle,size_t> my_spec;
const ValueBuilderFactory &factory;
static constexpr size_t npos = -1;
- ArrayRef<size_t> indexes(ConstArrayRef<vespalib::string> key) {
+ ArrayRef<size_t> indexes(ConstArrayRef<Handle> key) {
auto [tag, first_time] = my_spec.lookup_or_add_entry(key);
auto rv = my_spec.get_values(tag);
if (first_time) {
@@ -49,7 +51,7 @@ struct CreateParam {
{
size_t last_child = num_children - 1;
for (const auto & entry : spec_in) {
- std::vector<vespalib::string> sparse_key;
+ std::vector<Handle> sparse_key;
size_t dense_key = 0;
auto dim = res_type.dimensions().begin();
auto binding = entry.first.begin();
@@ -58,7 +60,7 @@ struct CreateParam {
assert(dim->name == binding->first);
assert(dim->is_mapped() == binding->second.is_mapped());
if (dim->is_mapped()) {
- sparse_key.push_back(binding->second.name);
+ sparse_key.push_back(Handle(binding->second.name));
} else {
assert(binding->second.index < dim->size);
dense_key = (dense_key * dim->size) + binding->second.index;
@@ -76,16 +78,16 @@ struct CreateParam {
template <typename T>
void my_generic_create_op(State &state, uint64_t param_in) {
const auto &param = unwrap_param<CreateParam>(param_in);
- auto builder = param.factory.create_value_builder<T>(param.res_type,
- param.num_mapped_dims,
- param.dense_subspace_size,
- param.my_spec.size());
- std::vector<vespalib::stringref> sparse_addr;
+ auto builder = param.factory.create_transient_value_builder<T>(param.res_type,
+ param.num_mapped_dims,
+ param.dense_subspace_size,
+ param.my_spec.size());
+ std::vector<label_t> sparse_addr;
param.my_spec.each_entry([&](const auto &key, const auto &values)
{
sparse_addr.clear();
for (const auto & label : key) {
- sparse_addr.push_back(label);
+ sparse_addr.push_back(label.id());
}
T *dst = builder->add_subspace(sparse_addr).begin();
for (size_t stack_idx : values) {
diff --git a/eval/src/vespa/eval/instruction/generic_join.cpp b/eval/src/vespa/eval/instruction/generic_join.cpp
index 026df5aa993..e0dc0feea28 100644
--- a/eval/src/vespa/eval/instruction/generic_join.cpp
+++ b/eval/src/vespa/eval/instruction/generic_join.cpp
@@ -41,7 +41,7 @@ generic_mixed_join(const Value &lhs, const Value &rhs, const JoinParam &param)
if (param.sparse_plan.lhs_overlap.empty() && param.sparse_plan.rhs_overlap.empty()) {
expected_subspaces = sparse.first_index.size() * sparse.second_index.size();
}
- auto builder = param.factory.create_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), param.dense_plan.out_size, expected_subspaces);
+ auto builder = param.factory.create_transient_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), param.dense_plan.out_size, expected_subspaces);
auto outer = sparse.first_index.create_view({});
auto inner = sparse.second_index.create_view(sparse.second_view_dims);
outer->lookup({});
@@ -92,7 +92,7 @@ void my_sparse_no_overlap_join_op(State &state, uint64_t param_in) {
SparseJoinState sparse(param.sparse_plan, lhs.index(), rhs.index());
auto guess = lhs.index().size() * rhs.index().size();
assert(param.dense_plan.out_size == 1);
- auto builder = param.factory.create_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), 1, guess);
+ auto builder = param.factory.create_transient_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), 1, guess);
auto outer = sparse.first_index.create_view({});
assert(sparse.second_view_dims.empty());
auto inner = sparse.second_index.create_view({});
@@ -131,7 +131,7 @@ void my_sparse_full_overlap_join_op(State &state, uint64_t param_in) {
}
Fun fun(param.function);
SparseJoinState sparse(param.sparse_plan, lhs_index, rhs_index);
- auto builder = param.factory.create_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), param.dense_plan.out_size, sparse.first_index.size());
+ auto builder = param.factory.create_transient_value_builder<OCT>(param.res_type, param.sparse_plan.sources.size(), param.dense_plan.out_size, sparse.first_index.size());
auto outer = sparse.first_index.create_view({});
auto inner = sparse.second_index.create_view(sparse.second_view_dims);
outer->lookup({});
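With addresses expressed as integer ids, the sparse join variants above can match subspaces by comparing id vectors instead of strings. A minimal sketch of a full-overlap join under that assumption (Address, label_id and the std::map are illustrative stand-ins, not the vespalib types):

#include <cstdint>
#include <map>
#include <utility>
#include <vector>

using label_id = uint32_t;
using Address = std::vector<label_id>;

int main() {
    // subspace index per sparse address, for two input tensors
    std::map<Address, size_t> lhs = {{{0, 1}, 0}, {{2, 3}, 1}};
    std::map<Address, size_t> rhs = {{{2, 3}, 0}, {{4, 5}, 1}};

    std::vector<std::pair<size_t, size_t>> matches; // (lhs subspace, rhs subspace)
    for (const auto &[addr, lhs_subspace] : lhs) {
        auto pos = rhs.find(addr);          // integer-wise compare of the address
        if (pos != rhs.end()) {
            matches.emplace_back(lhs_subspace, pos->second);
        }
    }
    // matches == {(1, 0)}: only the address {2, 3} is present on both sides
}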
diff --git a/eval/src/vespa/eval/instruction/generic_join.h b/eval/src/vespa/eval/instruction/generic_join.h
index 988286be980..217f3195dec 100644
--- a/eval/src/vespa/eval/instruction/generic_join.h
+++ b/eval/src/vespa/eval/instruction/generic_join.h
@@ -68,10 +68,10 @@ struct SparseJoinState {
const Value::Index &first_index;
const Value::Index &second_index;
const std::vector<size_t> &second_view_dims;
- std::vector<vespalib::stringref> full_address;
- std::vector<vespalib::stringref*> first_address;
- std::vector<const vespalib::stringref*> address_overlap;
- std::vector<vespalib::stringref*> second_only_address;
+ std::vector<label_t> full_address;
+ std::vector<label_t*> first_address;
+ std::vector<const label_t*> address_overlap;
+ std::vector<label_t*> second_only_address;
size_t lhs_subspace;
size_t rhs_subspace;
size_t &first_subspace;
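SparseJoinState keeps one full_address buffer and exposes it through pointer vectors (first_address, address_overlap, second_only_address). A small sketch of that wiring, assuming a hypothetical label_id:

#include <cstdint>
#include <vector>

using label_id = uint32_t;

int main() {
    std::vector<label_id> full_address(3);      // dims: a (lhs only), b (overlap), c (rhs only)
    std::vector<label_id*> first_address  = {&full_address[0], &full_address[1]};
    std::vector<const label_id*> overlap  = {&full_address[1]};
    std::vector<label_id*> second_only    = {&full_address[2]};

    // writing through the lhs view fills the shared slots ...
    *first_address[0] = 7;
    *first_address[1] = 9;
    // ... so the overlap view can be used directly as a lookup key on the rhs side
    label_id key = *overlap[0];                 // == 9, no label copying involved
    (void) second_only; (void) key;
}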
diff --git a/eval/src/vespa/eval/instruction/generic_merge.cpp b/eval/src/vespa/eval/instruction/generic_merge.cpp
index 02749a04eb9..107cb805d74 100644
--- a/eval/src/vespa/eval/instruction/generic_merge.cpp
+++ b/eval/src/vespa/eval/instruction/generic_merge.cpp
@@ -63,10 +63,10 @@ generic_mixed_merge(const Value &a, const Value &b,
const size_t num_mapped = params.num_mapped_dimensions;
const size_t subspace_size = params.dense_subspace_size;
size_t guess_subspaces = std::max(a.index().size(), b.index().size());
- auto builder = params.factory.create_value_builder<OCT>(params.res_type, num_mapped, subspace_size, guess_subspaces);
- std::vector<vespalib::stringref> address(num_mapped);
- std::vector<const vespalib::stringref *> addr_cref;
- std::vector<vespalib::stringref *> addr_ref;
+ auto builder = params.factory.create_transient_value_builder<OCT>(params.res_type, num_mapped, subspace_size, guess_subspaces);
+ std::vector<label_t> address(num_mapped);
+ std::vector<const label_t *> addr_cref;
+ std::vector<label_t *> addr_ref;
for (auto & ref : address) {
addr_cref.push_back(&ref);
addr_ref.push_back(&ref);
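The merge loop above builds both a const and a mutable pointer view over the same address vector, so the same slots serve index lookups and address fetches. A small sketch of that pattern (illustrative names only):

#include <cstdint>
#include <vector>

using label_id = uint32_t;

int main() {
    std::vector<label_id> address(2);
    std::vector<const label_id *> addr_cref; // passed to lookup-style APIs
    std::vector<label_id *> addr_ref;        // passed to fetch-style APIs
    for (auto &ref : address) {
        addr_cref.push_back(&ref);
        addr_ref.push_back(&ref);
    }
    *addr_ref[0] = 7;                        // a fetch writes into the buffer
    label_id seen = *addr_cref[0];           // a lookup reads the same slot
    (void) seen;
}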
diff --git a/eval/src/vespa/eval/instruction/generic_peek.cpp b/eval/src/vespa/eval/instruction/generic_peek.cpp
index 66538911890..d94742ae15c 100644
--- a/eval/src/vespa/eval/instruction/generic_peek.cpp
+++ b/eval/src/vespa/eval/instruction/generic_peek.cpp
@@ -7,6 +7,7 @@
#include <vespa/vespalib/util/stash.h>
#include <vespa/vespalib/util/typify.h>
#include <vespa/vespalib/util/visit_ranges.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
#include <cassert>
using namespace vespalib::eval::tensor_function;
@@ -16,6 +17,8 @@ namespace vespalib::eval::instruction {
using State = InterpretedFunction::State;
using Instruction = InterpretedFunction::Instruction;
+using Handle = SharedStringRepo::Handle;
+
namespace {
static constexpr size_t npos = -1;
@@ -35,28 +38,43 @@ size_t count_children(const Spec &spec)
}
struct DimSpec {
- vespalib::stringref name;
- GenericPeek::SpecMap::mapped_type child_or_label;
+ enum class DimType { CHILD_IDX, LABEL_IDX, LABEL_STR };
+ vespalib::string name;
+ DimType dim_type;
+ size_t idx;
+ Handle str;
+ static DimSpec from_child(const vespalib::string &name_in, size_t child_idx) {
+ return {name_in, DimType::CHILD_IDX, child_idx, Handle()};
+ }
+ static DimSpec from_label(const vespalib::string &name_in, const TensorSpec::Label &label) {
+ if (label.is_mapped()) {
+ return {name_in, DimType::LABEL_STR, 0, Handle(label.name)};
+ } else {
+ assert(label.is_indexed());
+ return {name_in, DimType::LABEL_IDX, label.index, Handle()};
+ }
+ }
+ ~DimSpec();
bool has_child() const {
- return std::holds_alternative<size_t>(child_or_label);
+ return (dim_type == DimType::CHILD_IDX);
}
bool has_label() const {
- return std::holds_alternative<TensorSpec::Label>(child_or_label);
+ return (dim_type != DimType::CHILD_IDX);
}
size_t get_child_idx() const {
- return std::get<size_t>(child_or_label);
+ assert(dim_type == DimType::CHILD_IDX);
+ return idx;
}
- vespalib::stringref get_label_name() const {
- auto & label = std::get<TensorSpec::Label>(child_or_label);
- assert(label.is_mapped());
- return label.name;
+ label_t get_label_name() const {
+ assert(dim_type == DimType::LABEL_STR);
+ return str.id();
}
size_t get_label_index() const {
- auto & label = std::get<TensorSpec::Label>(child_or_label);
- assert(label.is_indexed());
- return label.index;
+ assert(dim_type == DimType::LABEL_IDX);
+ return idx;
}
};
+DimSpec::~DimSpec() = default;
struct ExtractedSpecs {
using Dimension = ValueType::Dimension;
@@ -85,7 +103,11 @@ struct ExtractedSpecs {
dimensions.push_back(a);
const auto & [spec_dim_name, child_or_label] = b;
assert(a.name == spec_dim_name);
- specs.emplace_back(DimSpec{a.name, child_or_label});
+ if (std::holds_alternative<size_t>(child_or_label)) {
+ specs.push_back(DimSpec::from_child(a.name, std::get<size_t>(child_or_label)));
+ } else {
+ specs.push_back(DimSpec::from_label(a.name, std::get<TensorSpec::Label>(child_or_label)));
+ }
}
}
};
@@ -181,22 +203,21 @@ struct DensePlan {
};
struct SparseState {
- std::vector<vespalib::string> view_addr;
- std::vector<vespalib::stringref> view_refs;
- std::vector<const vespalib::stringref *> lookup_refs;
- std::vector<vespalib::stringref> output_addr;
- std::vector<vespalib::stringref *> fetch_addr;
-
- SparseState(std::vector<vespalib::string> view_addr_in, size_t out_dims)
- : view_addr(std::move(view_addr_in)),
- view_refs(view_addr.size()),
+ std::vector<Handle> handles;
+ std::vector<label_t> view_addr;
+ std::vector<const label_t *> lookup_refs;
+ std::vector<label_t> output_addr;
+ std::vector<label_t *> fetch_addr;
+
+ SparseState(std::vector<Handle> handles_in, std::vector<label_t> view_addr_in, size_t out_dims)
+ : handles(std::move(handles_in)),
+ view_addr(std::move(view_addr_in)),
lookup_refs(view_addr.size()),
output_addr(out_dims),
fetch_addr(out_dims)
{
for (size_t i = 0; i < view_addr.size(); ++i) {
- view_refs[i] = view_addr[i];
- lookup_refs[i] = &view_refs[i];
+ lookup_refs[i] = &view_addr[i];
}
for (size_t i = 0; i < out_dims; ++i) {
fetch_addr[i] = &output_addr[i];
@@ -236,17 +257,19 @@ struct SparsePlan {
template <typename Getter>
SparseState make_state(const Getter &get_child_value) const {
- std::vector<vespalib::string> view_addr;
+ std::vector<Handle> handles;
+ std::vector<label_t> view_addr;
for (const auto & dim : lookup_specs) {
if (dim.has_child()) {
int64_t child_value = get_child_value(dim.get_child_idx());
- view_addr.push_back(vespalib::make_string("%" PRId64, child_value));
+ handles.emplace_back(vespalib::make_string("%" PRId64, child_value));
+ view_addr.push_back(handles.back().id());
} else {
view_addr.push_back(dim.get_label_name());
}
}
assert(view_addr.size() == view_dims.size());
- return SparseState(std::move(view_addr), out_mapped_dims);
+ return SparseState(std::move(handles), std::move(view_addr), out_mapped_dims);
}
};
SparsePlan::~SparsePlan() = default;
@@ -284,10 +307,10 @@ generic_mixed_peek(const ValueType &res_type,
{
auto input_cells = input_value.cells().typify<ICT>();
size_t bad_guess = 1;
- auto builder = factory.create_value_builder<OCT>(res_type,
- sparse_plan.out_mapped_dims,
- dense_plan.out_dense_size,
- bad_guess);
+ auto builder = factory.create_transient_value_builder<OCT>(res_type,
+ sparse_plan.out_mapped_dims,
+ dense_plan.out_dense_size,
+ bad_guess);
size_t filled_subspaces = 0;
size_t dense_offset = dense_plan.get_offset(get_child_value);
if (dense_offset != npos) {
@@ -304,7 +327,7 @@ generic_mixed_peek(const ValueType &res_type,
}
}
if ((sparse_plan.out_mapped_dims == 0) && (filled_subspaces == 0)) {
- for (auto & v : builder->add_subspace({})) {
+ for (auto & v : builder->add_subspace()) {
v = OCT{};
}
}
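In the peek path, labels derived from child values are interned on the fly, and SparseState keeps the owning handles next to the id address so the ids stay valid for the duration of the lookup. A rough sketch of that ownership idea, using plain strings as stand-in handles (hypothetical names, not the vespalib API):

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

using label_id = uint32_t;

int main() {
    std::vector<std::string> handles;   // owns labels created during the peek
    std::vector<label_id> view_addr;

    auto intern = [&](const std::string &s) -> label_id {
        handles.push_back(s);                       // keep the string alive
        return static_cast<label_id>(handles.size() - 1);
    };

    int64_t child_value = 42;                       // value produced by a child
    char buf[32];
    snprintf(buf, sizeof(buf), "%lld", static_cast<long long>(child_value));
    view_addr.push_back(intern(buf));               // label derived from a child value
    view_addr.push_back(intern("fixed_label"));     // label given in the peek spec
    // view_addr now holds ids whose backing strings outlive the index lookup
}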
diff --git a/eval/src/vespa/eval/instruction/generic_reduce.cpp b/eval/src/vespa/eval/instruction/generic_reduce.cpp
index afc46e8ee7d..d30186d3dd8 100644
--- a/eval/src/vespa/eval/instruction/generic_reduce.cpp
+++ b/eval/src/vespa/eval/instruction/generic_reduce.cpp
@@ -45,10 +45,10 @@ ReduceParam::~ReduceParam() = default;
//-----------------------------------------------------------------------------
struct SparseReduceState {
- std::vector<vespalib::stringref> full_address;
- std::vector<vespalib::stringref*> fetch_address;
- std::vector<vespalib::stringref*> keep_address;
- size_t subspace;
+ std::vector<label_t> full_address;
+ std::vector<label_t*> fetch_address;
+ std::vector<label_t*> keep_address;
+ size_t subspace;
SparseReduceState(const SparseReducePlan &plan)
: full_address(plan.keep_dims.size() + plan.num_reduce_dims),
@@ -71,20 +71,20 @@ template <typename ICT, typename OCT, typename AGGR>
Value::UP
generic_reduce(const Value &value, const ReduceParam &param) {
auto cells = value.cells().typify<ICT>();
- ArrayArrayMap<vespalib::stringref,AGGR> map(param.sparse_plan.keep_dims.size(),
- param.dense_plan.out_size,
- value.index().size());
+ ArrayArrayMap<label_t,AGGR> map(param.sparse_plan.keep_dims.size(),
+ param.dense_plan.out_size,
+ value.index().size());
SparseReduceState sparse(param.sparse_plan);
auto full_view = value.index().create_view({});
full_view->lookup({});
- ConstArrayRef<vespalib::stringref*> keep_addr(sparse.keep_address);
+ ConstArrayRef<label_t*> keep_addr(sparse.keep_address);
while (full_view->next_result(sparse.fetch_address, sparse.subspace)) {
auto [tag, ignore] = map.lookup_or_add_entry(keep_addr);
AGGR *dst = map.get_values(tag).begin();
auto sample = [&](size_t src_idx, size_t dst_idx) { dst[dst_idx].sample(cells[src_idx]); };
param.dense_plan.execute(sparse.subspace * param.dense_plan.in_size, sample);
}
- auto builder = param.factory.create_value_builder<OCT>(param.res_type, param.sparse_plan.keep_dims.size(), param.dense_plan.out_size, map.size());
+ auto builder = param.factory.create_transient_value_builder<OCT>(param.res_type, param.sparse_plan.keep_dims.size(), param.dense_plan.out_size, map.size());
map.each_entry([&](const auto &keys, const auto &values)
{
OCT *dst = builder->add_subspace(keys).begin();
@@ -93,7 +93,7 @@ generic_reduce(const Value &value, const ReduceParam &param) {
}
});
if ((map.size() == 0) && param.sparse_plan.keep_dims.empty()) {
- auto zero = builder->add_subspace({});
+ auto zero = builder->add_subspace();
for (size_t i = 0; i < zero.size(); ++i) {
zero[i] = OCT{};
}
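The reduce path groups subspaces by the kept part of the address, now a vector of label ids, and aggregates cells per group before building the output. A minimal sketch of that grouping, with std::map standing in for ArrayArrayMap:

#include <cstdint>
#include <map>
#include <utility>
#include <vector>

using label_id = uint32_t;
using KeptAddress = std::vector<label_id>;

int main() {
    // (kept address, cell value) pairs, as if streamed from the input index
    std::vector<std::pair<KeptAddress, double>> input = {
        {{0}, 1.0}, {{1}, 2.0}, {{0}, 3.0}
    };
    std::map<KeptAddress, double> sums;          // one aggregator per kept address
    for (const auto &[addr, cell] : input) {
        sums[addr] += cell;                      // aggregate into the matching group
    }
    // sums == {{0} -> 4.0, {1} -> 2.0}; each entry becomes one output subspace
}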
diff --git a/eval/src/vespa/eval/instruction/generic_rename.cpp b/eval/src/vespa/eval/instruction/generic_rename.cpp
index 1ce18597ec2..894ef37b678 100644
--- a/eval/src/vespa/eval/instruction/generic_rename.cpp
+++ b/eval/src/vespa/eval/instruction/generic_rename.cpp
@@ -69,15 +69,15 @@ generic_rename(const Value &a,
const ValueType &res_type, const ValueBuilderFactory &factory)
{
auto cells = a.cells().typify<CT>();
- std::vector<vespalib::stringref> output_address(sparse_plan.mapped_dims);
- std::vector<vespalib::stringref*> input_address;
+ std::vector<label_t> output_address(sparse_plan.mapped_dims);
+ std::vector<label_t*> input_address;
for (size_t maps_to : sparse_plan.output_dimensions) {
input_address.push_back(&output_address[maps_to]);
}
- auto builder = factory.create_value_builder<CT>(res_type,
- sparse_plan.mapped_dims,
- dense_plan.subspace_size,
- a.index().size());
+ auto builder = factory.create_transient_value_builder<CT>(res_type,
+ sparse_plan.mapped_dims,
+ dense_plan.subspace_size,
+ a.index().size());
auto view = a.index().create_view({});
view->lookup({});
size_t subspace;
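Sparse rename only permutes where each input label lands in the output address; with ids this becomes pointer wiring plus integer copies. A small sketch mirroring the output_dimensions loop above (hypothetical types):

#include <cstdint>
#include <vector>

using label_id = uint32_t;

int main() {
    // input dims (x, y) map to output dims (y, x)
    std::vector<size_t> output_dimensions = {1, 0};        // input dim i -> output slot
    std::vector<label_id> output_address(2);
    std::vector<label_id*> input_address;
    for (size_t maps_to : output_dimensions) {
        input_address.push_back(&output_address[maps_to]);
    }
    std::vector<label_id> input_labels = {7, 9};            // labels for (x, y)
    for (size_t i = 0; i < input_labels.size(); ++i) {
        *input_address[i] = input_labels[i];                // lands at the renamed slot
    }
    // output_address == {9, 7}
}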
diff --git a/eval/src/vespa/eval/streamed/streamed_value.cpp b/eval/src/vespa/eval/streamed/streamed_value.cpp
index bdfe5fd4e27..06162b2200d 100644
--- a/eval/src/vespa/eval/streamed/streamed_value.cpp
+++ b/eval/src/vespa/eval/streamed/streamed_value.cpp
@@ -16,8 +16,7 @@ StreamedValue<T>::get_memory_usage() const
{
MemoryUsage usage = self_memory_usage<StreamedValue<T>>();
usage.merge(vector_extra_memory_usage(_my_cells));
- usage.incUsedBytes(_label_buf.byteSize());
- usage.incAllocatedBytes(_label_buf.byteCapacity());
+ usage.merge(vector_extra_memory_usage(_my_labels.view().handles()));
return usage;
}
diff --git a/eval/src/vespa/eval/streamed/streamed_value.h b/eval/src/vespa/eval/streamed/streamed_value.h
index 258802a53e8..b7ace4191c3 100644
--- a/eval/src/vespa/eval/streamed/streamed_value.h
+++ b/eval/src/vespa/eval/streamed/streamed_value.h
@@ -4,6 +4,7 @@
#include <vespa/eval/eval/value_type.h>
#include <vespa/eval/eval/value.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
#include "streamed_value_index.h"
#include <cassert>
@@ -19,20 +20,22 @@ template <typename T>
class StreamedValue : public Value
{
private:
+ using StrongHandles = SharedStringRepo::StrongHandles;
+
ValueType _type;
std::vector<T> _my_cells;
- Array<char> _label_buf;
+ StrongHandles _my_labels;
StreamedValueIndex _my_index;
public:
StreamedValue(ValueType type, size_t num_mapped_dimensions,
- std::vector<T> cells, size_t num_subspaces, Array<char> && label_buf)
+ std::vector<T> cells, size_t num_subspaces, StrongHandles && handles)
: _type(std::move(type)),
_my_cells(std::move(cells)),
- _label_buf(std::move(label_buf)),
+ _my_labels(std::move(handles)),
_my_index(num_mapped_dimensions,
num_subspaces,
- ConstArrayRef<char>(_label_buf.begin(), _label_buf.size()))
+ _my_labels.view().handles())
{
assert(num_subspaces * _type.dense_subspace_size() == _my_cells.size());
}
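StreamedValue now owns its labels as strong (refcounted) handles instead of a serialized character buffer, so the flat id stream handed to the index stays valid for the value's lifetime. A rough sketch of that ownership model, with std::shared_ptr standing in for the repo's refcounting (not the vespalib implementation):

#include <cstdint>
#include <memory>
#include <string>
#include <vector>

using label_id = uint32_t;

struct StrongLabels {
    std::vector<std::shared_ptr<const std::string>> owned; // keeps strings alive
    std::vector<label_id> ids;                             // flat id stream for the index
    void add(const std::string &label) {
        owned.push_back(std::make_shared<const std::string>(label));
        ids.push_back(static_cast<label_id>(owned.size() - 1));
    }
};

int main() {
    StrongLabels labels;
    labels.add("foo");
    labels.add("bar");
    // ids can be handed to the index; the strings cannot go away underneath it
}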
diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder.h b/eval/src/vespa/eval/streamed/streamed_value_builder.h
index 5698c805756..48a01f893de 100644
--- a/eval/src/vespa/eval/streamed/streamed_value_builder.h
+++ b/eval/src/vespa/eval/streamed/streamed_value_builder.h
@@ -3,7 +3,7 @@
#pragma once
#include "streamed_value.h"
-#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
namespace vespalib::eval {
@@ -14,12 +14,14 @@ template <typename T>
class StreamedValueBuilder : public ValueBuilder<T>
{
private:
+ using StrongHandles = SharedStringRepo::StrongHandles;
+
ValueType _type;
size_t _num_mapped_dimensions;
size_t _dense_subspace_size;
std::vector<T> _cells;
size_t _num_subspaces;
- nbostream _labels;
+ StrongHandles _labels;
public:
StreamedValueBuilder(const ValueType &type,
size_t num_mapped_in,
@@ -30,18 +32,26 @@ public:
_dense_subspace_size(subspace_size_in),
_cells(),
_num_subspaces(0),
- _labels()
+ _labels(num_mapped_in * expected_subspaces)
{
_cells.reserve(subspace_size_in * expected_subspaces);
- // assume small sized label strings:
- _labels.reserve(num_mapped_in * expected_subspaces * 3);
};
~StreamedValueBuilder();
ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) override {
for (auto label : addr) {
- _labels.writeSmallString(label);
+ _labels.add(label);
+ }
+ size_t old_sz = _cells.size();
+ _cells.resize(old_sz + _dense_subspace_size);
+ _num_subspaces++;
+ return ArrayRef<T>(&_cells[old_sz], _dense_subspace_size);
+ }
+
+ ArrayRef<T> add_subspace(ConstArrayRef<label_t> addr) override {
+ for (auto label : addr) {
+ _labels.add(label);
}
size_t old_sz = _cells.size();
_cells.resize(old_sz + _dense_subspace_size);
@@ -58,7 +68,7 @@ public:
_num_mapped_dimensions,
std::move(_cells),
_num_subspaces,
- _labels.extract_buffer());
+ std::move(_labels));
}
};
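The streamed builder above appends one id per mapped dimension and one dense block of cells per add_subspace call. Its core reduces to something like this sketch (illustrative MiniBuilder, not the vespalib class):

#include <cstddef>
#include <cstdint>
#include <vector>

using label_id = uint32_t;

struct MiniBuilder {
    size_t dense_subspace_size;
    std::vector<label_id> labels;   // num_mapped ids per subspace, flattened
    std::vector<double> cells;      // dense_subspace_size cells per subspace

    double *add_subspace(const std::vector<label_id> &addr) {
        labels.insert(labels.end(), addr.begin(), addr.end());
        size_t old_sz = cells.size();
        cells.resize(old_sz + dense_subspace_size);
        return &cells[old_sz];      // caller fills this dense block
    }
};

int main() {
    MiniBuilder builder{2, {}, {}};
    double *dst = builder.add_subspace({7});   // one mapped dim, label id 7
    dst[0] = 1.0;
    dst[1] = 2.0;                              // dense block of size 2
}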
diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp
index aa6347a2c51..5111ba8a71e 100644
--- a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp
+++ b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.cpp
@@ -19,10 +19,12 @@ struct SelectStreamedValueBuilder {
std::unique_ptr<ValueBuilderBase>
StreamedValueBuilderFactory::create_value_builder_base(const ValueType &type,
+ bool transient,
size_t num_mapped,
size_t subspace_size,
size_t expected_subspaces) const
{
+ (void) transient;
return typify_invoke<1,TypifyCellType,SelectStreamedValueBuilder>(
type.cell_type(),
type, num_mapped, subspace_size, expected_subspaces);
diff --git a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h
index 3f81981f429..58072aa31dc 100644
--- a/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h
+++ b/eval/src/vespa/eval/streamed/streamed_value_builder_factory.h
@@ -14,7 +14,7 @@ private:
StreamedValueBuilderFactory() {}
static StreamedValueBuilderFactory _factory;
std::unique_ptr<ValueBuilderBase> create_value_builder_base(
- const ValueType &type, size_t num_mapped_in,
+ const ValueType &type, bool transient, size_t num_mapped_in,
size_t subspace_size_in, size_t expected_subspaces) const override;
public:
static const StreamedValueBuilderFactory &get() { return _factory; }
diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.cpp b/eval/src/vespa/eval/streamed/streamed_value_index.cpp
index 17cf7316554..0adaa35fc84 100644
--- a/eval/src/vespa/eval/streamed/streamed_value_index.cpp
+++ b/eval/src/vespa/eval/streamed/streamed_value_index.cpp
@@ -18,7 +18,7 @@ struct StreamedFilterView : Value::Index::View
{
LabelBlockStream label_blocks;
std::vector<size_t> view_dims;
- std::vector<vespalib::stringref> to_match;
+ std::vector<label_t> to_match;
StreamedFilterView(LabelBlockStream labels, std::vector<size_t> view_dims_in)
: label_blocks(std::move(labels)),
@@ -28,7 +28,7 @@ struct StreamedFilterView : Value::Index::View
to_match.reserve(view_dims.size());
}
- void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
+ void lookup(ConstArrayRef<const label_t*> addr) override {
label_blocks.reset();
to_match.clear();
for (auto ptr : addr) {
@@ -37,7 +37,7 @@ struct StreamedFilterView : Value::Index::View
assert(view_dims.size() == to_match.size());
}
- bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
+ bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override {
while (const auto block = label_blocks.next_block()) {
idx_out = block.subspace_index;
bool matches = true;
@@ -66,12 +66,12 @@ struct StreamedIterationView : Value::Index::View
: label_blocks(std::move(labels))
{}
- void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
+ void lookup(ConstArrayRef<const label_t*> addr) override {
label_blocks.reset();
assert(addr.size() == 0);
}
- bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
+ bool next_result(ConstArrayRef<label_t*> addr_out, size_t &idx_out) override {
if (auto block = label_blocks.next_block()) {
idx_out = block.subspace_index;
size_t i = 0;
@@ -90,7 +90,7 @@ struct StreamedIterationView : Value::Index::View
std::unique_ptr<Value::Index::View>
StreamedValueIndex::create_view(const std::vector<size_t> &dims) const
{
- LabelBlockStream label_stream(_data.num_subspaces, _data.labels_buffer, _data.num_mapped_dims);
+ LabelBlockStream label_stream(_data.num_subspaces, _data.labels, _data.num_mapped_dims);
if (dims.empty()) {
return std::make_unique<StreamedIterationView>(std::move(label_stream));
}
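The filter view walks the label blocks one subspace at a time and keeps those whose ids match the bound view dimensions; with ids, each match is a plain integer compare. A minimal sketch of that loop (hypothetical names):

#include <cstddef>
#include <cstdint>
#include <vector>

using label_id = uint32_t;

int main() {
    size_t num_mapped_dims = 2;
    std::vector<label_id> labels = {0, 1,  2, 1,  2, 3};    // 3 subspaces
    std::vector<size_t> view_dims = {0};                    // filter on dim 0
    std::vector<label_id> to_match = {2};                   // want label id 2

    std::vector<size_t> hits;
    for (size_t subspace = 0; subspace * num_mapped_dims < labels.size(); ++subspace) {
        const label_id *block = &labels[subspace * num_mapped_dims];
        bool matches = true;
        for (size_t i = 0; i < view_dims.size(); ++i) {
            matches &= (block[view_dims[i]] == to_match[i]);
        }
        if (matches) {
            hits.push_back(subspace);
        }
    }
    // hits == {1, 2}: the subspaces whose first label has id 2
}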
diff --git a/eval/src/vespa/eval/streamed/streamed_value_index.h b/eval/src/vespa/eval/streamed/streamed_value_index.h
index 8fd561200c3..fb3d48d8176 100644
--- a/eval/src/vespa/eval/streamed/streamed_value_index.h
+++ b/eval/src/vespa/eval/streamed/streamed_value_index.h
@@ -3,6 +3,7 @@
#pragma once
#include <vespa/eval/eval/value.h>
+#include <vespa/vespalib/util/shared_string_repo.h>
namespace vespalib::eval {
@@ -13,19 +14,23 @@ namespace vespalib::eval {
class StreamedValueIndex : public Value::Index
{
public:
+
struct SerializedDataRef {
uint32_t num_mapped_dims;
uint32_t num_subspaces;
- ConstArrayRef<char> labels_buffer;
+ const std::vector<label_t> labels;
};
- StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, ConstArrayRef<char> labels_buf)
- : _data{num_mapped_dims, num_subspaces, labels_buf}
+ StreamedValueIndex(uint32_t num_mapped_dims, uint32_t num_subspaces, const std::vector<label_t> &labels_in)
+ : _data{num_mapped_dims, num_subspaces, labels_in}
{}
// index API:
size_t size() const override { return _data.num_subspaces; }
std::unique_ptr<View> create_view(const std::vector<size_t> &dims) const override;
+ // NOTE / WARNING: serializing the handle view and then discarding the
+ // backing streamed value leaves the serialized string ids dangling, so
+ // deserializing that data later will reference string enum values that
+ // are no longer kept alive.
SerializedDataRef get_data_reference() const { return _data; }
private:
diff --git a/eval/src/vespa/eval/streamed/streamed_value_utils.h b/eval/src/vespa/eval/streamed/streamed_value_utils.h
index b88d4df8581..6b44e052f0c 100644
--- a/eval/src/vespa/eval/streamed/streamed_value_utils.h
+++ b/eval/src/vespa/eval/streamed/streamed_value_utils.h
@@ -4,24 +4,23 @@
#include <vespa/eval/eval/value.h>
#include <vespa/vespalib/objects/nbostream.h>
+#include <cassert>
namespace vespalib::eval {
/**
* Reads a stream of serialized labels.
- * Reading more labels than available will
- * throw an exception.
+ * Reading more labels than available will trigger an assert.
**/
struct LabelStream {
- nbostream source;
- LabelStream(ConstArrayRef<char> data) : source(data.begin(), data.size()) {}
- vespalib::stringref next_label() {
- size_t str_size = source.getInt1_4Bytes();
- vespalib::stringref label(source.peek(), str_size);
- source.adjustReadPos(str_size);
- return label;
+ const std::vector<label_t> &source;
+ size_t pos;
+ LabelStream(const std::vector<label_t> &data) : source(data), pos(0) {}
+ label_t next_label() {
+ assert(pos < source.size());
+ return source[pos++];
}
- void reset() { source.rp(0); }
+ void reset() { pos = 0; }
};
/**
@@ -30,7 +29,7 @@ struct LabelStream {
struct LabelBlock {
static constexpr size_t npos = -1;
size_t subspace_index;
- ConstArrayRef<vespalib::stringref> address;
+ ConstArrayRef<label_t> address;
operator bool() const { return subspace_index != npos; }
};
@@ -43,7 +42,7 @@ private:
size_t _num_subspaces;
LabelStream _labels;
size_t _subspace_index;
- std::vector<vespalib::stringref> _current_address;
+ std::vector<label_t> _current_address;
public:
LabelBlock next_block() {
if (_subspace_index < _num_subspaces) {
@@ -62,10 +61,10 @@ public:
}
LabelBlockStream(uint32_t num_subspaces,
- ConstArrayRef<char> label_buf,
+ const std::vector<label_t> &labels,
uint32_t num_mapped_dims)
: _num_subspaces(num_subspaces),
- _labels(label_buf),
+ _labels(labels),
_subspace_index(num_subspaces),
_current_address(num_mapped_dims)
{}
diff --git a/eval/src/vespa/eval/streamed/streamed_value_view.h b/eval/src/vespa/eval/streamed/streamed_value_view.h
index e37f442dd9a..060961f5e16 100644
--- a/eval/src/vespa/eval/streamed/streamed_value_view.h
+++ b/eval/src/vespa/eval/streamed/streamed_value_view.h
@@ -24,10 +24,10 @@ private:
public:
StreamedValueView(const ValueType &type, size_t num_mapped_dimensions,
TypedCells cells, size_t num_subspaces,
- ConstArrayRef<char> labels_buf)
+ const std::vector<label_t> &labels)
: _type(type),
_cells_ref(cells),
- _my_index(num_mapped_dimensions, num_subspaces, labels_buf)
+ _my_index(num_mapped_dimensions, num_subspaces, labels)
{
assert(num_subspaces * _type.dense_subspace_size() == _cells_ref.size);
}