summaryrefslogtreecommitdiffstats
path: root/eval/src
diff options
context:
space:
mode:
authorHåvard Pettersen <3535158+havardpe@users.noreply.github.com>2020-10-05 15:32:09 +0200
committerGitHub <noreply@github.com>2020-10-05 15:32:09 +0200
commitd7a7d903dc80ad63f77314a73ee718dad6b68e7d (patch)
treec12002259a2753f1de9b24072e4654081aa1ff22 /eval/src
parent83df87bf1ace88517ab6858e5c0a8c45ce7ad5ce (diff)
parent20dfd21bf86bce07cbc7af2be4af98d33e7173a7 (diff)
Merge pull request #14724 from vespa-engine/havardpe/simple-sparse-map
simple sparse map, used by simple value
Diffstat (limited to 'eval/src')
-rw-r--r--eval/src/tests/eval/simple_sparse_map/CMakeLists.txt9
-rw-r--r--eval/src/tests/eval/simple_sparse_map/simple_sparse_map_test.cpp96
-rw-r--r--eval/src/vespa/eval/eval/CMakeLists.txt1
-rw-r--r--eval/src/vespa/eval/eval/simple_sparse_map.cpp12
-rw-r--r--eval/src/vespa/eval/eval/simple_sparse_map.h202
-rw-r--r--eval/src/vespa/eval/eval/simple_value.cpp177
-rw-r--r--eval/src/vespa/eval/eval/simple_value.h20
-rw-r--r--eval/src/vespa/eval/eval/value.h4
8 files changed, 442 insertions, 79 deletions
diff --git a/eval/src/tests/eval/simple_sparse_map/CMakeLists.txt b/eval/src/tests/eval/simple_sparse_map/CMakeLists.txt
new file mode 100644
index 00000000000..e3d2885e426
--- /dev/null
+++ b/eval/src/tests/eval/simple_sparse_map/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_simple_sparse_map_test_app TEST
+ SOURCES
+ simple_sparse_map_test.cpp
+ DEPENDS
+ vespaeval
+ GTest::GTest
+)
+vespa_add_test(NAME eval_simple_sparse_map_test_app COMMAND eval_simple_sparse_map_test_app)
diff --git a/eval/src/tests/eval/simple_sparse_map/simple_sparse_map_test.cpp b/eval/src/tests/eval/simple_sparse_map/simple_sparse_map_test.cpp
new file mode 100644
index 00000000000..a8ddeeab349
--- /dev/null
+++ b/eval/src/tests/eval/simple_sparse_map/simple_sparse_map_test.cpp
@@ -0,0 +1,96 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/eval/simple_sparse_map.h>
+#include <vespa/vespalib/stllike/hash_map.hpp>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib;
+using namespace vespalib::eval;
+
+class StringList {
+private:
+ std::vector<vespalib::string> _str_list;
+ std::vector<vespalib::stringref> _ref_list;
+ std::vector<const vespalib::stringref *> _ref_ptr_list;
+public:
+ StringList(const std::vector<vespalib::string> &list)
+ : _str_list(list), _ref_list(), _ref_ptr_list()
+ {
+ for (const auto &str: _str_list) {
+ _ref_list.emplace_back(str);
+ }
+ for (const auto &ref: _ref_list) {
+ _ref_ptr_list.push_back(&ref);
+ }
+ }
+ ~StringList();
+ const std::vector<vespalib::string> direct_str() const { return _str_list; }
+ ConstArrayRef<vespalib::stringref> direct_ref() const { return _ref_list; }
+ ConstArrayRef<const vespalib::stringref *> indirect_ref() const { return _ref_ptr_list; }
+};
+StringList::~StringList() = default;
+using SL = StringList;
+
+TEST(SimpleSparseMapTest, simple_sparse_map_basic_usage_works) {
+ SL a1({"a","a","a"});
+ SL a2({"a","a","b"});
+ SL a3({"a","b","a"});
+ SL a4({"b","a","a"});
+ SimpleSparseMap map(3, 128);
+ EXPECT_EQ(map.size(), 0);
+ map.add_mapping(a1.direct_str());
+ map.add_mapping(a2.direct_ref());
+ map.add_mapping(a3.indirect_ref());
+ EXPECT_EQ(map.size(), 3);
+ EXPECT_EQ(map.lookup(a1.direct_str()), 0);
+ EXPECT_EQ(map.lookup(a1.direct_ref()), 0);
+ EXPECT_EQ(map.lookup(a1.indirect_ref()), 0);
+ EXPECT_EQ(map.lookup(a2.direct_str()), 1);
+ EXPECT_EQ(map.lookup(a2.direct_ref()), 1);
+ EXPECT_EQ(map.lookup(a2.indirect_ref()), 1);
+ EXPECT_EQ(map.lookup(a3.direct_str()), 2);
+ EXPECT_EQ(map.lookup(a3.direct_ref()), 2);
+ EXPECT_EQ(map.lookup(a3.indirect_ref()), 2);
+ EXPECT_EQ(map.lookup(a4.direct_str()), map.npos());
+ EXPECT_EQ(map.lookup(a4.direct_ref()), map.npos());
+ EXPECT_EQ(map.lookup(a4.indirect_ref()), map.npos());
+ EXPECT_EQ(SimpleSparseMap::npos(), map.npos());
+ SL expect_labels({"a","a","a",
+ "a","a","b",
+ "a","b","a"});
+ EXPECT_EQ(map.labels(), expect_labels.direct_str());
+}
+
+TEST(SimpleSparseMapTest, simple_sparse_map_works_with_no_labels) {
+ SL empty({});
+ SimpleSparseMap map1(0, 1);
+ SimpleSparseMap map2(0, 1);
+ SimpleSparseMap map3(0, 1);
+ EXPECT_EQ(map1.size(), 0);
+ EXPECT_EQ(map2.size(), 0);
+ EXPECT_EQ(map3.size(), 0);
+ map1.add_mapping(empty.direct_str());
+ map2.add_mapping(empty.direct_ref());
+ map3.add_mapping(empty.indirect_ref());
+ EXPECT_EQ(map1.size(), 1);
+ EXPECT_EQ(map2.size(), 1);
+ EXPECT_EQ(map3.size(), 1);
+ EXPECT_EQ(map1.lookup(empty.direct_str()), 0);
+ EXPECT_EQ(map1.lookup(empty.direct_ref()), 0);
+ EXPECT_EQ(map1.lookup(empty.indirect_ref()), 0);
+ EXPECT_EQ(map2.lookup(empty.direct_str()), 0);
+ EXPECT_EQ(map2.lookup(empty.direct_ref()), 0);
+ EXPECT_EQ(map2.lookup(empty.indirect_ref()), 0);
+ EXPECT_EQ(map3.lookup(empty.direct_str()), 0);
+ EXPECT_EQ(map3.lookup(empty.direct_ref()), 0);
+ EXPECT_EQ(map3.lookup(empty.indirect_ref()), 0);
+ EXPECT_EQ(map1.labels().size(), 0);
+ EXPECT_EQ(map2.labels().size(), 0);
+ EXPECT_EQ(map3.labels().size(), 0);
+}
+
+TEST(SimpleSparseMapTest, size_of_internal_types) {
+ fprintf(stderr, "simple sparse map hash node size: %zu\n", sizeof(hash_node<SimpleSparseMap::MapType::value_type>));
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/vespa/eval/eval/CMakeLists.txt b/eval/src/vespa/eval/eval/CMakeLists.txt
index fa19f80c4af..e2bcee94498 100644
--- a/eval/src/vespa/eval/eval/CMakeLists.txt
+++ b/eval/src/vespa/eval/eval/CMakeLists.txt
@@ -19,6 +19,7 @@ vespa_add_library(eval_eval OBJECT
operation.cpp
operator_nodes.cpp
param_usage.cpp
+ simple_sparse_map.cpp
simple_tensor.cpp
simple_tensor_engine.cpp
simple_value.cpp
diff --git a/eval/src/vespa/eval/eval/simple_sparse_map.cpp b/eval/src/vespa/eval/eval/simple_sparse_map.cpp
new file mode 100644
index 00000000000..18d3bffd47d
--- /dev/null
+++ b/eval/src/vespa/eval/eval/simple_sparse_map.cpp
@@ -0,0 +1,12 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "simple_sparse_map.h"
+#include <vespa/vespalib/stllike/hash_map.hpp>
+
+namespace vespalib::eval {
+
+SimpleSparseMap::~SimpleSparseMap() = default;
+
+}
+
+VESPALIB_HASH_MAP_INSTANTIATE_H_E(vespalib::eval::SimpleSparseMap::Key, uint32_t, vespalib::eval::SimpleSparseMap::Hash, vespalib::eval::SimpleSparseMap::Equal);
diff --git a/eval/src/vespa/eval/eval/simple_sparse_map.h b/eval/src/vespa/eval/eval/simple_sparse_map.h
new file mode 100644
index 00000000000..61ff9f326e2
--- /dev/null
+++ b/eval/src/vespa/eval/eval/simple_sparse_map.h
@@ -0,0 +1,202 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/util/arrayref.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <vector>
+#include <cassert>
+
+namespace vespalib::eval {
+
+/**
+ * A simple wrapper around vespalib::hash_map, using it to map a list
+ * of labels (a sparse address) to an integer value (dense subspace
+ * index). Labels are stored in a separate vector and the map keys
+ * reference a slice of this vector. This is to avoid fragmentation
+ * caused by hash keys being vectors of values. In addition, labels
+ * can be specified in different ways during lookup and insert in
+ * order to reduce the need for data restructuring when using the
+ * map. To keep things simple, map iterators are kept away from the
+ * api. This will have a minor overhead during lookup since the end
+ * iterator needs to be translated to npos. All added mappings are
+ * checked for uniqueness with an assert. There is no real need for
+ * map entry iteration since you can just iterate the labels vector
+ * directly.
+ *
+ * 'add_mapping' will bind the given address to an integer value
+ * equal to the current (pre-insert) size of the map. The given
+ * address MUST NOT already be in the map.
+ *
+ * 'lookup' will return the integer value associated with the
+ * given address or a special npos value if the address is not found.
+ **/
+class SimpleSparseMap
+{
+public:
+ using DirectStr = ConstArrayRef<vespalib::string>;
+ using DirectRef = ConstArrayRef<vespalib::stringref>;
+ using IndirectRef = ConstArrayRef<const vespalib::stringref *>;
+
+ struct Key {
+ uint32_t start;
+ uint32_t end;
+ Key() : start(0), end(0) {}
+ Key(uint32_t start_in, uint32_t end_in)
+ : start(start_in), end(end_in) {}
+ };
+
+ struct Hash {
+ const std::vector<vespalib::string> *labels;
+ const vespalib::string &get_label(size_t i) const { return (*labels)[i]; }
+ Hash() : labels(nullptr) {}
+ Hash(const Hash &rhs) = default;
+ Hash &operator=(const Hash &rhs) = default;
+ Hash(const std::vector<vespalib::string> &labels_in) : labels(&labels_in) {}
+ size_t operator()(const Key &key) const {
+ size_t h = 0;
+ for (size_t i = key.start; i < key.end; ++i) {
+ const vespalib::string &str = get_label(i);
+ h = h * 31 + hashValue(str.data(), str.size());
+ }
+ return h;
+ }
+ size_t operator()(const DirectStr &addr) const {
+ size_t h = 0;
+ for (const auto &str: addr) {
+ h = h * 31 + hashValue(str.data(), str.size());
+ }
+ return h;
+ }
+ size_t operator()(const DirectRef &addr) const {
+ size_t h = 0;
+ for (const auto &str: addr) {
+ h = h * 31 + hashValue(str.data(), str.size());
+ }
+ return h;
+ }
+ size_t operator()(const IndirectRef &addr) const {
+ size_t h = 0;
+ for (const auto *str: addr) {
+ h = h * 31 + hashValue(str->data(), str->size());
+ }
+ return h;
+ }
+ };
+
+ struct Equal {
+ const std::vector<vespalib::string> *labels;
+ const vespalib::string &get_label(size_t i) const { return (*labels)[i]; }
+ Equal() : labels(nullptr) {}
+ Equal(const Equal &rhs) = default;
+ Equal &operator=(const Equal &rhs) = default;
+ Equal(const std::vector<vespalib::string> &labels_in) : labels(&labels_in) {}
+ bool operator()(const Key &a, const Key &b) const {
+ size_t len = (a.end - a.start);
+ if ((b.end - b.start) != len) {
+ return false;
+ }
+ for (size_t i = 0; i < len; ++i) {
+ if (get_label(a.start + i) != get_label(b.start + i)) {
+ return false;
+ }
+ }
+ return true;
+ }
+ bool operator()(const Key &a, const DirectStr &addr) const {
+ if (addr.size() != (a.end - a.start)) {
+ return false;
+ }
+ for (size_t i = 0; i < addr.size(); ++i) {
+ if (get_label(a.start + i) != addr[i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+ bool operator()(const Key &a, const DirectRef &addr) const {
+ if (addr.size() != (a.end - a.start)) {
+ return false;
+ }
+ for (size_t i = 0; i < addr.size(); ++i) {
+ if (get_label(a.start + i) != addr[i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+ bool operator()(const Key &a, const IndirectRef &addr) const {
+ if (addr.size() != (a.end - a.start)) {
+ return false;
+ }
+ for (size_t i = 0; i < addr.size(); ++i) {
+ if (get_label(a.start + i) != *addr[i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+ };
+
+ using MapType = vespalib::hash_map<Key,uint32_t,Hash,Equal>;
+
+private:
+ std::vector<vespalib::string> _labels;
+ MapType _map;
+
+public:
+ SimpleSparseMap(size_t num_mapped_dims, size_t expected_subspaces)
+ : _labels(), _map(expected_subspaces * 2, Hash(_labels), Equal(_labels))
+ {
+ _labels.reserve(num_mapped_dims * expected_subspaces);
+ }
+ ~SimpleSparseMap();
+ size_t size() const { return _map.size(); }
+ static constexpr size_t npos() { return -1; }
+ const std::vector<vespalib::string> &labels() const { return _labels; }
+ void add_mapping(DirectStr addr) {
+ uint32_t value = _map.size();
+ uint32_t start = _labels.size();
+ for (const auto &label: addr) {
+ _labels.emplace_back(label);
+ }
+ uint32_t end = _labels.size();
+ auto [ignore, was_inserted] = _map.insert(std::make_pair(Key(start, end), value));
+ assert(was_inserted);
+ }
+ void add_mapping(DirectRef addr) {
+ uint32_t value = _map.size();
+ uint32_t start = _labels.size();
+ for (const auto &label: addr) {
+ _labels.emplace_back(label);
+ }
+ uint32_t end = _labels.size();
+ auto [ignore, was_inserted] = _map.insert(std::make_pair(Key(start, end), value));
+ assert(was_inserted);
+ }
+ void add_mapping(IndirectRef addr) {
+ uint32_t value = _map.size();
+ uint32_t start = _labels.size();
+ for (const auto *label: addr) {
+ _labels.emplace_back(*label);
+ }
+ uint32_t end = _labels.size();
+ auto [ignore, was_inserted] = _map.insert(std::make_pair(Key(start, end), value));
+ assert(was_inserted);
+ }
+ size_t lookup(DirectStr addr) const {
+ auto pos = _map.find(addr);
+ return (pos == _map.end()) ? npos() : pos->second;
+ }
+ size_t lookup(DirectRef addr) const {
+ auto pos = _map.find(addr);
+ return (pos == _map.end()) ? npos() : pos->second;
+ }
+ size_t lookup(IndirectRef addr) const {
+ auto pos = _map.find(addr);
+ return (pos == _map.end()) ? npos() : pos->second;
+ }
+};
+
+}
diff --git a/eval/src/vespa/eval/eval/simple_value.cpp b/eval/src/vespa/eval/eval/simple_value.cpp
index 304244532d8..84ba35404fd 100644
--- a/eval/src/vespa/eval/eval/simple_value.cpp
+++ b/eval/src/vespa/eval/eval/simple_value.cpp
@@ -5,6 +5,7 @@
#include <vespa/vespalib/util/typify.h>
#include <vespa/vespalib/util/visit_ranges.h>
#include <vespa/vespalib/util/overload.h>
+#include <vespa/vespalib/stllike/hash_map.hpp>
#include <vespa/log/log.h>
LOG_SETUP(".eval.simple_value");
@@ -17,107 +18,141 @@ namespace {
struct CreateSimpleValueBuilderBase {
template <typename T> static std::unique_ptr<ValueBuilderBase> invoke(const ValueType &type,
- size_t num_mapped_dims_in, size_t subspace_size_in)
+ size_t num_mapped_dims, size_t subspace_size, size_t expected_subspaces)
{
assert(check_cell_type<T>(type.cell_type()));
- return std::make_unique<SimpleValueT<T>>(type, num_mapped_dims_in, subspace_size_in);
+ return std::make_unique<SimpleValueT<T>>(type, num_mapped_dims, subspace_size, expected_subspaces);
}
};
-class SimpleValueView : public Value::Index::View {
-private:
- using Addr = std::vector<vespalib::string>;
- using Map = std::map<Addr,size_t>;
- using Itr = Map::const_iterator;
+//-----------------------------------------------------------------------------
+
+// look up a full address in the map directly
+struct LookupView : public Value::Index::View {
+
+ const SimpleSparseMap &index;
+ size_t subspace;
- const Map &_index;
- size_t _num_mapped_dims;
- std::vector<size_t> _match_dims;
- std::vector<size_t> _extract_dims;
- Addr _query;
- Itr _pos;
+ LookupView(const SimpleSparseMap &index_in)
+ : index(index_in), subspace(SimpleSparseMap::npos()) {}
+
+ void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
+ subspace = index.lookup(addr);
+ }
+
+ bool next_result(ConstArrayRef<vespalib::stringref*>, size_t &idx_out) override {
+ if (subspace == SimpleSparseMap::npos()) {
+ return false;
+ }
+ idx_out = subspace;
+ subspace = SimpleSparseMap::npos();
+ return true;
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+// find matching mappings for a partial address with brute force filtering
+struct FilterView : public Value::Index::View {
+
+ size_t num_mapped_dims;
+ const std::vector<vespalib::string> &labels;
+ std::vector<size_t> match_dims;
+ std::vector<size_t> extract_dims;
+ std::vector<vespalib::string> query;
+ size_t pos;
- bool is_direct_lookup() const { return (_match_dims.size() == _num_mapped_dims); }
bool is_match() const {
- assert(_pos->first.size() == _num_mapped_dims);
- for (size_t idx: _match_dims) {
- if (_query[idx] != _pos->first[idx]) {
+ for (size_t i = 0; i < query.size(); ++i) {
+ if (query[i] != labels[pos + match_dims[i]]) {
return false;
}
}
return true;
}
-public:
- SimpleValueView(const Map &index, const std::vector<size_t> &match_dims, size_t num_mapped_dims)
- : _index(index), _num_mapped_dims(num_mapped_dims), _match_dims(match_dims), _extract_dims(), _query(num_mapped_dims, ""), _pos(_index.end())
+ FilterView(const std::vector<vespalib::string> &labels_in, const std::vector<size_t> &match_dims_in, size_t num_mapped_dims_in)
+ : num_mapped_dims(num_mapped_dims_in), labels(labels_in), match_dims(match_dims_in),
+ extract_dims(), query(match_dims.size(), ""), pos(labels.size())
{
- auto pos = _match_dims.begin();
- for (size_t i = 0; i < _num_mapped_dims; ++i) {
- if ((pos == _match_dims.end()) || (*pos != i)) {
- _extract_dims.push_back(i);
+ auto my_pos = match_dims.begin();
+ for (size_t i = 0; i < num_mapped_dims; ++i) {
+ if ((my_pos == match_dims.end()) || (*my_pos != i)) {
+ extract_dims.push_back(i);
} else {
- ++pos;
+ ++my_pos;
}
}
- assert(pos == _match_dims.end());
- assert((_match_dims.size() + _extract_dims.size()) == _num_mapped_dims);
+ assert(my_pos == match_dims.end());
+ assert((match_dims.size() + extract_dims.size()) == num_mapped_dims);
}
void lookup(ConstArrayRef<const vespalib::stringref*> addr) override {
- assert(addr.size() == _match_dims.size());
- for (size_t i = 0; i < _match_dims.size(); ++i) {
- _query[_match_dims[i]] = *addr[i];
- }
- if (is_direct_lookup()) {
- _pos = _index.find(_query);
- } else {
- _pos = _index.begin();
+ assert(addr.size() == query.size());
+ for (size_t i = 0; i < addr.size(); ++i) {
+ query[i] = *addr[i];
}
+ pos = 0;
}
bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
- assert(addr_out.size() == _extract_dims.size());
- while (_pos != _index.end()) {
+ while (pos < labels.size()) {
if (is_match()) {
- for (size_t i = 0; i < _extract_dims.size(); ++i) {
- *addr_out[i] = _pos->first[_extract_dims[i]];
- }
- idx_out = _pos->second;
- if (is_direct_lookup()) {
- _pos = _index.end();
- } else {
- ++_pos;
+ assert(addr_out.size() == extract_dims.size());
+ for (size_t i = 0; i < extract_dims.size(); ++i) {
+ *addr_out[i] = labels[pos + extract_dims[i]];
}
+ idx_out = (pos / num_mapped_dims); // is this expensive?
+ pos += num_mapped_dims;
return true;
}
- ++_pos;
+ pos += num_mapped_dims;
}
return false;
}
};
-} // namespace <unnamed>
-
//-----------------------------------------------------------------------------
-void
-SimpleValue::add_mapping(ConstArrayRef<vespalib::stringref> addr)
-{
- size_t id = _index.size();
- std::vector<vespalib::string> my_addr;
- for (const auto &label: addr) {
- my_addr.push_back(label);
+// iterate all mappings
+struct IterateView : public Value::Index::View {
+
+ size_t num_mapped_dims;
+ const std::vector<vespalib::string> &labels;
+ size_t pos;
+
+ IterateView(const std::vector<vespalib::string> &labels_in, size_t num_mapped_dims_in)
+ : num_mapped_dims(num_mapped_dims_in), labels(labels_in), pos(labels.size()) {}
+
+ void lookup(ConstArrayRef<const vespalib::stringref*>) override {
+ pos = 0;
}
- auto res = _index.emplace(std::move(my_addr), id);
- assert(res.second);
-}
-SimpleValue::SimpleValue(const ValueType &type, size_t num_mapped_dims_in, size_t subspace_size_in)
+ bool next_result(ConstArrayRef<vespalib::stringref*> addr_out, size_t &idx_out) override {
+ if (pos >= labels.size()) {
+ return false;
+ }
+ assert(addr_out.size() == num_mapped_dims);
+ for (size_t i = 0; i < num_mapped_dims; ++i) {
+ *addr_out[i] = labels[pos + i];
+ }
+ idx_out = (pos / num_mapped_dims); // is this expensive?
+ pos += num_mapped_dims;
+ return true;
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+} // namespace <unnamed>
+
+//-----------------------------------------------------------------------------
+
+SimpleValue::SimpleValue(const ValueType &type, size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces_in)
: _type(type),
_num_mapped_dims(num_mapped_dims_in),
_subspace_size(subspace_size_in),
- _index()
+ _index(num_mapped_dims_in, expected_subspaces_in)
{
assert(_type.count_mapped_dimensions() == _num_mapped_dims);
assert(_type.dense_subspace_size() == _subspace_size);
@@ -128,16 +163,25 @@ SimpleValue::~SimpleValue() = default;
std::unique_ptr<Value::Index::View>
SimpleValue::create_view(const std::vector<size_t> &dims) const
{
- return std::make_unique<SimpleValueView>(_index, dims, _num_mapped_dims);
+ if (_num_mapped_dims == 0) {
+ return TrivialIndex::get().create_view(dims);
+ } else if (dims.empty()) {
+ return std::make_unique<IterateView>(_index.labels(), _num_mapped_dims);
+ } else if (dims.size() == _num_mapped_dims) {
+ return std::make_unique<LookupView>(_index);
+ } else {
+ return std::make_unique<FilterView>(_index.labels(), dims, _num_mapped_dims);
+ }
}
//-----------------------------------------------------------------------------
template <typename T>
-SimpleValueT<T>::SimpleValueT(const ValueType &type, size_t num_mapped_dims_in, size_t subspace_size_in)
- : SimpleValue(type, num_mapped_dims_in, subspace_size_in),
+SimpleValueT<T>::SimpleValueT(const ValueType &type, size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces_in)
+ : SimpleValue(type, num_mapped_dims_in, subspace_size_in, expected_subspaces_in),
_cells()
{
+ _cells.reserve(subspace_size_in * expected_subspaces_in);
}
template <typename T>
@@ -148,7 +192,6 @@ ArrayRef<T>
SimpleValueT<T>::add_subspace(ConstArrayRef<vespalib::stringref> addr)
{
size_t old_size = _cells.size();
- assert(old_size == (index().size() * subspace_size()));
add_mapping(addr);
_cells.resize(old_size + subspace_size());
return ArrayRef<T>(&_cells[old_size], subspace_size());
@@ -160,10 +203,10 @@ SimpleValueBuilderFactory::SimpleValueBuilderFactory() = default;
SimpleValueBuilderFactory SimpleValueBuilderFactory::_factory;
std::unique_ptr<ValueBuilderBase>
-SimpleValueBuilderFactory::create_value_builder_base(const ValueType &type,
- size_t num_mapped_dims_in, size_t subspace_size_in, size_t) const
+SimpleValueBuilderFactory::create_value_builder_base(const ValueType &type, size_t num_mapped_dims, size_t subspace_size,
+ size_t expected_subspaces) const
{
- return typify_invoke<1,TypifyCellType,CreateSimpleValueBuilderBase>(type.cell_type(), type, num_mapped_dims_in, subspace_size_in);
+ return typify_invoke<1,TypifyCellType,CreateSimpleValueBuilderBase>(type.cell_type(), type, num_mapped_dims, subspace_size, expected_subspaces);
}
//-----------------------------------------------------------------------------
diff --git a/eval/src/vespa/eval/eval/simple_value.h b/eval/src/vespa/eval/eval/simple_value.h
index 8fef9ea1dc8..f2df087cf37 100644
--- a/eval/src/vespa/eval/eval/simple_value.h
+++ b/eval/src/vespa/eval/eval/simple_value.h
@@ -3,6 +3,7 @@
#pragma once
#include "value.h"
+#include "simple_sparse_map.h"
#include <vespa/vespalib/stllike/string.h>
#include <vector>
#include <map>
@@ -15,24 +16,23 @@ class TensorSpec;
/**
* A simple implementation of a generic value that can also be used to
- * build new values. This class focuses on simplicity over speed and
- * is intended as a reference implementation that can also be used to
- * test the correctness of tensor operations as they are moved away
- * from the implementation of individual tensor classes.
+ * build new values. This class focuses on simplicity and is intended
+ * as a reference implementation that can also be used to test the
+ * correctness of tensor operations as they are moved away from the
+ * implementation of individual tensor classes.
**/
class SimpleValue : public Value, public Value::Index
{
private:
- using Addr = std::vector<vespalib::string>;
ValueType _type;
size_t _num_mapped_dims;
size_t _subspace_size;
- std::map<Addr,size_t> _index;
+ SimpleSparseMap _index;
protected:
size_t subspace_size() const { return _subspace_size; }
- void add_mapping(ConstArrayRef<vespalib::stringref> addr);
+ void add_mapping(ConstArrayRef<vespalib::stringref> addr) { _index.add_mapping(addr); }
public:
- SimpleValue(const ValueType &type, size_t num_mapped_dims_in, size_t subspace_size_in);
+ SimpleValue(const ValueType &type, size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces_in);
~SimpleValue() override;
const ValueType &type() const override { return _type; }
const Value::Index &index() const override { return *this; }
@@ -49,7 +49,7 @@ class SimpleValueT : public SimpleValue, public ValueBuilder<T>
private:
std::vector<T> _cells;
public:
- SimpleValueT(const ValueType &type, size_t num_mapped_dims_in, size_t subspace_size_in);
+ SimpleValueT(const ValueType &type, size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces_in);
~SimpleValueT() override;
TypedCells cells() const override { return TypedCells(ConstArrayRef<T>(_cells)); }
ArrayRef<T> add_subspace(ConstArrayRef<vespalib::stringref> addr) override;
@@ -69,7 +69,7 @@ private:
SimpleValueBuilderFactory();
static SimpleValueBuilderFactory _factory;
std::unique_ptr<ValueBuilderBase> create_value_builder_base(const ValueType &type,
- size_t num_mapped_dims_in, size_t subspace_size_in, size_t expected_subspaces) const override;
+ size_t num_mapped_dims, size_t subspace_size, size_t expected_subspaces) const override;
public:
static const SimpleValueBuilderFactory &get() { return _factory; }
};
diff --git a/eval/src/vespa/eval/eval/value.h b/eval/src/vespa/eval/eval/value.h
index 35a9b347e8b..da4fc47f29b 100644
--- a/eval/src/vespa/eval/eval/value.h
+++ b/eval/src/vespa/eval/eval/value.h
@@ -77,10 +77,10 @@ class TrivialIndex : public Value::Index {
private:
TrivialIndex();
static TrivialIndex _index;
- size_t size() const override;
- std::unique_ptr<View> create_view(const std::vector<size_t> &dims) const override;
public:
static const TrivialIndex &get() { return _index; }
+ size_t size() const override;
+ std::unique_ptr<View> create_view(const std::vector<size_t> &dims) const override;
};
class DoubleValue : public Value