summaryrefslogtreecommitdiffstats
path: root/eval
diff options
context:
space:
mode:
authorHåvard Pettersen <havardpe@oath.com>2021-01-14 10:45:09 +0000
committerHåvard Pettersen <havardpe@oath.com>2021-01-14 10:45:09 +0000
commit669ab7bc6ee65bb7009d582bcdacc004807e6719 (patch)
treeb4b4333b1a351532bedd9a92dab7899f7312e852 /eval
parent79cff3b24d30e29c534e2efdb85bbc5bab62c4f7 (diff)
remove FastSparseMap
the use of string_id makes this obsolete
Diffstat (limited to 'eval')
-rw-r--r--eval/CMakeLists.txt1
-rw-r--r--eval/src/tests/eval/fast_sparse_map/CMakeLists.txt9
-rw-r--r--eval/src/tests/eval/fast_sparse_map/fast_sparse_map_test.cpp123
-rw-r--r--eval/src/vespa/eval/eval/CMakeLists.txt1
-rw-r--r--eval/src/vespa/eval/eval/fast_sparse_map.cpp20
-rw-r--r--eval/src/vespa/eval/eval/fast_sparse_map.h188
6 files changed, 0 insertions, 342 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt
index 9f9818000fb..7aad02bb67a 100644
--- a/eval/CMakeLists.txt
+++ b/eval/CMakeLists.txt
@@ -15,7 +15,6 @@ vespa_define_module(
src/tests/eval/array_array_map
src/tests/eval/compile_cache
src/tests/eval/compiled_function
- src/tests/eval/fast_sparse_map
src/tests/eval/fast_value
src/tests/eval/function
src/tests/eval/function_speed
diff --git a/eval/src/tests/eval/fast_sparse_map/CMakeLists.txt b/eval/src/tests/eval/fast_sparse_map/CMakeLists.txt
deleted file mode 100644
index fd9c1d37d43..00000000000
--- a/eval/src/tests/eval/fast_sparse_map/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_executable(eval_fast_sparse_map_test_app TEST
- SOURCES
- fast_sparse_map_test.cpp
- DEPENDS
- vespaeval
- GTest::GTest
-)
-vespa_add_test(NAME eval_fast_sparse_map_test_app COMMAND eval_fast_sparse_map_test_app)
diff --git a/eval/src/tests/eval/fast_sparse_map/fast_sparse_map_test.cpp b/eval/src/tests/eval/fast_sparse_map/fast_sparse_map_test.cpp
deleted file mode 100644
index 18517b34f57..00000000000
--- a/eval/src/tests/eval/fast_sparse_map/fast_sparse_map_test.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include <vespa/eval/eval/fast_sparse_map.h>
-#include <vespa/vespalib/stllike/hash_map.hpp>
-#include <vespa/vespalib/gtest/gtest.h>
-
-using namespace vespalib;
-using namespace vespalib::eval;
-
-class StringList {
-private:
- std::vector<vespalib::string> _str_list;
- std::vector<vespalib::stringref> _ref_list;
- std::vector<const vespalib::stringref *> _ref_ptr_list;
-public:
- StringList(const std::vector<vespalib::string> &list)
- : _str_list(list), _ref_list(), _ref_ptr_list()
- {
- for (const auto &str: _str_list) {
- _ref_list.push_back(str);
- }
- for (const auto &ref: _ref_list) {
- _ref_ptr_list.push_back(&ref);
- }
- }
- ~StringList();
- ConstArrayRef<vespalib::string> direct_str() const { return _str_list; }
- ConstArrayRef<vespalib::stringref> direct_ref() const { return _ref_list; }
- ConstArrayRef<const vespalib::stringref *> indirect_ref() const { return _ref_ptr_list; }
- bool is_eq(ConstArrayRef<FastSparseMap::HashedLabel> addr) const {
- if (addr.size() != _str_list.size()) {
- return false;
- }
- for (size_t i = 0; i < addr.size(); ++i) {
- if (addr[i].label != _str_list[i]) {
- return false;
- }
- }
- return true;
- }
-};
-StringList::~StringList() = default;
-using SL = StringList;
-
-TEST(FastSparseMapTest, fast_sparse_map_basic_usage_works) {
- SL a1({"a","a","a"});
- SL a2({"a","a","b"});
- SL a3({"a","b","a"});
- SL a4({"b","a","a"});
- FastSparseMap map(3, 128);
- EXPECT_EQ(map.size(), 0);
- map.add_mapping(a1.direct_str());
- map.add_mapping(a2.direct_ref());
- map.add_mapping(a3.indirect_ref());
- EXPECT_EQ(map.size(), 3);
- EXPECT_EQ(map.lookup(a1.direct_str()), 0);
- EXPECT_EQ(map.lookup(a1.direct_ref()), 0);
- EXPECT_EQ(map.lookup(a1.indirect_ref()), 0);
- EXPECT_EQ(map.lookup(a2.direct_str()), 1);
- EXPECT_EQ(map.lookup(a2.direct_ref()), 1);
- EXPECT_EQ(map.lookup(a2.indirect_ref()), 1);
- EXPECT_EQ(map.lookup(a3.direct_str()), 2);
- EXPECT_EQ(map.lookup(a3.direct_ref()), 2);
- EXPECT_EQ(map.lookup(a3.indirect_ref()), 2);
- EXPECT_EQ(map.lookup(a4.direct_str()), map.npos());
- EXPECT_EQ(map.lookup(a4.direct_ref()), map.npos());
- EXPECT_EQ(map.lookup(a4.indirect_ref()), map.npos());
- EXPECT_EQ(FastSparseMap::npos(), map.npos());
- EXPECT_EQ(map.labels().size(), 9);
- std::set<uint64_t> seen_hashes;
- std::map<uint32_t, uint32_t> addr_map;
- auto my_fun = [&](uint32_t subspace, uint64_t hash) {
- uint32_t addr_tag = subspace * 3;
- addr_map[addr_tag] = subspace;
- seen_hashes.insert(hash);
- };
- map.each_map_entry(my_fun);
- EXPECT_EQ(seen_hashes.size(), 3);
- EXPECT_EQ(addr_map.size(), 3);
- EXPECT_NE(addr_map.find(0), addr_map.end());
- EXPECT_EQ(addr_map[0], 0);
- EXPECT_EQ(addr_map[3], 1);
- EXPECT_EQ(addr_map[6], 2);
- EXPECT_EQ(addr_map.size(), 3);
- EXPECT_TRUE(a1.is_eq(map.make_addr(0)));
- EXPECT_FALSE(a2.is_eq(map.make_addr(0)));
- EXPECT_TRUE(a2.is_eq(map.make_addr(1)));
- EXPECT_TRUE(a3.is_eq(map.make_addr(2)));
-}
-
-TEST(FastSparseMapTest, fast_sparse_map_works_with_no_labels) {
- SL empty({});
- FastSparseMap map1(0, 1);
- FastSparseMap map2(0, 1);
- FastSparseMap map3(0, 1);
- EXPECT_EQ(map1.size(), 0);
- EXPECT_EQ(map2.size(), 0);
- EXPECT_EQ(map3.size(), 0);
- map1.add_mapping(empty.direct_str());
- map2.add_mapping(empty.direct_ref());
- map3.add_mapping(empty.indirect_ref());
- EXPECT_EQ(map1.size(), 1);
- EXPECT_EQ(map2.size(), 1);
- EXPECT_EQ(map3.size(), 1);
- EXPECT_EQ(map1.lookup(empty.direct_str()), 0);
- EXPECT_EQ(map1.lookup(empty.direct_ref()), 0);
- EXPECT_EQ(map1.lookup(empty.indirect_ref()), 0);
- EXPECT_EQ(map2.lookup(empty.direct_str()), 0);
- EXPECT_EQ(map2.lookup(empty.direct_ref()), 0);
- EXPECT_EQ(map2.lookup(empty.indirect_ref()), 0);
- EXPECT_EQ(map3.lookup(empty.direct_str()), 0);
- EXPECT_EQ(map3.lookup(empty.direct_ref()), 0);
- EXPECT_EQ(map3.lookup(empty.indirect_ref()), 0);
- EXPECT_EQ(map1.labels().size(), 0);
- EXPECT_EQ(map2.labels().size(), 0);
- EXPECT_EQ(map3.labels().size(), 0);
-}
-
-TEST(FastSparseMapTest, size_of_internal_types) {
- EXPECT_EQ(sizeof(hash_node<FastSparseMap::MapType::value_type>), 16);
-}
-
-GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/eval/src/vespa/eval/eval/CMakeLists.txt b/eval/src/vespa/eval/eval/CMakeLists.txt
index 5f8dd478a7b..2c0922e617c 100644
--- a/eval/src/vespa/eval/eval/CMakeLists.txt
+++ b/eval/src/vespa/eval/eval/CMakeLists.txt
@@ -12,7 +12,6 @@ vespa_add_library(eval_eval OBJECT
double_value_builder.cpp
fast_addr_map.cpp
fast_forest.cpp
- fast_sparse_map.cpp
fast_value.cpp
function.cpp
gbdt.cpp
diff --git a/eval/src/vespa/eval/eval/fast_sparse_map.cpp b/eval/src/vespa/eval/eval/fast_sparse_map.cpp
deleted file mode 100644
index e5ffbb5c515..00000000000
--- a/eval/src/vespa/eval/eval/fast_sparse_map.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "fast_sparse_map.h"
-#include <vespa/vespalib/stllike/hash_map.hpp>
-
-namespace vespalib::eval {
-
-FastSparseMap::~FastSparseMap() = default;
-
-FastSparseMap&
-FastSparseMap::operator=(const FastSparseMap& rhs) = default;
-
-FastSparseMap&
-FastSparseMap::operator=(FastSparseMap&& rhs) = default;
-
-const FastSparseMap::HashedLabel FastSparseMap::empty_label;
-
-}
-
-VESPALIB_HASH_MAP_INSTANTIATE_H_E(vespalib::eval::FastSparseMap::Key, uint32_t, vespalib::eval::FastSparseMap::Hash, vespalib::eval::FastSparseMap::Equal);
diff --git a/eval/src/vespa/eval/eval/fast_sparse_map.h b/eval/src/vespa/eval/eval/fast_sparse_map.h
deleted file mode 100644
index 99e01e8c823..00000000000
--- a/eval/src/vespa/eval/eval/fast_sparse_map.h
+++ /dev/null
@@ -1,188 +0,0 @@
-// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include "memory_usage_stuff.h"
-#include <vespa/vespalib/util/arrayref.h>
-#include <vespa/vespalib/stllike/string.h>
-#include <vespa/vespalib/stllike/hash_map.h>
-#include <vector>
-#include <xxhash.h>
-#include <type_traits>
-
-namespace vespalib::eval {
-
-/**
- * A wrapper around vespalib::hash_map, using it to map a list of
- * labels (a sparse address) to an integer value (dense subspace
- * index). Labels are stored in a separate vector to avoid
- * fragmentation caused by hash keys being vectors of values. Labels
- * can be specified in different ways during lookup and insert in
- * order to reduce the need for data restructuring when used to
- * integrate with the Value api. All labels are stored with a 64-bit
- * hash. This hash is used as label equality (assuming no
- * collisions). A order-sensitive 64bit hash constructed from
- * individual label hashes is used for address equality (also assuming
- * no collisions). The hash algorithm currently used is XXH3.
- *
- * 'add_mapping' will will bind the given address to an integer value
- * equal to the current (pre-insert) size of the map. The given
- * address MUST NOT already be in the map.
- *
- * 'lookup' will return the integer value associated with the
- * given address or a special npos value if the value is not found.
- **/
-class FastSparseMap
-{
-public:
- static uint64_t hash_label(const vespalib::string &str) {
- return XXH3_64bits(str.data(), str.size());
- }
- static uint64_t hash_label(vespalib::stringref str) {
- return XXH3_64bits(str.data(), str.size());
- }
- static uint64_t hash_label(const vespalib::stringref *str) {
- return XXH3_64bits(str->data(), str->size());
- }
-
- struct HashedLabel {
- vespalib::string label;
- uint64_t hash;
- HashedLabel() : label(), hash(0) {}
- HashedLabel(const HashedLabel &rhs) = default;
- HashedLabel &operator=(const HashedLabel &rhs) = default;
- HashedLabel(HashedLabel &&rhs) = default;
- HashedLabel &operator=(HashedLabel &&rhs) = default;
- HashedLabel(const vespalib::string &str) : label(str), hash(hash_label(str)) {}
- HashedLabel(vespalib::stringref str) : label(str), hash(hash_label(str)) {}
- HashedLabel(const vespalib::stringref *str) : label(*str), hash(hash_label(*str)) {}
- };
-
- static const HashedLabel empty_label;
-
- static uint64_t hash_label(const HashedLabel &label) {
- return label.hash;
- }
-
- struct Key {
- uint64_t hash;
- Key() : hash(0) {}
- Key(uint64_t hash_in)
- : hash(hash_in) {}
- } __attribute__((packed,aligned(4)));
-
- struct Hash {
- uint64_t operator()(const Key &key) const { return key.hash; }
- uint64_t operator()(uint64_t hash) const { return hash; }
- };
-
- struct Equal {
- bool operator()(const Key &a, uint64_t b) const { return (a.hash == b); }
- bool operator()(const Key &a, const Key &b) const { return (a.hash == b.hash); }
- };
-
- using MapType = vespalib::hash_map<Key,uint32_t,Hash,Equal>;
-
-private:
- size_t _num_dims;
- std::vector<HashedLabel> _labels;
- MapType _map;
-
-public:
- FastSparseMap(size_t num_dims_in, size_t expected_subspaces)
- : _num_dims(num_dims_in), _labels(), _map(expected_subspaces * 2)
- {
- static_assert(std::is_same_v<XXH64_hash_t, uint64_t>);
- _labels.reserve(_num_dims * expected_subspaces);
- }
- ~FastSparseMap();
-
- FastSparseMap& operator=(const FastSparseMap& rhs);
- FastSparseMap& operator=(FastSparseMap&& rhs);
-
- MemoryUsage estimate_extra_memory_usage() const {
- MemoryUsage extra_usage;
- size_t map_self_size = sizeof(_map);
- size_t map_used = _map.getMemoryUsed();
- size_t map_allocated = _map.getMemoryConsumption();
- // avoid double-counting the map itself
- map_used = std::min(map_used, map_used - map_self_size);
- map_allocated = std::min(map_allocated, map_allocated - map_self_size);
- extra_usage.merge(MemoryUsage(map_used, map_allocated, 0, 0));
- extra_usage.merge(vector_extra_memory_usage(_labels));
- return extra_usage;
- }
-
- size_t size() const { return _map.size(); }
- size_t num_dims() const { return _num_dims; }
- static constexpr size_t npos() { return -1; }
- const std::vector<HashedLabel> &labels() const { return _labels; }
-
- ConstArrayRef<HashedLabel> make_addr(uint32_t index) const {
- return ConstArrayRef<HashedLabel>(&_labels[index * _num_dims], _num_dims);
- }
-
- template <typename T>
- uint64_t hash_addr(ConstArrayRef<T> addr) const {
- uint64_t h = 0;
- for (const auto &label: addr) {
- h = 31 * h + hash_label(label);
- }
- return h;
- }
-
- // used to add a mapping, but in the unlikely case
- // of hash collision it works like a lookup instead.
- template <typename T>
- uint32_t add_mapping(ConstArrayRef<T> addr, uint64_t hash) {
- uint32_t value = _map.size();
- auto [iter, did_add] = _map.insert(std::make_pair(Key(hash), value));
- if (__builtin_expect(did_add, true)) {
- for (const auto &label: addr) {
- _labels.emplace_back(label);
- }
- return value;
- }
- return iter->second;
- }
-
- // used to add a mapping, but in the unlikely case
- // of hash collision it works like a lookup instead.
- template <typename T>
- uint32_t add_mapping(ConstArrayRef<T> addr) {
- uint64_t hash = 0;
- size_t old_labels_size = _labels.size();
- for (const auto &label: addr) {
- _labels.emplace_back(label);
- hash = 31 * hash + hash_label(_labels.back());
- }
- uint32_t value = _map.size();
- auto [iter, did_add] = _map.insert(std::make_pair(Key(hash), value));
- if (__builtin_expect(did_add, true)) {
- return value;
- }
- // undo adding to _labels
- _labels.resize(old_labels_size);
- return iter->second;
- }
-
- size_t lookup(uint64_t hash) const {
- auto pos = _map.find(hash);
- return (pos == _map.end()) ? npos() : pos->second;
- }
-
- template <typename T>
- size_t lookup(ConstArrayRef<T> addr) const {
- return lookup(hash_addr(addr));
- }
-
- template <typename F>
- void each_map_entry(F &&f) const {
- _map.for_each([&](const auto &entry)
- {
- f(entry.second, entry.first.hash);
- });
- }
-};
-
-}