From 669ab7bc6ee65bb7009d582bcdacc004807e6719 Mon Sep 17 00:00:00 2001 From: HÃ¥vard Pettersen Date: Thu, 14 Jan 2021 10:45:09 +0000 Subject: remove FastSparseMap the use of string_id makes this obsolete --- eval/CMakeLists.txt | 1 - eval/src/tests/eval/fast_sparse_map/CMakeLists.txt | 9 - .../eval/fast_sparse_map/fast_sparse_map_test.cpp | 123 -------------- eval/src/vespa/eval/eval/CMakeLists.txt | 1 - eval/src/vespa/eval/eval/fast_sparse_map.cpp | 20 --- eval/src/vespa/eval/eval/fast_sparse_map.h | 188 --------------------- 6 files changed, 342 deletions(-) delete mode 100644 eval/src/tests/eval/fast_sparse_map/CMakeLists.txt delete mode 100644 eval/src/tests/eval/fast_sparse_map/fast_sparse_map_test.cpp delete mode 100644 eval/src/vespa/eval/eval/fast_sparse_map.cpp delete mode 100644 eval/src/vespa/eval/eval/fast_sparse_map.h (limited to 'eval') diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 9f9818000fb..7aad02bb67a 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -15,7 +15,6 @@ vespa_define_module( src/tests/eval/array_array_map src/tests/eval/compile_cache src/tests/eval/compiled_function - src/tests/eval/fast_sparse_map src/tests/eval/fast_value src/tests/eval/function src/tests/eval/function_speed diff --git a/eval/src/tests/eval/fast_sparse_map/CMakeLists.txt b/eval/src/tests/eval/fast_sparse_map/CMakeLists.txt deleted file mode 100644 index fd9c1d37d43..00000000000 --- a/eval/src/tests/eval/fast_sparse_map/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(eval_fast_sparse_map_test_app TEST - SOURCES - fast_sparse_map_test.cpp - DEPENDS - vespaeval - GTest::GTest -) -vespa_add_test(NAME eval_fast_sparse_map_test_app COMMAND eval_fast_sparse_map_test_app) diff --git a/eval/src/tests/eval/fast_sparse_map/fast_sparse_map_test.cpp b/eval/src/tests/eval/fast_sparse_map/fast_sparse_map_test.cpp deleted file mode 100644 index 18517b34f57..00000000000 --- a/eval/src/tests/eval/fast_sparse_map/fast_sparse_map_test.cpp +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include -#include -#include - -using namespace vespalib; -using namespace vespalib::eval; - -class StringList { -private: - std::vector _str_list; - std::vector _ref_list; - std::vector _ref_ptr_list; -public: - StringList(const std::vector &list) - : _str_list(list), _ref_list(), _ref_ptr_list() - { - for (const auto &str: _str_list) { - _ref_list.push_back(str); - } - for (const auto &ref: _ref_list) { - _ref_ptr_list.push_back(&ref); - } - } - ~StringList(); - ConstArrayRef direct_str() const { return _str_list; } - ConstArrayRef direct_ref() const { return _ref_list; } - ConstArrayRef indirect_ref() const { return _ref_ptr_list; } - bool is_eq(ConstArrayRef addr) const { - if (addr.size() != _str_list.size()) { - return false; - } - for (size_t i = 0; i < addr.size(); ++i) { - if (addr[i].label != _str_list[i]) { - return false; - } - } - return true; - } -}; -StringList::~StringList() = default; -using SL = StringList; - -TEST(FastSparseMapTest, fast_sparse_map_basic_usage_works) { - SL a1({"a","a","a"}); - SL a2({"a","a","b"}); - SL a3({"a","b","a"}); - SL a4({"b","a","a"}); - FastSparseMap map(3, 128); - EXPECT_EQ(map.size(), 0); - map.add_mapping(a1.direct_str()); - map.add_mapping(a2.direct_ref()); - map.add_mapping(a3.indirect_ref()); - EXPECT_EQ(map.size(), 3); - EXPECT_EQ(map.lookup(a1.direct_str()), 0); - EXPECT_EQ(map.lookup(a1.direct_ref()), 0); - EXPECT_EQ(map.lookup(a1.indirect_ref()), 0); - EXPECT_EQ(map.lookup(a2.direct_str()), 1); - EXPECT_EQ(map.lookup(a2.direct_ref()), 1); - EXPECT_EQ(map.lookup(a2.indirect_ref()), 1); - EXPECT_EQ(map.lookup(a3.direct_str()), 2); - EXPECT_EQ(map.lookup(a3.direct_ref()), 2); - EXPECT_EQ(map.lookup(a3.indirect_ref()), 2); - EXPECT_EQ(map.lookup(a4.direct_str()), map.npos()); - EXPECT_EQ(map.lookup(a4.direct_ref()), map.npos()); - EXPECT_EQ(map.lookup(a4.indirect_ref()), map.npos()); - EXPECT_EQ(FastSparseMap::npos(), map.npos()); - EXPECT_EQ(map.labels().size(), 9); - std::set seen_hashes; - std::map addr_map; - auto my_fun = [&](uint32_t subspace, uint64_t hash) { - uint32_t addr_tag = subspace * 3; - addr_map[addr_tag] = subspace; - seen_hashes.insert(hash); - }; - map.each_map_entry(my_fun); - EXPECT_EQ(seen_hashes.size(), 3); - EXPECT_EQ(addr_map.size(), 3); - EXPECT_NE(addr_map.find(0), addr_map.end()); - EXPECT_EQ(addr_map[0], 0); - EXPECT_EQ(addr_map[3], 1); - EXPECT_EQ(addr_map[6], 2); - EXPECT_EQ(addr_map.size(), 3); - EXPECT_TRUE(a1.is_eq(map.make_addr(0))); - EXPECT_FALSE(a2.is_eq(map.make_addr(0))); - EXPECT_TRUE(a2.is_eq(map.make_addr(1))); - EXPECT_TRUE(a3.is_eq(map.make_addr(2))); -} - -TEST(FastSparseMapTest, fast_sparse_map_works_with_no_labels) { - SL empty({}); - FastSparseMap map1(0, 1); - FastSparseMap map2(0, 1); - FastSparseMap map3(0, 1); - EXPECT_EQ(map1.size(), 0); - EXPECT_EQ(map2.size(), 0); - EXPECT_EQ(map3.size(), 0); - map1.add_mapping(empty.direct_str()); - map2.add_mapping(empty.direct_ref()); - map3.add_mapping(empty.indirect_ref()); - EXPECT_EQ(map1.size(), 1); - EXPECT_EQ(map2.size(), 1); - EXPECT_EQ(map3.size(), 1); - EXPECT_EQ(map1.lookup(empty.direct_str()), 0); - EXPECT_EQ(map1.lookup(empty.direct_ref()), 0); - EXPECT_EQ(map1.lookup(empty.indirect_ref()), 0); - EXPECT_EQ(map2.lookup(empty.direct_str()), 0); - EXPECT_EQ(map2.lookup(empty.direct_ref()), 0); - EXPECT_EQ(map2.lookup(empty.indirect_ref()), 0); - EXPECT_EQ(map3.lookup(empty.direct_str()), 0); - EXPECT_EQ(map3.lookup(empty.direct_ref()), 0); - EXPECT_EQ(map3.lookup(empty.indirect_ref()), 0); - EXPECT_EQ(map1.labels().size(), 0); - EXPECT_EQ(map2.labels().size(), 0); - EXPECT_EQ(map3.labels().size(), 0); -} - -TEST(FastSparseMapTest, size_of_internal_types) { - EXPECT_EQ(sizeof(hash_node), 16); -} - -GTEST_MAIN_RUN_ALL_TESTS() diff --git a/eval/src/vespa/eval/eval/CMakeLists.txt b/eval/src/vespa/eval/eval/CMakeLists.txt index 5f8dd478a7b..2c0922e617c 100644 --- a/eval/src/vespa/eval/eval/CMakeLists.txt +++ b/eval/src/vespa/eval/eval/CMakeLists.txt @@ -12,7 +12,6 @@ vespa_add_library(eval_eval OBJECT double_value_builder.cpp fast_addr_map.cpp fast_forest.cpp - fast_sparse_map.cpp fast_value.cpp function.cpp gbdt.cpp diff --git a/eval/src/vespa/eval/eval/fast_sparse_map.cpp b/eval/src/vespa/eval/eval/fast_sparse_map.cpp deleted file mode 100644 index e5ffbb5c515..00000000000 --- a/eval/src/vespa/eval/eval/fast_sparse_map.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "fast_sparse_map.h" -#include - -namespace vespalib::eval { - -FastSparseMap::~FastSparseMap() = default; - -FastSparseMap& -FastSparseMap::operator=(const FastSparseMap& rhs) = default; - -FastSparseMap& -FastSparseMap::operator=(FastSparseMap&& rhs) = default; - -const FastSparseMap::HashedLabel FastSparseMap::empty_label; - -} - -VESPALIB_HASH_MAP_INSTANTIATE_H_E(vespalib::eval::FastSparseMap::Key, uint32_t, vespalib::eval::FastSparseMap::Hash, vespalib::eval::FastSparseMap::Equal); diff --git a/eval/src/vespa/eval/eval/fast_sparse_map.h b/eval/src/vespa/eval/eval/fast_sparse_map.h deleted file mode 100644 index 99e01e8c823..00000000000 --- a/eval/src/vespa/eval/eval/fast_sparse_map.h +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include "memory_usage_stuff.h" -#include -#include -#include -#include -#include -#include - -namespace vespalib::eval { - -/** - * A wrapper around vespalib::hash_map, using it to map a list of - * labels (a sparse address) to an integer value (dense subspace - * index). Labels are stored in a separate vector to avoid - * fragmentation caused by hash keys being vectors of values. Labels - * can be specified in different ways during lookup and insert in - * order to reduce the need for data restructuring when used to - * integrate with the Value api. All labels are stored with a 64-bit - * hash. This hash is used as label equality (assuming no - * collisions). A order-sensitive 64bit hash constructed from - * individual label hashes is used for address equality (also assuming - * no collisions). The hash algorithm currently used is XXH3. - * - * 'add_mapping' will will bind the given address to an integer value - * equal to the current (pre-insert) size of the map. The given - * address MUST NOT already be in the map. - * - * 'lookup' will return the integer value associated with the - * given address or a special npos value if the value is not found. - **/ -class FastSparseMap -{ -public: - static uint64_t hash_label(const vespalib::string &str) { - return XXH3_64bits(str.data(), str.size()); - } - static uint64_t hash_label(vespalib::stringref str) { - return XXH3_64bits(str.data(), str.size()); - } - static uint64_t hash_label(const vespalib::stringref *str) { - return XXH3_64bits(str->data(), str->size()); - } - - struct HashedLabel { - vespalib::string label; - uint64_t hash; - HashedLabel() : label(), hash(0) {} - HashedLabel(const HashedLabel &rhs) = default; - HashedLabel &operator=(const HashedLabel &rhs) = default; - HashedLabel(HashedLabel &&rhs) = default; - HashedLabel &operator=(HashedLabel &&rhs) = default; - HashedLabel(const vespalib::string &str) : label(str), hash(hash_label(str)) {} - HashedLabel(vespalib::stringref str) : label(str), hash(hash_label(str)) {} - HashedLabel(const vespalib::stringref *str) : label(*str), hash(hash_label(*str)) {} - }; - - static const HashedLabel empty_label; - - static uint64_t hash_label(const HashedLabel &label) { - return label.hash; - } - - struct Key { - uint64_t hash; - Key() : hash(0) {} - Key(uint64_t hash_in) - : hash(hash_in) {} - } __attribute__((packed,aligned(4))); - - struct Hash { - uint64_t operator()(const Key &key) const { return key.hash; } - uint64_t operator()(uint64_t hash) const { return hash; } - }; - - struct Equal { - bool operator()(const Key &a, uint64_t b) const { return (a.hash == b); } - bool operator()(const Key &a, const Key &b) const { return (a.hash == b.hash); } - }; - - using MapType = vespalib::hash_map; - -private: - size_t _num_dims; - std::vector _labels; - MapType _map; - -public: - FastSparseMap(size_t num_dims_in, size_t expected_subspaces) - : _num_dims(num_dims_in), _labels(), _map(expected_subspaces * 2) - { - static_assert(std::is_same_v); - _labels.reserve(_num_dims * expected_subspaces); - } - ~FastSparseMap(); - - FastSparseMap& operator=(const FastSparseMap& rhs); - FastSparseMap& operator=(FastSparseMap&& rhs); - - MemoryUsage estimate_extra_memory_usage() const { - MemoryUsage extra_usage; - size_t map_self_size = sizeof(_map); - size_t map_used = _map.getMemoryUsed(); - size_t map_allocated = _map.getMemoryConsumption(); - // avoid double-counting the map itself - map_used = std::min(map_used, map_used - map_self_size); - map_allocated = std::min(map_allocated, map_allocated - map_self_size); - extra_usage.merge(MemoryUsage(map_used, map_allocated, 0, 0)); - extra_usage.merge(vector_extra_memory_usage(_labels)); - return extra_usage; - } - - size_t size() const { return _map.size(); } - size_t num_dims() const { return _num_dims; } - static constexpr size_t npos() { return -1; } - const std::vector &labels() const { return _labels; } - - ConstArrayRef make_addr(uint32_t index) const { - return ConstArrayRef(&_labels[index * _num_dims], _num_dims); - } - - template - uint64_t hash_addr(ConstArrayRef addr) const { - uint64_t h = 0; - for (const auto &label: addr) { - h = 31 * h + hash_label(label); - } - return h; - } - - // used to add a mapping, but in the unlikely case - // of hash collision it works like a lookup instead. - template - uint32_t add_mapping(ConstArrayRef addr, uint64_t hash) { - uint32_t value = _map.size(); - auto [iter, did_add] = _map.insert(std::make_pair(Key(hash), value)); - if (__builtin_expect(did_add, true)) { - for (const auto &label: addr) { - _labels.emplace_back(label); - } - return value; - } - return iter->second; - } - - // used to add a mapping, but in the unlikely case - // of hash collision it works like a lookup instead. - template - uint32_t add_mapping(ConstArrayRef addr) { - uint64_t hash = 0; - size_t old_labels_size = _labels.size(); - for (const auto &label: addr) { - _labels.emplace_back(label); - hash = 31 * hash + hash_label(_labels.back()); - } - uint32_t value = _map.size(); - auto [iter, did_add] = _map.insert(std::make_pair(Key(hash), value)); - if (__builtin_expect(did_add, true)) { - return value; - } - // undo adding to _labels - _labels.resize(old_labels_size); - return iter->second; - } - - size_t lookup(uint64_t hash) const { - auto pos = _map.find(hash); - return (pos == _map.end()) ? npos() : pos->second; - } - - template - size_t lookup(ConstArrayRef addr) const { - return lookup(hash_addr(addr)); - } - - template - void each_map_entry(F &&f) const { - _map.for_each([&](const auto &entry) - { - f(entry.second, entry.first.hash); - }); - } -}; - -} -- cgit v1.2.3