diff options
15 files changed, 0 insertions, 439 deletions
diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt index 0d87881711c..d1b88a7e2ab 100644 --- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt @@ -35,6 +35,5 @@ vespa_add_library(searchlib_tensor OBJECT tensor_deserialize.cpp tensor_store.cpp tensor_store_saver.cpp - reusable_set_visited_tracker.cpp DEPENDS ) diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index 10f06a1e1ec..a10ff1c48dc 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -23,8 +23,6 @@ LOG_SETUP(".searchlib.tensor.hnsw_index"); -#define USE_OLD_VISITED_TRACKER 0 - namespace search::tensor { using search::AddressSpaceComponents; @@ -319,15 +317,11 @@ HnswIndex::search_layer(const TypedCells& input, uint32_t neighbors_to_find, doc_id_limit = std::min(filter->size(), doc_id_limit); } uint32_t estimated_visited_nodes = estimate_visited_nodes(level, doc_id_limit, neighbors_to_find, filter); -#if ! USE_OLD_VISITED_TRACKER if (estimated_visited_nodes >= doc_id_limit / 128) { search_layer_helper<BitVectorVisitedTracker>(input, neighbors_to_find, best_neighbors, level, filter, doc_id_limit, estimated_visited_nodes); } else { search_layer_helper<HashSetVisitedTracker>(input, neighbors_to_find, best_neighbors, level, filter, doc_id_limit, estimated_visited_nodes); } -#else - search_layer_helper<ReusableSetVisitedTracker>(input, neighbors_to_find, best_neighbors, level, filter, doc_id_limit, estimated_visited_nodes); -#endif } HnswIndex::HnswIndex(const DocVectorAccess& vectors, DistanceFunction::UP distance_func, @@ -337,7 +331,6 @@ HnswIndex::HnswIndex(const DocVectorAccess& vectors, DistanceFunction::UP distan _distance_func(std::move(distance_func)), _level_generator(std::move(level_generator)), _cfg(cfg), - _visited_set_pool(), _compaction_spec() { assert(_distance_func); @@ -597,7 +590,6 @@ HnswIndex::update_stat(const CompactionStrategy& compaction_strategy) _compaction_spec = HnswIndexCompactionSpec(compaction_strategy.should_compact(level_arrays_memory_usage, level_arrays_address_space_usage), compaction_strategy.should_compact(link_arrays_memory_usage, link_arrays_address_space_usage)); result.merge(link_arrays_memory_usage); - result.merge(_visited_set_pool.memory_usage()); return result; } @@ -608,7 +600,6 @@ HnswIndex::memory_usage() const result.merge(_graph.node_refs.getMemoryUsage()); result.merge(_graph.nodes.getMemoryUsage()); result.merge(_graph.links.getMemoryUsage()); - result.merge(_visited_set_pool.memory_usage()); return result; } @@ -628,10 +619,6 @@ HnswIndex::get_state(const vespalib::slime::Inserter& inserter) const StateExplorerUtils::memory_usage_to_slime(_graph.node_refs.getMemoryUsage(), memUsageObj.setObject("node_refs")); StateExplorerUtils::memory_usage_to_slime(_graph.nodes.getMemoryUsage(), memUsageObj.setObject("nodes")); StateExplorerUtils::memory_usage_to_slime(_graph.links.getMemoryUsage(), memUsageObj.setObject("links")); - StateExplorerUtils::memory_usage_to_slime(_visited_set_pool.memory_usage(), memUsageObj.setObject("visited_set_pool")); - auto& visitedObj = object.setObject("visited_set"); - visitedObj.setLong("create_count", _visited_set_pool.create_count()); - visitedObj.setLong("reuse_count", _visited_set_pool.reuse_count()); object.setLong("nodes", _graph.size()); auto& histogram_array = object.setArray("level_histogram"); auto& links_hst_array = object.setArray("level_0_links_histogram"); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index 8a7422907ea..43e3001b496 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -15,7 +15,6 @@ #include <vespa/vespalib/datastore/atomic_entry_ref.h> #include <vespa/vespalib/datastore/compaction_spec.h> #include <vespa/vespalib/datastore/entryref.h> -#include <vespa/vespalib/util/reusable_set_pool.h> #include <vespa/vespalib/stllike/allocator.h> namespace search::tensor { @@ -98,7 +97,6 @@ protected: DistanceFunction::UP _distance_func; RandomLevelGenerator::UP _level_generator; Config _cfg; - mutable vespalib::ReusableSetPool _visited_set_pool; HnswIndexCompactionSpec _compaction_spec; uint32_t max_links_for_level(uint32_t level) const; @@ -221,7 +219,6 @@ public: bool check_link_symmetry() const; std::pair<uint32_t, bool> count_reachable_nodes() const; HnswGraph& get_graph() { return _graph; } - vespalib::ReusableSetPool& get_visited_set_pool() const noexcept { return _visited_set_pool; } static vespalib::datastore::ArrayStoreConfig make_default_node_store_config(); static vespalib::datastore::ArrayStoreConfig make_default_link_store_config(); diff --git a/searchlib/src/vespa/searchlib/tensor/reusable_set_visited_tracker.cpp b/searchlib/src/vespa/searchlib/tensor/reusable_set_visited_tracker.cpp deleted file mode 100644 index d3cdd28fea3..00000000000 --- a/searchlib/src/vespa/searchlib/tensor/reusable_set_visited_tracker.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "reusable_set_visited_tracker.h" -#include "hnsw_index.h" - -namespace search::tensor { - -ReusableSetVisitedTracker::ReusableSetVisitedTracker(const HnswIndex& index, uint32_t doc_id_limit, uint32_t) - : _visited(index.get_visited_set_pool().get(doc_id_limit)) -{ -} - -ReusableSetVisitedTracker::~ReusableSetVisitedTracker() = default; - -} diff --git a/searchlib/src/vespa/searchlib/tensor/reusable_set_visited_tracker.h b/searchlib/src/vespa/searchlib/tensor/reusable_set_visited_tracker.h deleted file mode 100644 index 9528cfe78f9..00000000000 --- a/searchlib/src/vespa/searchlib/tensor/reusable_set_visited_tracker.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <vespa/vespalib/util/reusable_set_handle.h> - -namespace search::tensor { - -class HnswIndex; - -/* - * Tracker for visited nodes based on vespalib::ReusableSet. - */ -class ReusableSetVisitedTracker -{ - vespalib::ReusableSetHandle _visited; -public: - ReusableSetVisitedTracker(const HnswIndex& index, uint32_t doc_id_limit, uint32_t); - ~ReusableSetVisitedTracker(); - void mark(uint32_t doc_id) { _visited.mark(doc_id); } - bool try_mark(uint32_t doc_id) { - if (_visited.is_marked(doc_id)) { - return false; - } else { - _visited.mark(doc_id); - return true; - } - } -}; - -} diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt index 94ee05930d5..c498639533f 100644 --- a/vespalib/CMakeLists.txt +++ b/vespalib/CMakeLists.txt @@ -192,7 +192,6 @@ vespa_define_module( src/tests/util/mmap_file_allocator src/tests/util/mmap_file_allocator_factory src/tests/util/rcuvector - src/tests/util/reusable_set src/tests/util/size_literals src/tests/util/string_escape src/tests/valgrind diff --git a/vespalib/src/tests/util/reusable_set/CMakeLists.txt b/vespalib/src/tests/util/reusable_set/CMakeLists.txt deleted file mode 100644 index fa67311db6f..00000000000 --- a/vespalib/src/tests/util/reusable_set/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(vespalib_reusable_set_test_app TEST - SOURCES - reusable_set_test.cpp - DEPENDS - vespalib - GTest::GTest -) -vespa_add_test(NAME vespalib_reusable_set_test_app COMMAND vespalib_reusable_set_test_app) diff --git a/vespalib/src/tests/util/reusable_set/reusable_set_test.cpp b/vespalib/src/tests/util/reusable_set/reusable_set_test.cpp deleted file mode 100644 index fb3e73d03ed..00000000000 --- a/vespalib/src/tests/util/reusable_set/reusable_set_test.cpp +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include <vespa/vespalib/util/reusable_set_pool.h> -#include <vespa/vespalib/gtest/gtest.h> - -using namespace vespalib; - -using Mark = ReusableSet::Mark; - -void verify_set(const ReusableSet &set, size_t sz, Mark val, size_t marked) { - EXPECT_EQ(sz, set.capacity()); - EXPECT_EQ(val, set.generation()); - size_t count = 0; - for (size_t i = 0; i < set.capacity(); ++i) { - if (set.is_marked(i)) ++count; - } - EXPECT_EQ(marked, count); -} - -void verify_handle(const ReusableSetHandle &set, size_t sz, Mark val, size_t marked) { - EXPECT_EQ(sz, set.capacity()); - EXPECT_EQ(val, set.generation()); - size_t count = 0; - for (size_t i = 0; i < set.capacity(); ++i) { - if (set.is_marked(i)) ++count; - } - EXPECT_EQ(marked, count); -} - -class Pool : public ::testing::Test { -public: - ReusableSetPool pool; - Pool() : pool() {} - ~Pool() {} - void exercise(ReusableSetHandle &set) { - size_t sz = set.capacity(); - size_t count = 0; - for (size_t i = 0; i < sz; ++i) { - if (set.is_marked(i)) ++count; - } - EXPECT_EQ(0, count); - for (int i = 0; i < 17; ++i) { - set.mark((i * 711) % sz); - } - count = 0; - for (size_t i = 0; i < sz; ++i) { - if (set.is_marked(i)) ++count; - } - EXPECT_EQ(17, count); - for (int i = 0; i < 17; ++i) { - set.mark((i * 711) % sz); - } - count = 0; - for (size_t i = 0; i < sz; ++i) { - if (set.is_marked(i)) ++count; - } - EXPECT_EQ(17, count); - } -}; - - -TEST(ReusableSetTest, simple_usage) -{ - ReusableSet visited(7); - verify_set(visited, 7, 1, 0); - visited.mark(1); - visited.mark(2); - visited.mark(4); - EXPECT_EQ(false, visited.is_marked(0)); - EXPECT_EQ(true, visited.is_marked(1)); - EXPECT_EQ(true, visited.is_marked(2)); - EXPECT_EQ(false, visited.is_marked(3)); - verify_set(visited, 7, 1, 3); - visited.mark(4); - visited.mark(1); - visited.mark(2); - verify_set(visited, 7, 1, 3); - EXPECT_EQ(false, visited.is_marked(0)); - EXPECT_EQ(true, visited.is_marked(1)); - EXPECT_EQ(true, visited.is_marked(2)); - EXPECT_EQ(false, visited.is_marked(3)); - visited.clear(); - verify_set(visited, 7, 2, 0); - visited.clear(); - verify_set(visited, 7, 3, 0); -} - -TEST_F(Pool, reuse_works) -{ - for (int i = 0; i < 65535; ++i) { - auto handle = pool.get(7); - EXPECT_EQ(i, pool.reuse_count()); - EXPECT_EQ(1, pool.create_count()); - verify_handle(handle, 248, i+1, 0); - exercise(handle); - } - EXPECT_LT(500, pool.memory_usage().allocatedBytes()); - EXPECT_GT(1000, pool.memory_usage().allocatedBytes()); - for (int i = 0; i < 5; ++i) { - auto handle = pool.get(7); - EXPECT_EQ(65535+i, pool.reuse_count()); - EXPECT_EQ(1, pool.create_count()); - verify_handle(handle, 248, i+1, 0); - exercise(handle); - } - auto handle3 = pool.get(260); - EXPECT_EQ(2, pool.create_count()); - verify_handle(handle3, 297, 1, 0); - exercise(handle3); - { - auto handle4 = pool.get(400); - EXPECT_EQ(3, pool.create_count()); - verify_handle(handle4, 400, 1, 0); - exercise(handle4); - EXPECT_LT(1000, pool.memory_usage().usedBytes()); - EXPECT_GT(2000, pool.memory_usage().usedBytes()); - } - EXPECT_LT(500, pool.memory_usage().usedBytes()); - EXPECT_GT(1000, pool.memory_usage().usedBytes()); - auto handle7 = pool.get(401); - EXPECT_EQ(4, pool.create_count()); - verify_handle(handle7, 480, 1, 0); - exercise(handle7); - EXPECT_LT(1000, pool.memory_usage().allocatedBytes()); - EXPECT_GT(3000, pool.memory_usage().allocatedBytes()); - { - auto handle8 = pool.get(2500); - auto handle9 = pool.get(2500); - EXPECT_LT(11000, pool.memory_usage().allocatedBytes()); - EXPECT_GT(13000, pool.memory_usage().allocatedBytes()); - auto handleA = pool.get(25000); - auto handleB = pool.get(25000); - EXPECT_LT(111000, pool.memory_usage().usedBytes()); - EXPECT_GT(113000, pool.memory_usage().usedBytes()); - } - EXPECT_GT(3000, pool.memory_usage().usedBytes()); -} - -GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/vespa/vespalib/util/CMakeLists.txt b/vespalib/src/vespa/vespalib/util/CMakeLists.txt index 8cdc9444daa..3812fda4bdf 100644 --- a/vespalib/src/vespa/vespalib/util/CMakeLists.txt +++ b/vespalib/src/vespa/vespalib/util/CMakeLists.txt @@ -68,9 +68,6 @@ vespa_add_library(vespalib_vespalib_util OBJECT regexp.cpp require.cpp resource_limits.cpp - reusable_set.cpp - reusable_set_handle.cpp - reusable_set_pool.cpp round_up_to_page_size.cpp runnable.cpp runnable_pair.cpp diff --git a/vespalib/src/vespa/vespalib/util/reusable_set.cpp b/vespalib/src/vespa/vespalib/util/reusable_set.cpp deleted file mode 100644 index 22b8903ccc7..00000000000 --- a/vespalib/src/vespa/vespalib/util/reusable_set.cpp +++ /dev/null @@ -1,3 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "reusable_set.h" diff --git a/vespalib/src/vespa/vespalib/util/reusable_set.h b/vespalib/src/vespa/vespalib/util/reusable_set.h deleted file mode 100644 index 081cad05df3..00000000000 --- a/vespalib/src/vespa/vespalib/util/reusable_set.h +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include "array.h" -#include "array.hpp" -#include <memory> -#include <cstring> - - -namespace vespalib { - -/** - * Generational marker implementation of a vector of boolean values. - * Limited API, used for marking "seen" nodes when exploring a graph. - **/ -class ReusableSet - -{ -public: - using Mark = unsigned short; - - explicit ReusableSet(size_t size) - : _array(size), - _curval(-1), - _sz(size) - { - clear(); - } - - ~ReusableSet() { - } - - /** - * Increments the generation value, only - * initializing the underlying memory when it wraps - **/ - void clear() { - if (++_curval == 0) { - memset(bits(), 0, _sz * sizeof(Mark)); - ++_curval; - } - } - - void mark(size_t id) { - _array[id] = _curval; - } - - bool is_marked(size_t id) const { - return (_array[id] == _curval); - } - - Mark *bits() { return _array.begin(); } - Mark generation() const { return _curval; } - size_t capacity() const { return _sz; } - - size_t memory_usage() const { - return (_sz * sizeof(Mark)) + sizeof(ReusableSet); - } - -private: - Array<Mark> _array; - Mark _curval; - size_t _sz; -}; - -} // namespace diff --git a/vespalib/src/vespa/vespalib/util/reusable_set_handle.cpp b/vespalib/src/vespa/vespalib/util/reusable_set_handle.cpp deleted file mode 100644 index e7d5aef0eda..00000000000 --- a/vespalib/src/vespa/vespalib/util/reusable_set_handle.cpp +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "reusable_set_handle.h" -#include "reusable_set_pool.h" - -namespace vespalib { - -ReusableSetHandle::~ReusableSetHandle() -{ - _pool.reuse(std::move(_owned)); -} - -} // namespace diff --git a/vespalib/src/vespa/vespalib/util/reusable_set_handle.h b/vespalib/src/vespa/vespalib/util/reusable_set_handle.h deleted file mode 100644 index 77a2e3a69d5..00000000000 --- a/vespalib/src/vespa/vespalib/util/reusable_set_handle.h +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include "reusable_set.h" - -namespace vespalib { - -class ReusableSetPool; - -/** - * Wraps a ReusableSet allocated from a ReusableSetPool. - * Note that the handle returns the wrapped set to the pool - * on destruction. - **/ -class ReusableSetHandle -{ -private: - using Mark = ReusableSet::Mark; - using RSUP = std::unique_ptr<ReusableSet>; - - Mark *_bits; - Mark _curval; - RSUP _owned; - ReusableSetPool &_pool; - -public: - ReusableSetHandle(RSUP backing, ReusableSetPool& owner) - : _bits(backing->bits()), - _curval(backing->generation()), - _owned(std::move(backing)), - _pool(owner) - {} - - ~ReusableSetHandle(); - - /** mark an ID */ - void mark(size_t id) { - _bits[id] = _curval; - } - - /** check if an ID has been marked */ - bool is_marked(size_t id) const { - return (_bits[id] == _curval); - } - - // for unit tests and statistics - size_t capacity() const { return _owned->capacity(); } - Mark generation() const { return _curval; } -}; - -} // namespace diff --git a/vespalib/src/vespa/vespalib/util/reusable_set_pool.cpp b/vespalib/src/vespa/vespalib/util/reusable_set_pool.cpp deleted file mode 100644 index c59c2220457..00000000000 --- a/vespalib/src/vespa/vespalib/util/reusable_set_pool.cpp +++ /dev/null @@ -1,3 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "reusable_set_pool.h" diff --git a/vespalib/src/vespa/vespalib/util/reusable_set_pool.h b/vespalib/src/vespa/vespalib/util/reusable_set_pool.h deleted file mode 100644 index 65a0630f4ac..00000000000 --- a/vespalib/src/vespa/vespalib/util/reusable_set_pool.h +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include "reusable_set.h" -#include "reusable_set_handle.h" -#include "memoryusage.h" - -#include <vector> -#include <mutex> - -namespace vespalib { - -/** - * A resource pool for ReusableSet instances. - * Note that the pool should have a guaranteed lifetime - * that is longer than any Handle retrieved from the pool. - **/ -class ReusableSetPool -{ - using RSUP = std::unique_ptr<ReusableSet>; - using Guard = std::lock_guard<std::mutex>; - std::vector<RSUP> _lru_stack; - mutable std::mutex _lock; - size_t _reuse_count; - size_t _create_count; - MemoryUsage _total_memory; - const size_t _min_size; - const size_t _grow_percent; - - ReusableSetPool(const ReusableSetPool &) = delete; - ReusableSetPool& operator= (const ReusableSetPool &) = delete; - -public: - ReusableSetPool() - : _lru_stack(), _lock(), - _reuse_count(0), _create_count(0), - _total_memory(), - _min_size(248), _grow_percent(20) - { - _total_memory.incAllocatedBytes(sizeof(ReusableSetPool)); - } - - /** Create or re-use a set with (at least) the given size. */ - ReusableSetHandle get(size_t size) { - Guard guard(_lock); - size_t last_used_size = 0; - while (! _lru_stack.empty()) { - RSUP r = std::move(_lru_stack.back()); - _lru_stack.pop_back(); - if (r->capacity() >= size) { - r->clear(); - ++_reuse_count; - _total_memory.incUsedBytes(r->memory_usage()); - return ReusableSetHandle(std::move(r), *this); - } - _total_memory.decAllocatedBytes(r->memory_usage()); - last_used_size = std::max(last_used_size, r->capacity()); - } - double grow_factor = (1.0 + _grow_percent / 100.0); - last_used_size *= grow_factor; - size_t at_least_size = std::max(_min_size, last_used_size); - RSUP r = std::make_unique<ReusableSet>(std::max(at_least_size, size)); - _total_memory.incAllocatedBytes(r->memory_usage()); - ++_create_count; - _total_memory.incUsedBytes(r->memory_usage()); - return ReusableSetHandle(std::move(r), *this); - } - - /** Return a ReusableSet to the pool. */ - void reuse(RSUP used) { - Guard guard(_lock); - _total_memory.decUsedBytes(used->memory_usage()); - _lru_stack.push_back(std::move(used)); - } - - // for unit testing and statistics - size_t reuse_count() const { return _reuse_count; } - size_t create_count() const { return _create_count; } - MemoryUsage memory_usage() const { - Guard guard(_lock); - return _total_memory; - } -}; - -} // namespace |