diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-02-27 14:53:19 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-02-27 14:56:02 +0000 |
commit | 86b796ae6e77ce18f0837eba381a396e35cf4fee (patch) | |
tree | c752bec4a613159db21abf560d69a3fee72f6026 /searchlib | |
parent | cd00f3cf5e3ed3f78417241871ad06518a0dee46 (diff) |
make actual random levels
Diffstat (limited to 'searchlib')
5 files changed, 56 insertions, 7 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index 2516950b0cc..a2a29e7bc38 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -5,6 +5,7 @@ #include <vespa/searchlib/tensor/doc_vector_access.h> #include <vespa/searchlib/tensor/hnsw_index.h> #include <vespa/searchlib/tensor/random_level_generator.h> +#include <vespa/searchlib/tensor/inv_log_level_generator.h> #include <vespa/vespalib/gtest/gtest.h> #include <vespa/vespalib/util/generationhandler.h> #include <vector> @@ -65,6 +66,9 @@ public: .set(4, {1, 2}).set(5, {8, 3}).set(6, {7, 2}) .set(7, {3, 5}).set(8, {0, 3}).set(9, {4, 5}); } + + ~HnswIndexTest() {} + void init(bool heuristic_select_neighbors) { auto generator = std::make_unique<LevelGenerator>(); level_generator = generator.get(); @@ -441,5 +445,19 @@ TEST_F(HnswIndexTest, shrink_called_heuristic) EXPECT_TRUE(index->check_link_symmetry()); } +TEST(LevelGeneratorTest, gives_various_levels) +{ + InvLogLevelGenerator generator(4); + EXPECT_EQ(2u, generator.max_level()); + EXPECT_EQ(1u, generator.max_level()); + EXPECT_EQ(0u, generator.max_level()); + EXPECT_EQ(1u, generator.max_level()); + EXPECT_EQ(0u, generator.max_level()); + EXPECT_EQ(1u, generator.max_level()); + EXPECT_EQ(0u, generator.max_level()); + EXPECT_EQ(0u, generator.max_level()); + EXPECT_EQ(0u, generator.max_level()); +} + GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt index 0bdcd53af77..d0f33c6fd0b 100644 --- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt @@ -11,6 +11,7 @@ vespa_add_library(searchlib_tensor OBJECT hnsw_index.cpp imported_tensor_attribute_vector.cpp imported_tensor_attribute_vector_read_guard.cpp + inv_log_level_generator.cpp nearest_neighbor_index.cpp tensor_attribute.cpp tensor_store.cpp diff --git a/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp b/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp index 68efe6417c0..8b0c5e931a4 100644 --- a/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp +++ b/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp @@ -4,6 +4,7 @@ #include "distance_functions.h" #include "hnsw_index.h" #include "random_level_generator.h" +#include "inv_log_level_generator.h" #include <vespa/searchcommon/attribute/config.h> namespace search::tensor { @@ -27,24 +28,24 @@ make_distance_function(ValueType::CellType cell_type) } RandomLevelGenerator::UP -make_random_level_generator() +make_random_level_generator(uint32_t m) { - // TODO: Make generator that results in hierarchical graph. - return std::make_unique<LevelZeroGenerator>(); + return std::make_unique<InvLogLevelGenerator>(m); } -} +} // namespace <unnamed> std::unique_ptr<NearestNeighborIndex> DefaultNearestNeighborIndexFactory::make(const DocVectorAccess& vectors, vespalib::eval::ValueType::CellType cell_type, const search::attribute::HnswIndexParams& params) const { - HnswIndex::Config cfg(params.max_links_per_node() * 2, - params.max_links_per_node(), + uint32_t m = params.max_links_per_node(); + HnswIndex::Config cfg(m * 2, + m, params.neighbors_to_explore_at_insert(), true); - return std::make_unique<HnswIndex>(vectors, make_distance_function(cell_type), make_random_level_generator(), cfg); + return std::make_unique<HnswIndex>(vectors, make_distance_function(cell_type), make_random_level_generator(m), cfg); } } diff --git a/searchlib/src/vespa/searchlib/tensor/inv_log_level_generator.cpp b/searchlib/src/vespa/searchlib/tensor/inv_log_level_generator.cpp new file mode 100644 index 00000000000..540edc5e664 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/inv_log_level_generator.cpp @@ -0,0 +1,3 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "inv_log_level_generator.h" diff --git a/searchlib/src/vespa/searchlib/tensor/inv_log_level_generator.h b/searchlib/src/vespa/searchlib/tensor/inv_log_level_generator.h new file mode 100644 index 00000000000..3d69e40625b --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/inv_log_level_generator.h @@ -0,0 +1,26 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "random_level_generator.h" +#include <random> + +namespace search::tensor { + +class InvLogLevelGenerator : public RandomLevelGenerator { + std::mt19937_64 _rng; + std::uniform_real_distribution<double> _uniform; + double _levelMultiplier; +public: + InvLogLevelGenerator(uint32_t m) + : _rng(0x1234deadbeef5678uLL), + _uniform(0.0, 1.0), + _levelMultiplier(1.0 / log(1.0 * m)) + {} + + uint32_t max_level() override { + double unif = _uniform(_rng); + double r = -log(1.0-unif) * _levelMultiplier; + return (uint32_t) r; + } +}; + +} |