summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorArne Juul <arnej@verizonmedia.com>2020-02-27 14:53:19 +0000
committerArne Juul <arnej@verizonmedia.com>2020-02-27 14:56:02 +0000
commit86b796ae6e77ce18f0837eba381a396e35cf4fee (patch)
treec752bec4a613159db21abf560d69a3fee72f6026 /searchlib
parentcd00f3cf5e3ed3f78417241871ad06518a0dee46 (diff)
make actual random levels
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp18
-rw-r--r--searchlib/src/vespa/searchlib/tensor/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp15
-rw-r--r--searchlib/src/vespa/searchlib/tensor/inv_log_level_generator.cpp3
-rw-r--r--searchlib/src/vespa/searchlib/tensor/inv_log_level_generator.h26
5 files changed, 56 insertions, 7 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
index 2516950b0cc..a2a29e7bc38 100644
--- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
@@ -5,6 +5,7 @@
#include <vespa/searchlib/tensor/doc_vector_access.h>
#include <vespa/searchlib/tensor/hnsw_index.h>
#include <vespa/searchlib/tensor/random_level_generator.h>
+#include <vespa/searchlib/tensor/inv_log_level_generator.h>
#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/vespalib/util/generationhandler.h>
#include <vector>
@@ -65,6 +66,9 @@ public:
.set(4, {1, 2}).set(5, {8, 3}).set(6, {7, 2})
.set(7, {3, 5}).set(8, {0, 3}).set(9, {4, 5});
}
+
+ ~HnswIndexTest() {}
+
void init(bool heuristic_select_neighbors) {
auto generator = std::make_unique<LevelGenerator>();
level_generator = generator.get();
@@ -441,5 +445,19 @@ TEST_F(HnswIndexTest, shrink_called_heuristic)
EXPECT_TRUE(index->check_link_symmetry());
}
+TEST(LevelGeneratorTest, gives_various_levels)
+{
+ InvLogLevelGenerator generator(4);
+ EXPECT_EQ(2u, generator.max_level());
+ EXPECT_EQ(1u, generator.max_level());
+ EXPECT_EQ(0u, generator.max_level());
+ EXPECT_EQ(1u, generator.max_level());
+ EXPECT_EQ(0u, generator.max_level());
+ EXPECT_EQ(1u, generator.max_level());
+ EXPECT_EQ(0u, generator.max_level());
+ EXPECT_EQ(0u, generator.max_level());
+ EXPECT_EQ(0u, generator.max_level());
+}
+
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
index 0bdcd53af77..d0f33c6fd0b 100644
--- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
@@ -11,6 +11,7 @@ vespa_add_library(searchlib_tensor OBJECT
hnsw_index.cpp
imported_tensor_attribute_vector.cpp
imported_tensor_attribute_vector_read_guard.cpp
+ inv_log_level_generator.cpp
nearest_neighbor_index.cpp
tensor_attribute.cpp
tensor_store.cpp
diff --git a/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp b/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp
index 68efe6417c0..8b0c5e931a4 100644
--- a/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp
@@ -4,6 +4,7 @@
#include "distance_functions.h"
#include "hnsw_index.h"
#include "random_level_generator.h"
+#include "inv_log_level_generator.h"
#include <vespa/searchcommon/attribute/config.h>
namespace search::tensor {
@@ -27,24 +28,24 @@ make_distance_function(ValueType::CellType cell_type)
}
RandomLevelGenerator::UP
-make_random_level_generator()
+make_random_level_generator(uint32_t m)
{
- // TODO: Make generator that results in hierarchical graph.
- return std::make_unique<LevelZeroGenerator>();
+ return std::make_unique<InvLogLevelGenerator>(m);
}
-}
+} // namespace <unnamed>
std::unique_ptr<NearestNeighborIndex>
DefaultNearestNeighborIndexFactory::make(const DocVectorAccess& vectors,
vespalib::eval::ValueType::CellType cell_type,
const search::attribute::HnswIndexParams& params) const
{
- HnswIndex::Config cfg(params.max_links_per_node() * 2,
- params.max_links_per_node(),
+ uint32_t m = params.max_links_per_node();
+ HnswIndex::Config cfg(m * 2,
+ m,
params.neighbors_to_explore_at_insert(),
true);
- return std::make_unique<HnswIndex>(vectors, make_distance_function(cell_type), make_random_level_generator(), cfg);
+ return std::make_unique<HnswIndex>(vectors, make_distance_function(cell_type), make_random_level_generator(m), cfg);
}
}
diff --git a/searchlib/src/vespa/searchlib/tensor/inv_log_level_generator.cpp b/searchlib/src/vespa/searchlib/tensor/inv_log_level_generator.cpp
new file mode 100644
index 00000000000..540edc5e664
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/inv_log_level_generator.cpp
@@ -0,0 +1,3 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "inv_log_level_generator.h"
diff --git a/searchlib/src/vespa/searchlib/tensor/inv_log_level_generator.h b/searchlib/src/vespa/searchlib/tensor/inv_log_level_generator.h
new file mode 100644
index 00000000000..3d69e40625b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/inv_log_level_generator.h
@@ -0,0 +1,26 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "random_level_generator.h"
+#include <random>
+
+namespace search::tensor {
+
+class InvLogLevelGenerator : public RandomLevelGenerator {
+ std::mt19937_64 _rng;
+ std::uniform_real_distribution<double> _uniform;
+ double _levelMultiplier;
+public:
+ InvLogLevelGenerator(uint32_t m)
+ : _rng(0x1234deadbeef5678uLL),
+ _uniform(0.0, 1.0),
+ _levelMultiplier(1.0 / log(1.0 * m))
+ {}
+
+ uint32_t max_level() override {
+ double unif = _uniform(_rng);
+ double r = -log(1.0-unif) * _levelMultiplier;
+ return (uint32_t) r;
+ }
+};
+
+}