aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/tests/tensor
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2022-11-24 10:26:10 +0100
committerTor Egge <Tor.Egge@online.no>2022-11-24 10:26:10 +0100
commite2c7b0f200a65dd9c74a9a70d6cc37f1bccb11fb (patch)
tree92b62c1903531d72a5c02ff6c872b75ffc91bf1f /searchlib/src/tests/tensor
parentba11a8a87877dd3a97586255839b02782b70de87 (diff)
Avoid duplicate docid when searching in hnsw index with multiple nodes per
document.
Diffstat (limited to 'searchlib/src/tests/tensor')
-rw-r--r--searchlib/src/tests/tensor/hnsw_best_neighbors/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/tensor/hnsw_best_neighbors/hnsw_best_neighbors_test.cpp109
2 files changed, 118 insertions, 0 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_best_neighbors/CMakeLists.txt b/searchlib/src/tests/tensor/hnsw_best_neighbors/CMakeLists.txt
new file mode 100644
index 00000000000..3cb6a286580
--- /dev/null
+++ b/searchlib/src/tests/tensor/hnsw_best_neighbors/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_hnsw_best_neighbors_test_app TEST
+ SOURCES
+ hnsw_best_neighbors_test.cpp
+ DEPENDS
+ searchlib
+ GTest::GTest
+)
+vespa_add_test(NAME searchlib_hnsw_best_neighbors_test_app COMMAND searchlib_hnsw_best_neighbors_test_app)
diff --git a/searchlib/src/tests/tensor/hnsw_best_neighbors/hnsw_best_neighbors_test.cpp b/searchlib/src/tests/tensor/hnsw_best_neighbors/hnsw_best_neighbors_test.cpp
new file mode 100644
index 00000000000..c05b85d3e59
--- /dev/null
+++ b/searchlib/src/tests/tensor/hnsw_best_neighbors/hnsw_best_neighbors_test.cpp
@@ -0,0 +1,109 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/tensor/hnsw_multi_best_neighbors.h>
+#include <vespa/searchlib/tensor/hnsw_single_best_neighbors.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <ostream>
+
+using vespalib::datastore::EntryRef;
+using search::tensor::HnswMultiBestNeighbors;
+using search::tensor::HnswSingleBestNeighbors;
+using search::tensor::NearestNeighborIndex;
+
+using Neighbor = NearestNeighborIndex::Neighbor;
+
+namespace search::tensor
+{
+
+std::ostream& operator<<(std::ostream& os, const Neighbor& neighbor) {
+ os << "{ docid=" << neighbor.docid << ", distance=" << neighbor.distance << "}";
+ return os;
+}
+
+}
+
+struct DocidThenDistanceOrder
+{
+ bool operator()(const Neighbor& lhs, const Neighbor& rhs) const {
+ if (lhs.docid != rhs.docid) {
+ return lhs.docid < rhs.docid;
+ }
+ return lhs.distance < rhs.distance;
+ }
+};
+
+template <typename BestNeighbors>
+class HnswBestNeighborsTest : public testing::Test {
+protected:
+ BestNeighbors _neighbors;
+ HnswBestNeighborsTest()
+ : testing::Test(),
+ _neighbors()
+ {
+ populate();
+ }
+ ~HnswBestNeighborsTest() override;
+
+ void add(uint32_t nodeid, uint32_t docid, double distance)
+ {
+ _neighbors.emplace(nodeid, docid, EntryRef(), distance);
+ }
+
+ size_t size() const { return _neighbors.size(); }
+
+ void assert_neighbors(std::vector<NearestNeighborIndex::Neighbor> exp, uint32_t k, double distance_limit) {
+ auto neighbors_copy = _neighbors;
+ auto act = neighbors_copy.get_neighbors(k, distance_limit);
+ std::sort(act.begin(), act.end(), DocidThenDistanceOrder());
+ EXPECT_EQ(exp, act);
+ }
+
+ void populate()
+ {
+ add(3, 3, 7.0);
+ add(2, 2, 10.0);
+ add(1, 1, 1.0);
+ add(4, 2, 5.0);
+ }
+};
+
+template <typename BestNeighbors>
+HnswBestNeighborsTest<BestNeighbors>::~HnswBestNeighborsTest() = default;
+
+using HnswBestNeighborsTypedTestTypes = testing::Types<HnswSingleBestNeighbors, HnswMultiBestNeighbors>;
+
+TYPED_TEST_SUITE(HnswBestNeighborsTest, HnswBestNeighborsTypedTestTypes);
+
+TYPED_TEST(HnswBestNeighborsTest, k_limit_is_enforced)
+{
+ this->assert_neighbors({}, 0, 40.0);
+ this->assert_neighbors({{1, 1.0}}, 1, 40.0);
+ this->assert_neighbors({{1, 1.0}, {2, 5.0}}, 2, 40.0);
+ this->assert_neighbors({{1, 1.0}, {2, 5.0}, {3, 7.0}}, 3, 40.0);
+}
+
+TYPED_TEST(HnswBestNeighborsTest, distance_limit_is_enforced)
+{
+ this->assert_neighbors({}, 40, 0.5);
+ this->assert_neighbors({{1, 1.0}}, 40, 1.0);
+ this->assert_neighbors({{1, 1.0}, {2, 5.0}}, 40, 5.0);
+ this->assert_neighbors({{1, 1.0}, {2, 5.0}, {3, 7.0}}, 40, 7.0);
+}
+
+using HnswSingleBestNeighborsTest = HnswBestNeighborsTest<HnswSingleBestNeighbors>;
+
+TEST_F(HnswSingleBestNeighborsTest, duplicate_docids_are_not_elimiated)
+{
+ EXPECT_EQ(4, size());
+ assert_neighbors({{1, 1.0}, {2, 5.0}, {2, 10.0}, {3, 7.0}}, 40, 40.0);
+}
+
+using HnswMultiBestNeighborsTest = HnswBestNeighborsTest<HnswMultiBestNeighbors>;
+
+TEST_F(HnswMultiBestNeighborsTest, duplicate_docids_are_eliminated)
+{
+ EXPECT_EQ(3, size());
+ assert_neighbors({{1, 1.0}, {2, 5.0}, {3, 7.0}}, 40, 40.0);
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()