diff options
author | Tor Egge <Tor.Egge@online.no> | 2022-11-24 10:26:10 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2022-11-24 10:26:10 +0100 |
commit | e2c7b0f200a65dd9c74a9a70d6cc37f1bccb11fb (patch) | |
tree | 92b62c1903531d72a5c02ff6c872b75ffc91bf1f /searchlib/src/tests/tensor | |
parent | ba11a8a87877dd3a97586255839b02782b70de87 (diff) |
Avoid duplicate docid when searching in hnsw index with multiple nodes per
document.
Diffstat (limited to 'searchlib/src/tests/tensor')
-rw-r--r-- | searchlib/src/tests/tensor/hnsw_best_neighbors/CMakeLists.txt | 9 | ||||
-rw-r--r-- | searchlib/src/tests/tensor/hnsw_best_neighbors/hnsw_best_neighbors_test.cpp | 109 |
2 files changed, 118 insertions, 0 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_best_neighbors/CMakeLists.txt b/searchlib/src/tests/tensor/hnsw_best_neighbors/CMakeLists.txt new file mode 100644 index 00000000000..3cb6a286580 --- /dev/null +++ b/searchlib/src/tests/tensor/hnsw_best_neighbors/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_hnsw_best_neighbors_test_app TEST + SOURCES + hnsw_best_neighbors_test.cpp + DEPENDS + searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_hnsw_best_neighbors_test_app COMMAND searchlib_hnsw_best_neighbors_test_app) diff --git a/searchlib/src/tests/tensor/hnsw_best_neighbors/hnsw_best_neighbors_test.cpp b/searchlib/src/tests/tensor/hnsw_best_neighbors/hnsw_best_neighbors_test.cpp new file mode 100644 index 00000000000..c05b85d3e59 --- /dev/null +++ b/searchlib/src/tests/tensor/hnsw_best_neighbors/hnsw_best_neighbors_test.cpp @@ -0,0 +1,109 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/tensor/hnsw_multi_best_neighbors.h> +#include <vespa/searchlib/tensor/hnsw_single_best_neighbors.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <ostream> + +using vespalib::datastore::EntryRef; +using search::tensor::HnswMultiBestNeighbors; +using search::tensor::HnswSingleBestNeighbors; +using search::tensor::NearestNeighborIndex; + +using Neighbor = NearestNeighborIndex::Neighbor; + +namespace search::tensor +{ + +std::ostream& operator<<(std::ostream& os, const Neighbor& neighbor) { + os << "{ docid=" << neighbor.docid << ", distance=" << neighbor.distance << "}"; + return os; +} + +} + +struct DocidThenDistanceOrder +{ + bool operator()(const Neighbor& lhs, const Neighbor& rhs) const { + if (lhs.docid != rhs.docid) { + return lhs.docid < rhs.docid; + } + return lhs.distance < rhs.distance; + } +}; + +template <typename BestNeighbors> +class HnswBestNeighborsTest : public testing::Test { +protected: + BestNeighbors _neighbors; + HnswBestNeighborsTest() + : testing::Test(), + _neighbors() + { + populate(); + } + ~HnswBestNeighborsTest() override; + + void add(uint32_t nodeid, uint32_t docid, double distance) + { + _neighbors.emplace(nodeid, docid, EntryRef(), distance); + } + + size_t size() const { return _neighbors.size(); } + + void assert_neighbors(std::vector<NearestNeighborIndex::Neighbor> exp, uint32_t k, double distance_limit) { + auto neighbors_copy = _neighbors; + auto act = neighbors_copy.get_neighbors(k, distance_limit); + std::sort(act.begin(), act.end(), DocidThenDistanceOrder()); + EXPECT_EQ(exp, act); + } + + void populate() + { + add(3, 3, 7.0); + add(2, 2, 10.0); + add(1, 1, 1.0); + add(4, 2, 5.0); + } +}; + +template <typename BestNeighbors> +HnswBestNeighborsTest<BestNeighbors>::~HnswBestNeighborsTest() = default; + +using HnswBestNeighborsTypedTestTypes = testing::Types<HnswSingleBestNeighbors, HnswMultiBestNeighbors>; + +TYPED_TEST_SUITE(HnswBestNeighborsTest, HnswBestNeighborsTypedTestTypes); + +TYPED_TEST(HnswBestNeighborsTest, k_limit_is_enforced) +{ + this->assert_neighbors({}, 0, 40.0); + this->assert_neighbors({{1, 1.0}}, 1, 40.0); + this->assert_neighbors({{1, 1.0}, {2, 5.0}}, 2, 40.0); + this->assert_neighbors({{1, 1.0}, {2, 5.0}, {3, 7.0}}, 3, 40.0); +} + +TYPED_TEST(HnswBestNeighborsTest, distance_limit_is_enforced) +{ + this->assert_neighbors({}, 40, 0.5); + this->assert_neighbors({{1, 1.0}}, 40, 1.0); + this->assert_neighbors({{1, 1.0}, {2, 5.0}}, 40, 5.0); + this->assert_neighbors({{1, 1.0}, {2, 5.0}, {3, 7.0}}, 40, 7.0); +} + +using HnswSingleBestNeighborsTest = HnswBestNeighborsTest<HnswSingleBestNeighbors>; + +TEST_F(HnswSingleBestNeighborsTest, duplicate_docids_are_not_elimiated) +{ + EXPECT_EQ(4, size()); + assert_neighbors({{1, 1.0}, {2, 5.0}, {2, 10.0}, {3, 7.0}}, 40, 40.0); +} + +using HnswMultiBestNeighborsTest = HnswBestNeighborsTest<HnswMultiBestNeighbors>; + +TEST_F(HnswMultiBestNeighborsTest, duplicate_docids_are_eliminated) +{ + EXPECT_EQ(3, size()); + assert_neighbors({{1, 1.0}, {2, 5.0}, {3, 7.0}}, 40, 40.0); +} + +GTEST_MAIN_RUN_ALL_TESTS() |