diff options
author | Geir Storli <geirst@yahooinc.com> | 2023-05-23 13:23:36 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-23 13:23:36 +0200 |
commit | 999797b2b79b0d4ea3c6d5dd795a2c31454dd125 (patch) | |
tree | 1920b13c355b626e3acc64e36ee1c1013fa669de /searchlib/src/tests/attribute | |
parent | be124652c6f20677ecf07aad887d0cf02944c83e (diff) | |
parent | a3f5a86d596c10b57f7a404e9a17fad56c50f20c (diff) |
Merge pull request #27182 from vespa-engine/toregge/save-and-load-hnsw-max-squared-norm
Store max squared norm in file header during hnsw index save when using a MIPS (dotproduct) distance metric.
Diffstat (limited to 'searchlib/src/tests/attribute')
-rw-r--r-- | searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp | 41 |
1 file changed, 37 insertions, 4 deletions
```diff
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index e3c9e05073e..841d7f92b62 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -9,6 +9,7 @@
 #include <vespa/searchlib/tensor/doc_vector_access.h>
 #include <vespa/searchlib/tensor/distance_functions.h>
 #include <vespa/searchlib/tensor/hnsw_index.h>
+#include <vespa/searchlib/tensor/mips_distance_transform.h>
 #include <vespa/searchlib/tensor/nearest_neighbor_index.h>
 #include <vespa/searchlib/tensor/nearest_neighbor_index_factory.h>
 #include <vespa/searchlib/tensor/nearest_neighbor_index_loader.h>
@@ -54,6 +55,7 @@ using search::tensor::DocVectorAccess;
 using search::tensor::HnswIndex;
 using search::tensor::HnswIndexType;
 using search::tensor::HnswTestNode;
+using search::tensor::MipsDistanceFunctionFactoryBase;
 using search::tensor::NearestNeighborIndex;
 using search::tensor::NearestNeighborIndexFactory;
 using search::tensor::NearestNeighborIndexLoader;
@@ -285,13 +287,15 @@ public:
     void populate_address_space_usage(AddressSpaceUsage&) const override {}
     void get_state(const vespalib::slime::Inserter&) const override {}
     void shrink_lid_space(uint32_t) override { }
-    std::unique_ptr<NearestNeighborIndexSaver> make_saver() const override {
+    std::unique_ptr<NearestNeighborIndexSaver> make_saver(vespalib::GenericHeader& header) const override {
+        (void) header;
         if (_index_value != 0) {
             return std::make_unique<MockIndexSaver>(_index_value);
         }
         return std::unique_ptr<NearestNeighborIndexSaver>();
     }
-    std::unique_ptr<NearestNeighborIndexLoader> make_loader(FastOS_FileInterface& file) override {
+    std::unique_ptr<NearestNeighborIndexLoader> make_loader(FastOS_FileInterface& file, const vespalib::GenericHeader& header) override {
+        (void) header;
         return std::make_unique<MockIndexLoader>(_index_value, file);
     }
     std::vector<Neighbor> find_top_k(uint32_t k,
@@ -342,12 +346,15 @@ class MockNearestNeighborIndexFactory : public NearestNeighborIndexFactory {
 const vespalib::string test_dir = "test_data/";
 const vespalib::string attr_name = test_dir + "my_attr";
 
+const vespalib::string hnsw_max_squared_norm = "hnsw.max_squared_norm";
+
 struct FixtureTraits {
     bool use_dense_tensor_attribute = false;
     bool use_direct_tensor_attribute = false;
     bool enable_hnsw_index = false;
     bool use_mock_index = false;
     bool use_mmap_file_allocator = false;
+    bool use_mips_distance = false;
 
     FixtureTraits dense() && {
         use_dense_tensor_attribute = true;
@@ -381,6 +388,14 @@ struct FixtureTraits {
         return *this;
     }
 
+    FixtureTraits mips_hnsw() && {
+        use_dense_tensor_attribute = true;
+        enable_hnsw_index = true;
+        use_mock_index = false;
+        use_mips_distance = true;
+        return *this;
+    }
+
     FixtureTraits direct() && {
         use_dense_tensor_attribute = false;
         use_direct_tensor_attribute = true;
@@ -606,8 +621,9 @@ Fixture::Fixture(const vespalib::string &typeSpec, FixtureTraits traits)
       _mmap_allocator_base_dir("mmap-file-allocator-factory-dir")
 {
     if (traits.enable_hnsw_index) {
-        _cfg.set_distance_metric(DistanceMetric::Euclidean);
-        _cfg.set_hnsw_index_params(HnswIndexParams(4, 20, DistanceMetric::Euclidean));
+        auto dm = traits.use_mips_distance ? DistanceMetric::Dotproduct : DistanceMetric::Euclidean;
+        _cfg.set_distance_metric(dm);
+        _cfg.set_hnsw_index_params(HnswIndexParams(4, 20, dm));
     }
     vespalib::alloc::MmapFileAllocatorFactory::instance().setup(_mmap_allocator_base_dir);
     setup();
@@ -1254,6 +1270,23 @@ TEST_F("Nearest neighbor index type is added to attribute file header", DenseTen
     EXPECT_EQUAL("hnsw", header.getTag("nearest_neighbor_index").asString());
 }
 
+class DenseTensorAttributeMipsIndex : public Fixture {
+public:
+    DenseTensorAttributeMipsIndex() : Fixture(vec_2d_spec, FixtureTraits().mips_hnsw()) {}
+};
+
+TEST_F("Nearest neighbor index with mips distance metrics stores square of max distance", DenseTensorAttributeMipsIndex)
+{
+    f.set_example_tensors();
+    f.save();
+    auto header = f.get_file_header();
+    EXPECT_TRUE(header.hasTag(hnsw_max_squared_norm));
+    EXPECT_EQUAL(130.0, header.getTag(hnsw_max_squared_norm).asFloat());
+    f.load();
+    auto& norm_store = dynamic_cast<MipsDistanceFunctionFactoryBase&>(f.hnsw_index().distance_function_factory()).get_max_squared_norm_store();
+    EXPECT_EQUAL(130.0, norm_store.get_max());
+}
+
 template <typename ParentT>
 class NearestNeighborBlueprintFixtureBase : public ParentT {
 private:
```