summary | refs | log | tree | commit | diff | stats
path: root/searchlib/src/tests/attribute
diff options
context:
space:
mode:
author: Geir Storli <geirst@yahooinc.com> 2023-05-23 13:23:36 +0200
committer: GitHub <noreply@github.com> 2023-05-23 13:23:36 +0200
commit: 999797b2b79b0d4ea3c6d5dd795a2c31454dd125 (patch)
tree: 1920b13c355b626e3acc64e36ee1c1013fa669de /searchlib/src/tests/attribute
parent: be124652c6f20677ecf07aad887d0cf02944c83e (diff)
parent: a3f5a86d596c10b57f7a404e9a17fad56c50f20c (diff)
Merge pull request #27182 from vespa-engine/toregge/save-and-load-hnsw-max-squared-norm
Store max squared norm in file header during hnsw index save when using
Diffstat (limited to 'searchlib/src/tests/attribute')
-rw-r--r-- searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp 41
1 files changed, 37 insertions, 4 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index e3c9e05073e..841d7f92b62 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -9,6 +9,7 @@
#include <vespa/searchlib/tensor/doc_vector_access.h>
#include <vespa/searchlib/tensor/distance_functions.h>
#include <vespa/searchlib/tensor/hnsw_index.h>
+#include <vespa/searchlib/tensor/mips_distance_transform.h>
#include <vespa/searchlib/tensor/nearest_neighbor_index.h>
#include <vespa/searchlib/tensor/nearest_neighbor_index_factory.h>
#include <vespa/searchlib/tensor/nearest_neighbor_index_loader.h>
@@ -54,6 +55,7 @@ using search::tensor::DocVectorAccess;
using search::tensor::HnswIndex;
using search::tensor::HnswIndexType;
using search::tensor::HnswTestNode;
+using search::tensor::MipsDistanceFunctionFactoryBase;
using search::tensor::NearestNeighborIndex;
using search::tensor::NearestNeighborIndexFactory;
using search::tensor::NearestNeighborIndexLoader;
@@ -285,13 +287,15 @@ public:
void populate_address_space_usage(AddressSpaceUsage&) const override {}
void get_state(const vespalib::slime::Inserter&) const override {}
void shrink_lid_space(uint32_t) override { }
- std::unique_ptr<NearestNeighborIndexSaver> make_saver() const override {
+ std::unique_ptr<NearestNeighborIndexSaver> make_saver(vespalib::GenericHeader& header) const override {
+ (void) header;
if (_index_value != 0) {
return std::make_unique<MockIndexSaver>(_index_value);
}
return std::unique_ptr<NearestNeighborIndexSaver>();
}
- std::unique_ptr<NearestNeighborIndexLoader> make_loader(FastOS_FileInterface& file) override {
+ std::unique_ptr<NearestNeighborIndexLoader> make_loader(FastOS_FileInterface& file, const vespalib::GenericHeader& header) override {
+ (void) header;
return std::make_unique<MockIndexLoader>(_index_value, file);
}
std::vector<Neighbor> find_top_k(uint32_t k,
@@ -342,12 +346,15 @@ class MockNearestNeighborIndexFactory : public NearestNeighborIndexFactory {
const vespalib::string test_dir = "test_data/";
const vespalib::string attr_name = test_dir + "my_attr";
+const vespalib::string hnsw_max_squared_norm = "hnsw.max_squared_norm";
+
struct FixtureTraits {
bool use_dense_tensor_attribute = false;
bool use_direct_tensor_attribute = false;
bool enable_hnsw_index = false;
bool use_mock_index = false;
bool use_mmap_file_allocator = false;
+ bool use_mips_distance = false;
FixtureTraits dense() && {
use_dense_tensor_attribute = true;
@@ -381,6 +388,14 @@ struct FixtureTraits {
return *this;
}
+ FixtureTraits mips_hnsw() && {
+ use_dense_tensor_attribute = true;
+ enable_hnsw_index = true;
+ use_mock_index = false;
+ use_mips_distance = true;
+ return *this;
+ }
+
FixtureTraits direct() && {
use_dense_tensor_attribute = false;
use_direct_tensor_attribute = true;
@@ -606,8 +621,9 @@ Fixture::Fixture(const vespalib::string &typeSpec, FixtureTraits traits)
_mmap_allocator_base_dir("mmap-file-allocator-factory-dir")
{
if (traits.enable_hnsw_index) {
- _cfg.set_distance_metric(DistanceMetric::Euclidean);
- _cfg.set_hnsw_index_params(HnswIndexParams(4, 20, DistanceMetric::Euclidean));
+ auto dm = traits.use_mips_distance ? DistanceMetric::Dotproduct : DistanceMetric::Euclidean;
+ _cfg.set_distance_metric(dm);
+ _cfg.set_hnsw_index_params(HnswIndexParams(4, 20, dm));
}
vespalib::alloc::MmapFileAllocatorFactory::instance().setup(_mmap_allocator_base_dir);
setup();
@@ -1254,6 +1270,23 @@ TEST_F("Nearest neighbor index type is added to attribute file header", DenseTen
EXPECT_EQUAL("hnsw", header.getTag("nearest_neighbor_index").asString());
}
+class DenseTensorAttributeMipsIndex : public Fixture {
+public:
+ DenseTensorAttributeMipsIndex() : Fixture(vec_2d_spec, FixtureTraits().mips_hnsw()) {}
+};
+
+TEST_F("Nearest neighbor index with mips distance metrics stores square of max distance", DenseTensorAttributeMipsIndex)
+{
+ f.set_example_tensors();
+ f.save();
+ auto header = f.get_file_header();
+ EXPECT_TRUE(header.hasTag(hnsw_max_squared_norm));
+ EXPECT_EQUAL(130.0, header.getTag(hnsw_max_squared_norm).asFloat());
+ f.load();
+ auto& norm_store = dynamic_cast<MipsDistanceFunctionFactoryBase&>(f.hnsw_index().distance_function_factory()).get_max_squared_norm_store();
+ EXPECT_EQUAL(130.0, norm_store.get_max());
+}
+
template <typename ParentT>
class NearestNeighborBlueprintFixtureBase : public ParentT {
private: