diff options
author | Geir Storli <geirst@verizonmedia.com> | 2020-05-13 11:31:54 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-05-13 11:31:54 +0200 |
commit | 2e0762cfcd363373d5f1bceeec22b2557aa94cf5 (patch) | |
tree | d82fa3274584db862c157f04cbe00d279250f2c0 /searchlib | |
parent | f4c95586e6a99df2b0c5b174c74bb574457f38c7 (diff) | |
parent | fe5a4c73f70cc451f461bc007d1ce7d32481edb2 (diff) |
Merge pull request #13229 from vespa-engine/geirst/distance-metric-when-not-having-hnsw-index
Support specifying a distance metric for nearest neighbor search when not having an HNSW index.
Diffstat (limited to 'searchlib')
4 files changed, 22 insertions, 10 deletions
diff --git a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp index 1cb314165cd..3728b87c6df 100644 --- a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp +++ b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp @@ -278,14 +278,25 @@ AttributeManagerTest::testConfigConvert() AttributeVector::Config out = ConfigConverter::convert(a); EXPECT_EQUAL("tensor(x[5])", out.tensorType().to_spec()); } + { // distance metric (default) + CACA a; + auto out = ConfigConverter::convert(a); + EXPECT_TRUE(out.distance_metric() == DistanceMetric::Euclidean); + } + { // distance metric (explicit) + CACA a; + a.distancemetric = AttributesConfig::Attribute::Distancemetric::GEODEGREES; + auto out = ConfigConverter::convert(a); + EXPECT_TRUE(out.distance_metric() == DistanceMetric::GeoDegrees); + } { // hnsw index params (enabled) - auto dm_in = AttributesConfig::Attribute::Index::Hnsw::Distancemetric::ANGULAR; - auto dm_out = search::attribute::DistanceMetric::Angular; + auto dm_in = AttributesConfig::Attribute::Distancemetric::ANGULAR; + auto dm_out = DistanceMetric::Angular; CACA a; + a.distancemetric = dm_in; a.index.hnsw.enabled = true; a.index.hnsw.maxlinkspernode = 32; a.index.hnsw.neighborstoexploreatinsert = 300; - a.index.hnsw.distancemetric = dm_in; auto out = ConfigConverter::convert(a); EXPECT_TRUE(out.hnsw_index_params().has_value()); EXPECT_EQUAL(32u, out.hnsw_index_params().value().max_links_per_node()); diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 592a8aa6a36..47d8fdf92d5 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -248,6 +248,7 @@ struct Fixture { 
_useDenseTensorAttribute(useDenseTensorAttribute) { if (enable_hnsw_index) { + _cfg.set_distance_metric(DistanceMetric::Euclidean); _cfg.set_hnsw_index_params(HnswIndexParams(4, 20, DistanceMetric::Euclidean)); } setup(); diff --git a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp index c573f0b4210..f6c39b9570d 100644 --- a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp +++ b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp @@ -73,10 +73,9 @@ ConfigConverter::convert(const AttributesConfig::Attribute & cfg) predicateParams.setBounds(cfg.lowerbound, cfg.upperbound); predicateParams.setDensePostingListThreshold(cfg.densepostinglistthreshold); retval.setPredicateParams(predicateParams); - if (cfg.index.hnsw.enabled) { - using CfgDm = AttributesConfig::Attribute::Index::Hnsw::Distancemetric; - DistanceMetric dm; - switch (cfg.index.hnsw.distancemetric) { + using CfgDm = AttributesConfig::Attribute::Distancemetric; + DistanceMetric dm(DistanceMetric::Euclidean); + switch (cfg.distancemetric) { case CfgDm::EUCLIDEAN: dm = DistanceMetric::Euclidean; break; @@ -86,7 +85,9 @@ ConfigConverter::convert(const AttributesConfig::Attribute & cfg) case CfgDm::GEODEGREES: dm = DistanceMetric::GeoDegrees; break; - } + } + retval.set_distance_metric(dm); + if (cfg.index.hnsw.enabled) { retval.set_hnsw_index_params(HnswIndexParams(cfg.index.hnsw.maxlinkspernode, cfg.index.hnsw.neighborstoexploreatinsert, dm)); diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp index b0db678dfc6..a96e7cb8764 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp @@ -65,8 +65,7 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f auto rct = 
_attr_tensor.getTensorType().cell_type(); auto fixup_fun = vespalib::tensor::select_2<ConvertCellsSelector>(lct, rct); fixup_fun(_query_tensor, _attr_tensor.getTensorType()); - auto def_dm = search::attribute::DistanceMetric::Euclidean; - _fallback_dist_fun = search::tensor::make_distance_function(def_dm, rct); + _fallback_dist_fun = search::tensor::make_distance_function(_attr_tensor.getConfig().distance_metric(), rct); _dist_fun = _fallback_dist_fun.get(); auto nns_index = _attr_tensor.nearest_neighbor_index(); if (nns_index) { |