diff options
8 files changed, 61 insertions, 22 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/document/Attribute.java b/config-model/src/main/java/com/yahoo/schema/document/Attribute.java index 9e7f14a3d85..7b798e66567 100644 --- a/config-model/src/main/java/com/yahoo/schema/document/Attribute.java +++ b/config-model/src/main/java/com/yahoo/schema/document/Attribute.java @@ -39,7 +39,7 @@ import java.util.Set; */ public final class Attribute implements Cloneable, Serializable { - public enum DistanceMetric { EUCLIDEAN, ANGULAR, GEODEGREES, INNERPRODUCT, HAMMING, PRENORMALIZED_ANGULAR } + public enum DistanceMetric { EUCLIDEAN, ANGULAR, GEODEGREES, INNERPRODUCT, HAMMING, PRENORMALIZED_ANGULAR, DOTPRODUCT } // Remember to change hashCode and equals when you add new fields diff --git a/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java b/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java index 64f3bda4ac4..3ca182e18c2 100644 --- a/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java @@ -15,6 +15,7 @@ import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.Optional; +import static com.yahoo.config.model.test.TestUtil.joinLines; import static org.junit.jupiter.api.Assertions.*; /** @@ -334,4 +335,36 @@ public class AttributeSettingsTestCase extends AbstractSchemaTestCase { assertSame(single.getAliases(), array.getAliases()); } + @Test + void distance_metric_is_propagated_to_attributes_config() throws ParseException { + assertDerivedDistanceMetric(AttributesConfig.Attribute.Distancemetric.ANGULAR, "angular"); + assertDerivedDistanceMetric(AttributesConfig.Attribute.Distancemetric.EUCLIDEAN, "euclidean"); + assertDerivedDistanceMetric(AttributesConfig.Attribute.Distancemetric.HAMMING, "hamming"); + assertDerivedDistanceMetric(AttributesConfig.Attribute.Distancemetric.GEODEGREES, "geodegrees"); + // TODO Vespa 9: Remove 'innerproduct' as alias for 'prenormalized-angular'. + assertDerivedDistanceMetric(AttributesConfig.Attribute.Distancemetric.INNERPRODUCT, "innerproduct"); + assertDerivedDistanceMetric(AttributesConfig.Attribute.Distancemetric.PRENORMALIZED_ANGULAR, "prenormalized-angular"); + } + + private void assertDerivedDistanceMetric(AttributesConfig.Attribute.Distancemetric.Enum expDistanceMetric, + String schemaDistanceMetric) throws ParseException { + var attrs = new AttributeFields(getSchemaWithDistanceMetric(schemaDistanceMetric)); + var builder = new AttributesConfig.Builder(); + attrs.getConfig(builder, AttributeFields.FieldSet.ALL, 100); + var cfg = builder.build(); + assertEquals(expDistanceMetric, cfg.attribute(0).distancemetric()); + } + + private Schema getSchemaWithDistanceMetric(String distanceMetric) throws ParseException { + return getSchema(joinLines("search test {", + " document test {", + " field t type tensor(x[2]) {", + " indexing: attribute", + " attribute { distance-metric: " + distanceMetric + "}", + " }", + " }", + "}")); + } + + } diff --git a/configdefinitions/src/vespa/attributes.def b/configdefinitions/src/vespa/attributes.def index 4f30e4610ec..00787b928a6 100644 --- a/configdefinitions/src/vespa/attributes.def +++ b/configdefinitions/src/vespa/attributes.def @@ -37,7 +37,7 @@ attribute[].maxuncommittedmemory long default=130000 # The distance metric to use for nearest neighbor search. # Is only used when the attribute is a 1-dimensional indexed tensor. -attribute[].distancemetric enum { EUCLIDEAN, ANGULAR, GEODEGREES, INNERPRODUCT, HAMMING, PRENORMALIZED_ANGULAR } default=EUCLIDEAN +attribute[].distancemetric enum { EUCLIDEAN, ANGULAR, GEODEGREES, INNERPRODUCT, HAMMING, PRENORMALIZED_ANGULAR, DOTPRODUCT } default=EUCLIDEAN # Configuration parameters for a hnsw index used together with a 1-dimensional indexed tensor for approximate nearest neighbor search. attribute[].index.hnsw.enabled bool default=false diff --git a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp index ab36213246e..92b310ffe1e 100644 --- a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp +++ b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp @@ -180,6 +180,16 @@ assertCollectionType(CollectionType exp, AttributesConfig::Attribute::Collection EXPECT_EQUAL(exp.createIfNonExistant(), out.collectionType().createIfNonExistant()); } +void +expect_distance_metric(AttributesConfig::Attribute::Distancemetric in_metric, + DistanceMetric out_metric) +{ + AttributesConfig::Attribute a; + a.distancemetric = in_metric; + auto out = ConfigConverter::convert(a); + EXPECT_TRUE(out.distance_metric() == out_metric); +} + TEST("require that config can be converted") { @@ -254,16 +264,13 @@ TEST("require that config can be converted") EXPECT_TRUE(out.distance_metric() == DistanceMetric::Euclidean); } { // distance metric (explicit) - CACA a; - a.distancemetric = AttributesConfig::Attribute::Distancemetric::GEODEGREES; - auto out = ConfigConverter::convert(a); - EXPECT_TRUE(out.distance_metric() == DistanceMetric::GeoDegrees); - } - { // distance metric (explicit) - CACA a; - a.distancemetric = AttributesConfig::Attribute::Distancemetric::INNERPRODUCT; - auto out = ConfigConverter::convert(a); - EXPECT_TRUE(out.distance_metric() == DistanceMetric::InnerProduct); + expect_distance_metric(AttributesConfig::Attribute::Distancemetric::EUCLIDEAN, DistanceMetric::Euclidean); + expect_distance_metric(AttributesConfig::Attribute::Distancemetric::ANGULAR, DistanceMetric::Angular); + expect_distance_metric(AttributesConfig::Attribute::Distancemetric::GEODEGREES, DistanceMetric::GeoDegrees); + expect_distance_metric(AttributesConfig::Attribute::Distancemetric::HAMMING, DistanceMetric::Hamming); + expect_distance_metric(AttributesConfig::Attribute::Distancemetric::INNERPRODUCT, DistanceMetric::InnerProduct); + expect_distance_metric(AttributesConfig::Attribute::Distancemetric::PRENORMALIZED_ANGULAR, DistanceMetric::PrenormalizedAngular); + expect_distance_metric(AttributesConfig::Attribute::Distancemetric::DOTPRODUCT, DistanceMetric::Dotproduct); } { // hnsw index default params (enabled) CACA a; diff --git a/searchlib/src/vespa/searchcommon/attribute/distance_metric.h b/searchlib/src/vespa/searchcommon/attribute/distance_metric.h index c157f6abb28..9f9f45810b9 100644 --- a/searchlib/src/vespa/searchcommon/attribute/distance_metric.h +++ b/searchlib/src/vespa/searchcommon/attribute/distance_metric.h @@ -4,6 +4,6 @@ namespace search::attribute { -enum class DistanceMetric { Euclidean, Angular, GeoDegrees, InnerProduct, Hamming, PrenormalizedAngular, TransformedMips }; +enum class DistanceMetric { Euclidean, Angular, GeoDegrees, InnerProduct, Hamming, PrenormalizedAngular, Dotproduct }; } diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp index 0edab90f089..122c2c0c55e 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp @@ -29,7 +29,7 @@ const vespalib::string angular = "angular"; const vespalib::string geodegrees = "geodegrees"; const vespalib::string innerproduct = "innerproduct"; const vespalib::string prenormalized_angular = "prenormalized_angular"; -const vespalib::string transformed_mips = "transformed_mips"; +const vespalib::string dotproduct = "dotproduct"; const vespalib::string hamming = "hamming"; const vespalib::string doc_id_limit_tag = "docIdLimit"; const vespalib::string enumerated_tag = "enumerated"; @@ -104,7 +104,7 @@ to_string(DistanceMetric metric) case DistanceMetric::InnerProduct: return innerproduct; case DistanceMetric::Hamming: return hamming; case DistanceMetric::PrenormalizedAngular: return prenormalized_angular; - case DistanceMetric::TransformedMips: return transformed_mips; + case DistanceMetric::Dotproduct: return dotproduct; } throw vespalib::IllegalArgumentException("Unknown distance metric " + std::to_string(static_cast<int>(metric))); } @@ -122,8 +122,8 @@ to_distance_metric(const vespalib::string& metric) return DistanceMetric::InnerProduct; } else if (metric == prenormalized_angular) { return DistanceMetric::PrenormalizedAngular; - } else if (metric == transformed_mips) { - return DistanceMetric::TransformedMips; + } else if (metric == dotproduct) { + return DistanceMetric::Dotproduct; } else if (metric == hamming) { return DistanceMetric::Hamming; } else { diff --git a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp index 2119f441a14..7f04efd940b 100644 --- a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp +++ b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp @@ -136,10 +136,9 @@ ConfigConverter::convert(const AttributesConfig::Attribute & cfg) break; case CfgDm::PRENORMALIZED_ANGULAR: dm = DistanceMetric::PrenormalizedAngular; - /* - case CfgDm::TRANSFORMED_MIPS: - dm = DistanceMetric::TransformedMips; - */ + break; + case CfgDm::DOTPRODUCT: + dm = DistanceMetric::Dotproduct; break; } retval.set_distance_metric(dm); diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp index a338bf85e43..68988ef6308 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp +++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp @@ -39,7 +39,7 @@ make_distance_function_factory(search::attribute::DistanceMetric variant, case CellType::DOUBLE: return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<double>>(); default: return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<float>>(); } - case DistanceMetric::TransformedMips: + case DistanceMetric::Dotproduct: switch (cell_type) { case CellType::DOUBLE: return std::make_unique<MipsDistanceFunctionFactory<double>>(); case CellType::INT8: return std::make_unique<MipsDistanceFunctionFactory<vespalib::eval::Int8Float>>(); |