diff options
10 files changed, 62 insertions, 49 deletions
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/Index.java b/config-model/src/main/java/com/yahoo/searchdefinition/Index.java index 90f061d933d..aba6cf9a233 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/Index.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/Index.java @@ -10,11 +10,11 @@ import java.io.Serializable; import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashSet; +import java.util.Locale; import java.util.Objects; import java.util.Optional; import java.util.Set; - /** * An index definition in a search definition. * Two indices are equal if they have the same name and the same settings, except @@ -24,6 +24,8 @@ import java.util.Set; */ public class Index implements Cloneable, Serializable { + public static enum DistanceMetric { EUCLIDEAN, ANGULAR, GEODEGREES } + public enum Type { VESPA("vespa"); @@ -61,7 +63,9 @@ public class Index implements Cloneable, Serializable { /** The boolean index definition, if set */ private BooleanIndexDefinition boolIndex; - private Optional<HnswIndexParams> hnswIndexParams; + private Optional<HnswIndexParams> hnswIndexParams = Optional.empty(); + + private Optional<DistanceMetric> distanceMetric = Optional.empty(); /** Whether the posting lists of this index field should have interleaved features (num occs, field length) in document id stream. */ private boolean interleavedFeatures = false; @@ -134,12 +138,13 @@ public class Index implements Cloneable, Serializable { stemming == index.stemming && type == index.type && Objects.equals(boolIndex, index.boolIndex) && + Objects.equals(distanceMetric, index.distanceMetric) && Objects.equals(hnswIndexParams, index.hnswIndexParams); } @Override public int hashCode() { - return Objects.hash(name, rankType, prefix, aliases, stemming, normalized, type, boolIndex, hnswIndexParams, interleavedFeatures); + return Objects.hash(name, rankType, prefix, aliases, stemming, normalized, type, boolIndex, distanceMetric, hnswIndexParams, interleavedFeatures); } public String toString() { @@ -187,6 +192,16 @@ public class Index implements Cloneable, Serializable { boolIndex = def; } + public Optional<DistanceMetric> getDistanceMetric() { + return distanceMetric; + } + + public void setDistanceMetric(String value) { + String upper = value.toUpperCase(Locale.ENGLISH); + DistanceMetric dm = DistanceMetric.valueOf(upper); + distanceMetric = Optional.of(dm); + } + public Optional<HnswIndexParams> getHnswIndexParams() { return hnswIndexParams; } diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java b/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java index 5b87fdcf5f6..8b5f7658475 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java @@ -240,13 +240,14 @@ public class AttributeFields extends Derived implements AttributesConfig.Produce aaB.tensortype(attribute.tensorType().get().toString()); } aaB.imported(imported); + var dma = attribute.distanceMetric(); if (attribute.hnswIndexParams().isPresent()) { var ib = new AttributesConfig.Attribute.Index.Builder(); var params = attribute.hnswIndexParams().get(); ib.hnsw.enabled(true); ib.hnsw.maxlinkspernode(params.maxLinksPerNode()); ib.hnsw.neighborstoexploreatinsert(params.neighborsToExploreAtInsert()); - var dm = AttributesConfig.Attribute.Index.Hnsw.Distancemetric.Enum.valueOf(params.distanceMetric().toString()); + var dm = AttributesConfig.Attribute.Index.Hnsw.Distancemetric.Enum.valueOf(dma.toString()); ib.hnsw.distancemetric(dm); aaB.index(ib); } diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java b/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java index 9ed5e4ca2de..1661a80f238 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java @@ -24,6 +24,7 @@ import com.yahoo.document.datatypes.Float16FieldValue; import com.yahoo.document.datatypes.StringFieldValue; import com.yahoo.document.datatypes.TensorFieldValue; import com.yahoo.tensor.TensorType; +import static com.yahoo.searchdefinition.Index.DistanceMetric; import java.io.Serializable; import java.util.LinkedHashSet; @@ -66,7 +67,9 @@ public final class Attribute implements Cloneable, Serializable { /** This is set if the type of this is REFERENCE */ private final Optional<StructuredDataType> referenceDocumentType; - private Optional<HnswIndexParams> hnswIndexParams; + private Optional<DistanceMetric> distanceMetric = Optional.empty(); + + private Optional<HnswIndexParams> hnswIndexParams = Optional.empty(); private boolean isPosition = false; private final Sorting sorting = new Sorting(); @@ -152,7 +155,6 @@ public final class Attribute implements Cloneable, Serializable { setCollectionType(collectionType); this.tensorType = tensorType; this.referenceDocumentType = referenceDocumentType; - this.hnswIndexParams = Optional.empty(); } public Attribute convertToArray() { @@ -197,6 +199,11 @@ public final class Attribute implements Cloneable, Serializable { public double densePostingListThreshold() { return densePostingListThreshold; } public Optional<TensorType> tensorType() { return tensorType; } public Optional<StructuredDataType> referenceDocumentType() { return referenceDocumentType; } + + public static final DistanceMetric DEFAULT_DISTANCE_METRIC = DistanceMetric.EUCLIDEAN; + public DistanceMetric distanceMetric() { + return distanceMetric.orElse(DEFAULT_DISTANCE_METRIC); + } public Optional<HnswIndexParams> hnswIndexParams() { return hnswIndexParams; } public Sorting getSorting() { return sorting; } @@ -221,6 +228,7 @@ public final class Attribute implements Cloneable, Serializable { public void setUpperBound(long upperBound) { this.upperBound = upperBound; } public void setDensePostingListThreshold(double threshold) { this.densePostingListThreshold = threshold; } public void setTensorType(TensorType tensorType) { this.tensorType = Optional.of(tensorType); } + public void setDistanceMetric(Optional<DistanceMetric> dm) { this.distanceMetric = dm; } public void setHnswIndexParams(HnswIndexParams params) { this.hnswIndexParams = Optional.of(params); } public String getName() { return name; } @@ -354,8 +362,8 @@ public final class Attribute implements Cloneable, Serializable { /** Returns whether these attributes describes the same entity, even if they have different names */ public boolean isCompatible(Attribute other) { - if ( ! this.type.equals(other.type)) return false; - if ( ! this.collectionType.equals(other.collectionType)) return false; + if (! this.type.equals(other.type)) return false; + if (! this.collectionType.equals(other.collectionType)) return false; if (this.isPrefetch() != other.isPrefetch()) return false; if (this.removeIfZero != other.removeIfZero) return false; if (this.createIfNonExistent != other.createIfNonExistent) return false; @@ -364,10 +372,11 @@ public final class Attribute implements Cloneable, Serializable { // if (this.noSearch != other.noSearch) return false; No backend consequences so compatible for now if (this.fastSearch != other.fastSearch) return false; if (this.huge != other.huge) return false; - if ( ! this.sorting.equals(other.sorting)) return false; - if (!this.tensorType.equals(other.tensorType)) return false; - if (!this.referenceDocumentType.equals(other.referenceDocumentType)) return false; - if (!this.hnswIndexParams.equals(other.hnswIndexParams)) return false; + if (! this.sorting.equals(other.sorting)) return false; + if (! Objects.equals(tensorType, other.tensorType)) return false; + if (! Objects.equals(referenceDocumentType, other.referenceDocumentType)) return false; + if (! Objects.equals(distanceMetric, other.distanceMetric)) return false; + if (! Objects.equals(hnswIndexParams, other.hnswIndexParams)) return false; return true; } diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/document/HnswIndexParams.java b/config-model/src/main/java/com/yahoo/searchdefinition/document/HnswIndexParams.java index 01434be8785..2f084d3e513 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/document/HnswIndexParams.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/document/HnswIndexParams.java @@ -13,18 +13,13 @@ public class HnswIndexParams { public static final int DEFAULT_MAX_LINKS_PER_NODE = 16; public static final int DEFAULT_NEIGHBORS_TO_EXPLORE_AT_INSERT = 200; - public static final DistanceMetric DEFAULT_DISTANCE_METRIC = DistanceMetric.EUCLIDEAN; private final Optional<Integer> maxLinksPerNode; private final Optional<Integer> neighborsToExploreAtInsert; - private final Optional<DistanceMetric> distanceMetric; - - public static enum DistanceMetric { EUCLIDEAN, ANGULAR, GEODEGREES } public static class Builder { private Optional<Integer> maxLinksPerNode = Optional.empty(); private Optional<Integer> neighborsToExploreAtInsert = Optional.empty(); - private Optional<DistanceMetric> distanceMetric = Optional.empty(); public void setMaxLinksPerNode(int value) { maxLinksPerNode = Optional.of(value); @@ -32,38 +27,31 @@ public class HnswIndexParams { public void setNeighborsToExploreAtInsert(int value) { neighborsToExploreAtInsert = Optional.of(value); } - public void setDistanceMetric(String value) { - String upper = value.toUpperCase(Locale.ENGLISH); - DistanceMetric dm = DistanceMetric.valueOf(upper); - distanceMetric = Optional.of(dm); - } public HnswIndexParams build() { - return new HnswIndexParams(maxLinksPerNode, neighborsToExploreAtInsert, distanceMetric); + return new HnswIndexParams(maxLinksPerNode, neighborsToExploreAtInsert); } } public HnswIndexParams() { this.maxLinksPerNode = Optional.empty(); this.neighborsToExploreAtInsert = Optional.empty(); - this.distanceMetric = Optional.empty(); } public HnswIndexParams(Optional<Integer> maxLinksPerNode, - Optional<Integer> neighborsToExploreAtInsert, - Optional<DistanceMetric> distanceMetric) { + Optional<Integer> neighborsToExploreAtInsert) { this.maxLinksPerNode = maxLinksPerNode; this.neighborsToExploreAtInsert = neighborsToExploreAtInsert; - this.distanceMetric = distanceMetric; } /** * Creates a new instance where values from the given parameter instance are used where they are present, * otherwise we use values from this. */ - public HnswIndexParams overrideFrom(HnswIndexParams rhs) { + public HnswIndexParams overrideFrom(Optional<HnswIndexParams> other) { + if (! other.isPresent()) return this; + HnswIndexParams rhs = other.get(); return new HnswIndexParams(rhs.maxLinksPerNode.or(() -> maxLinksPerNode), - rhs.neighborsToExploreAtInsert.or(() -> neighborsToExploreAtInsert), - rhs.distanceMetric.or(() -> distanceMetric)); + rhs.neighborsToExploreAtInsert.or(() -> neighborsToExploreAtInsert)); } public int maxLinksPerNode() { @@ -73,8 +61,4 @@ public class HnswIndexParams { public int neighborsToExploreAtInsert() { return neighborsToExploreAtInsert.orElse(DEFAULT_NEIGHBORS_TO_EXPLORE_AT_INSERT); } - - public DistanceMetric distanceMetric() { - return distanceMetric.orElse(DEFAULT_DISTANCE_METRIC); - } } diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java index 7f9da28b9ca..0c1f443dee3 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java @@ -32,6 +32,7 @@ public class IndexOperation implements FieldOperation { private OptionalDouble densePostingListThreshold = OptionalDouble.empty(); private Optional<Boolean> enableBm25 = Optional.empty(); + private Optional<String> distanceMetric = Optional.empty(); private Optional<HnswIndexParams.Builder> hnswIndexParams = Optional.empty(); public String getIndexName() { @@ -94,6 +95,9 @@ public class IndexOperation implements FieldOperation { if (enableBm25.isPresent()) { index.setInterleavedFeatures(enableBm25.get()); } + if (distanceMetric.isPresent()) { + index.setDistanceMetric(distanceMetric.get()); + } if (hnswIndexParams.isPresent()) { index.setHnswIndexParams(hnswIndexParams.get().build()); } @@ -127,6 +131,10 @@ public class IndexOperation implements FieldOperation { enableBm25 = Optional.of(value); } + public void setDistanceMetric(String value) { + this.distanceMetric = Optional.of(value); + } + public void setHnswIndexParams(HnswIndexParams.Builder params) { this.hnswIndexParams = Optional.of(params); } diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/processing/TensorFieldProcessor.java b/config-model/src/main/java/com/yahoo/searchdefinition/processing/TensorFieldProcessor.java index 2790f2ddf6e..c97ee2bd935 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/processing/TensorFieldProcessor.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/processing/TensorFieldProcessor.java @@ -81,8 +81,9 @@ public class TensorFieldProcessor extends Processor { var index = field.getIndex(field.getName()); // TODO: Calculate default params based on tensor dimension size var params = new HnswIndexParams(); - if (index != null && index.getHnswIndexParams().isPresent()) { - params = params.overrideFrom(index.getHnswIndexParams().get()); + if (index != null) { + params = params.overrideFrom(index.getHnswIndexParams()); + field.getAttribute().setDistanceMetric(index.getDistanceMetric()); } field.getAttribute().setHnswIndexParams(params); } diff --git a/config-model/src/main/javacc/SDParser.jj b/config-model/src/main/javacc/SDParser.jj index cca56c209c8..3560cf2cd84 100644 --- a/config-model/src/main/javacc/SDParser.jj +++ b/config-model/src/main/javacc/SDParser.jj @@ -1816,6 +1816,7 @@ Object indexBody(IndexOperation index) : | <UPPERBOUND> <COLON> num = consumeLong() { index.setUpperBound(num); } | <DENSEPOSTINGLISTTHRESHOLD> <COLON> threshold = consumeFloat() { index.setDensePostingListThreshold(threshold); } | <ENABLE_BM25> { index.setEnableBm25(true); } + | <DISTANCEMETRIC> <COLON> str = identifierWithDash() { index.setDistanceMetric(str); } | hnswIndex(index) { } ) { return null; } @@ -1841,7 +1842,6 @@ void hnswIndexBody(HnswIndexParams.Builder params) : } { ( <MAXLINKSPERNODE> <COLON> num = integer() { params.setMaxLinksPerNode(num); } - | <DISTANCEMETRIC> <COLON> str = identifierWithDash() { params.setDistanceMetric(str); } | <NEIGHBORSTOEXPLOREATINSERT> <COLON> num = integer() { params.setNeighborsToExploreAtInsert(num); } ) } diff --git a/config-model/src/test/derived/hnsw_index/test.sd b/config-model/src/test/derived/hnsw_index/test.sd index 3b954e74fc5..207ed764a87 100644 --- a/config-model/src/test/derived/hnsw_index/test.sd +++ b/config-model/src/test/derived/hnsw_index/test.sd @@ -3,9 +3,9 @@ search test { field t1 type tensor(x[128]) { indexing: attribute | index index { + distance-metric: angular hnsw { max-links-per-node: 32 - distance-metric: angular neighbors-to-explore-at-insert: 300 } } diff --git a/config-model/src/test/java/com/yahoo/searchdefinition/derived/NearestNeighborTestCase.java b/config-model/src/test/java/com/yahoo/searchdefinition/derived/NearestNeighborTestCase.java index ead4e586d9f..9f57b22fd58 100644 --- a/config-model/src/test/java/com/yahoo/searchdefinition/derived/NearestNeighborTestCase.java +++ b/config-model/src/test/java/com/yahoo/searchdefinition/derived/NearestNeighborTestCase.java @@ -31,6 +31,9 @@ public class NearestNeighborTestCase extends AbstractExportingTestCase { } catch (QueryException e) { // success assertEquals("Invalid request parameter", e.getMessage()); + } catch (RuntimeException e) { + e.printStackTrace(); + throw e; } } diff --git a/config-model/src/test/java/com/yahoo/searchdefinition/document/HnswIndexParamsTestCase.java b/config-model/src/test/java/com/yahoo/searchdefinition/document/HnswIndexParamsTestCase.java index d687590faf2..e3dcc925e5e 100644 --- a/config-model/src/test/java/com/yahoo/searchdefinition/document/HnswIndexParamsTestCase.java +++ b/config-model/src/test/java/com/yahoo/searchdefinition/document/HnswIndexParamsTestCase.java @@ -2,13 +2,13 @@ package com.yahoo.searchdefinition.document; +import java.util.Optional; import org.junit.Test; import static org.hamcrest.core.Is.is; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; -import static com.yahoo.searchdefinition.document.HnswIndexParams.DistanceMetric; public class HnswIndexParamsTestCase { @@ -18,35 +18,27 @@ public class HnswIndexParamsTestCase { var builder = new HnswIndexParams.Builder(); builder.setMaxLinksPerNode(7); var one = builder.build(); - builder.setDistanceMetric("angular"); - var two = builder.build(); builder.setNeighborsToExploreAtInsert(42); var three = builder.build(); builder.setMaxLinksPerNode(17); - builder.setDistanceMetric("geodegrees"); builder.setNeighborsToExploreAtInsert(500); var four = builder.build(); assertThat(empty.maxLinksPerNode(), is(16)); - assertThat(empty.distanceMetric(), is(DistanceMetric.EUCLIDEAN)); assertThat(empty.neighborsToExploreAtInsert(), is(200)); assertThat(one.maxLinksPerNode(), is(7)); - assertThat(two.distanceMetric(), is(DistanceMetric.ANGULAR)); assertThat(three.neighborsToExploreAtInsert(), is(42)); assertThat(four.maxLinksPerNode(), is(17)); - assertThat(four.distanceMetric(), is(DistanceMetric.GEODEGREES)); assertThat(four.neighborsToExploreAtInsert(), is(500)); - var five = four.overrideFrom(empty); + var five = four.overrideFrom(Optional.of(empty)); assertThat(five.maxLinksPerNode(), is(17)); - assertThat(five.distanceMetric(), is(DistanceMetric.GEODEGREES)); assertThat(five.neighborsToExploreAtInsert(), is(500)); - var six = four.overrideFrom(two); + var six = four.overrideFrom(Optional.of(one)); assertThat(six.maxLinksPerNode(), is(7)); - assertThat(six.distanceMetric(), is(DistanceMetric.ANGULAR)); assertThat(six.neighborsToExploreAtInsert(), is(500)); } |