diff options
author | Geir Storli <geirst@verizonmedia.com> | 2020-02-17 16:30:41 +0000 |
---|---|---|
committer | Geir Storli <geirst@verizonmedia.com> | 2020-02-17 16:30:41 +0000 |
commit | d96f6d95757cbf9e51de1bbb871bc8ebfd221b55 (patch) | |
tree | 3a5438066d2b7e5d91cf90a72b8e098ee22ee820 /config-model/src/main/java/com/yahoo | |
parent | c2d7a2936f9a6c92b086dbb738306bc1d690115d (diff) |
Add support for specifying a hnsw index together with a 1-dimensional indexed tensor.
Diffstat (limited to 'config-model/src/main/java/com/yahoo')
7 files changed, 165 insertions, 20 deletions
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/Index.java b/config-model/src/main/java/com/yahoo/searchdefinition/Index.java index e46db1d1b5f..90f061d933d 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/Index.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/Index.java @@ -2,6 +2,7 @@ package com.yahoo.searchdefinition; import com.yahoo.searchdefinition.document.BooleanIndexDefinition; +import com.yahoo.searchdefinition.document.HnswIndexParams; import com.yahoo.searchdefinition.document.RankType; import com.yahoo.searchdefinition.document.Stemming; @@ -9,8 +10,11 @@ import java.io.Serializable; import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashSet; +import java.util.Objects; +import java.util.Optional; import java.util.Set; + /** * An index definition in a search definition. * Two indices are equal if they have the same name and the same settings, except @@ -57,6 +61,8 @@ public class Index implements Cloneable, Serializable { /** The boolean index definition, if set */ private BooleanIndexDefinition boolIndex; + private Optional<HnswIndexParams> hnswIndexParams; + /** Whether the posting lists of this index field should have interleaved features (num occs, field length) in document id stream. */ private boolean interleavedFeatures = false; @@ -115,20 +121,25 @@ public class Index implements Cloneable, Serializable { } @Override - public int hashCode() { - return name.hashCode() + ( prefix ? 17 : 0 ); + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Index index = (Index) o; + return prefix == index.prefix && + normalized == index.normalized && + interleavedFeatures == index.interleavedFeatures && + Objects.equals(name, index.name) && + rankType == index.rankType && + Objects.equals(aliases, index.aliases) && + stemming == index.stemming && + type == index.type && + Objects.equals(boolIndex, index.boolIndex) && + Objects.equals(hnswIndexParams, index.hnswIndexParams); } @Override - public boolean equals(Object object) { - if ( ! (object instanceof Index)) return false; - - Index other=(Index)object; - return - this.name.equals(other.name) && - this.prefix==other.prefix && - this.stemming==other.stemming && - this.normalized==other.normalized; + public int hashCode() { + return Objects.hash(name, rankType, prefix, aliases, stemming, normalized, type, boolIndex, hnswIndexParams, interleavedFeatures); } public String toString() { @@ -176,6 +187,14 @@ public class Index implements Cloneable, Serializable { boolIndex = def; } + public Optional<HnswIndexParams> getHnswIndexParams() { + return hnswIndexParams; + } + + public void setHnswIndexParams(HnswIndexParams params) { + hnswIndexParams = Optional.of(params); + } + public void setInterleavedFeatures(boolean value) { interleavedFeatures = value; } diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java b/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java index 76dff404568..8de8b239c93 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java @@ -240,6 +240,14 @@ public class AttributeFields extends Derived implements AttributesConfig.Produce aaB.tensortype(attribute.tensorType().get().toString()); } aaB.imported(imported); + if (attribute.hnswIndexParams().isPresent()) { + var ib = new AttributesConfig.Attribute.Index.Builder(); + var params = attribute.hnswIndexParams().get(); + ib.hnsw.enabled(true); + ib.hnsw.maxlinkspernode(params.maxLinksPerNode()); + ib.hnsw.neighborstoexploreatinsert(params.neighborsToExploreAtInsert()); + aaB.index(ib); + } return aaB; } diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexSchema.java b/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexSchema.java index d8773063053..39a67a69575 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexSchema.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexSchema.java @@ -5,6 +5,7 @@ import com.yahoo.document.ArrayDataType; import com.yahoo.document.DataType; import com.yahoo.document.Field; import com.yahoo.document.StructuredDataType; +import com.yahoo.document.TensorDataType; import com.yahoo.document.WeightedSetDataType; import com.yahoo.searchdefinition.Search; import com.yahoo.searchdefinition.document.BooleanIndexDefinition; @@ -20,7 +21,7 @@ import java.util.List; import java.util.Map; /** - * Deriver of indexschema config containing information of all index fields with name and data type. + * Deriver of indexschema config containing information of all text index fields with name and data type. * * @author geirst */ @@ -44,9 +45,14 @@ public class IndexSchema extends Derived implements IndexschemaConfig.Producer { super.derive(search); } + private boolean isTensorField(ImmutableSDField field) { + return field.getDataType() instanceof TensorDataType; + } + private void deriveIndexFields(ImmutableSDField field, Search search) { - if (!field.doesIndexing() && - !field.isIndexStructureField()) + // Note: Indexes for tensor fields are NOT part of the index schema for text fields. + if ((!field.doesIndexing() && !field.isIndexStructureField()) || + isTensorField(field)) { return; } diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java b/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java index fbcaf2a3a80..9ed5e4ca2de 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java @@ -66,6 +66,8 @@ public final class Attribute implements Cloneable, Serializable { /** This is set if the type of this is REFERENCE */ private final Optional<StructuredDataType> referenceDocumentType; + private Optional<HnswIndexParams> hnswIndexParams; + private boolean isPosition = false; private final Sorting sorting = new Sorting(); @@ -150,6 +152,7 @@ public final class Attribute implements Cloneable, Serializable { setCollectionType(collectionType); this.tensorType = tensorType; this.referenceDocumentType = referenceDocumentType; + this.hnswIndexParams = Optional.empty(); } public Attribute convertToArray() { @@ -194,6 +197,7 @@ public final class Attribute implements Cloneable, Serializable { public double densePostingListThreshold() { return densePostingListThreshold; } public Optional<TensorType> tensorType() { return tensorType; } public Optional<StructuredDataType> referenceDocumentType() { return referenceDocumentType; } + public Optional<HnswIndexParams> hnswIndexParams() { return hnswIndexParams; } public Sorting getSorting() { return sorting; } @@ -217,6 +221,7 @@ public final class Attribute implements Cloneable, Serializable { public void setUpperBound(long upperBound) { this.upperBound = upperBound; } public void setDensePostingListThreshold(double threshold) { this.densePostingListThreshold = threshold; } public void setTensorType(TensorType tensorType) { this.tensorType = Optional.of(tensorType); } + public void setHnswIndexParams(HnswIndexParams params) { this.hnswIndexParams = Optional.of(params); } public String getName() { return name; } public Type getType() { return type; } @@ -335,7 +340,7 @@ public final class Attribute implements Cloneable, Serializable { public int hashCode() { return Objects.hash( name, type, collectionType, sorting, isPrefetch(), fastAccess, removeIfZero, createIfNonExistent, - isPosition, huge, enableBitVectors, enableOnlyBitVector, tensorType, referenceDocumentType); + isPosition, huge, enableBitVectors, enableOnlyBitVector, tensorType, referenceDocumentType, hnswIndexParams); } @Override @@ -362,6 +367,7 @@ public final class Attribute implements Cloneable, Serializable { if ( ! this.sorting.equals(other.sorting)) return false; if (!this.tensorType.equals(other.tensorType)) return false; if (!this.referenceDocumentType.equals(other.referenceDocumentType)) return false; + if (!this.hnswIndexParams.equals(other.hnswIndexParams)) return false; return true; } diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/document/HnswIndexParams.java b/config-model/src/main/java/com/yahoo/searchdefinition/document/HnswIndexParams.java new file mode 100644 index 00000000000..70d0df8be7f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/searchdefinition/document/HnswIndexParams.java @@ -0,0 +1,60 @@ +// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchdefinition.document; + +import java.util.OptionalInt; + +/** + * Configuration parameters for a hnsw index used together with a 1-dimensional indexed tensor for approximate nearest neighbor search. + * + * @author geirst + */ +public class HnswIndexParams { + + public static final int DEFAULT_MAX_LINKS_PER_NODE = 16; + public static final int DEFAULT_NEIGHBORS_TO_EXPLORE_AT_INSERT = 200; + + private final OptionalInt maxLinksPerNode; + private final OptionalInt neighborsToExploreAtInsert; + + public static class Builder { + private OptionalInt maxLinksPerNode = OptionalInt.empty(); + private OptionalInt neighborsToExploreAtInsert = OptionalInt.empty(); + + public void setMaxLinksPerNode(int value) { + maxLinksPerNode = OptionalInt.of(value); + } + public void setNeighborsToExploreAtInsert(int value) { + neighborsToExploreAtInsert = OptionalInt.of(value); + } + public HnswIndexParams build() { + return new HnswIndexParams(maxLinksPerNode, neighborsToExploreAtInsert); + } + } + + public HnswIndexParams() { + this.maxLinksPerNode = OptionalInt.empty(); + this.neighborsToExploreAtInsert = OptionalInt.empty(); + } + + public HnswIndexParams(OptionalInt maxLinksPerNode, OptionalInt neighborsToExploreAtInsert) { + this.maxLinksPerNode = maxLinksPerNode; + this.neighborsToExploreAtInsert = neighborsToExploreAtInsert; + } + + /** + * Creates a new instance where values from the given parameter instance are used where they are present, + * otherwise we use values from this. + */ + public HnswIndexParams overrideFrom(HnswIndexParams rhs) { + return new HnswIndexParams(rhs.maxLinksPerNode.isPresent() ? rhs.maxLinksPerNode : maxLinksPerNode, + rhs.neighborsToExploreAtInsert.isPresent() ? rhs.neighborsToExploreAtInsert : neighborsToExploreAtInsert); + } + + public int maxLinksPerNode() { + return maxLinksPerNode.orElse(DEFAULT_MAX_LINKS_PER_NODE); + } + + public int neighborsToExploreAtInsert() { + return neighborsToExploreAtInsert.orElse(DEFAULT_NEIGHBORS_TO_EXPLORE_AT_INSERT); + } +} diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java index 39f543c7db3..7f9da28b9ca 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java @@ -4,6 +4,7 @@ package com.yahoo.searchdefinition.fieldoperation; import com.yahoo.searchdefinition.Index; import com.yahoo.searchdefinition.Index.Type; import com.yahoo.searchdefinition.document.BooleanIndexDefinition; +import com.yahoo.searchdefinition.document.HnswIndexParams; import com.yahoo.searchdefinition.document.SDField; import com.yahoo.searchdefinition.document.Stemming; @@ -31,6 +32,8 @@ public class IndexOperation implements FieldOperation { private OptionalDouble densePostingListThreshold = OptionalDouble.empty(); private Optional<Boolean> enableBm25 = Optional.empty(); + private Optional<HnswIndexParams.Builder> hnswIndexParams = Optional.empty(); + public String getIndexName() { return indexName; } @@ -91,6 +94,9 @@ public class IndexOperation implements FieldOperation { if (enableBm25.isPresent()) { index.setInterleavedFeatures(enableBm25.get()); } + if (hnswIndexParams.isPresent()) { + index.setHnswIndexParams(hnswIndexParams.get().build()); + } } public Type getType() { @@ -116,8 +122,13 @@ public class IndexOperation implements FieldOperation { public void setDensePostingListThreshold(double densePostingListThreshold) { this.densePostingListThreshold = OptionalDouble.of(densePostingListThreshold); } + public void setEnableBm25(boolean value) { enableBm25 = Optional.of(value); } + public void setHnswIndexParams(HnswIndexParams.Builder params) { + this.hnswIndexParams = Optional.of(params); + } + } diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/processing/TensorFieldProcessor.java b/config-model/src/main/java/com/yahoo/searchdefinition/processing/TensorFieldProcessor.java index 8e54d7c00d6..9cd7fb24e42 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/processing/TensorFieldProcessor.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/processing/TensorFieldProcessor.java @@ -6,7 +6,8 @@ import com.yahoo.document.CollectionDataType; import com.yahoo.document.TensorDataType; import com.yahoo.searchdefinition.RankProfileRegistry; import com.yahoo.searchdefinition.Search; -import com.yahoo.searchdefinition.document.Attribute; +import com.yahoo.searchdefinition.document.HnswIndexParams; +import com.yahoo.searchdefinition.document.ImmutableSDField; import com.yahoo.searchdefinition.document.SDField; import com.yahoo.vespa.model.container.search.QueryProfiles; @@ -25,10 +26,11 @@ public class TensorFieldProcessor extends Processor { public void process(boolean validate, boolean documentsOnly) { if ( ! validate) return; - for (SDField field : search.allConcreteFields()) { + for (var field : search.allConcreteFields()) { if ( field.getDataType() instanceof TensorDataType ) { validateIndexingScripsForTensorField(field); validateAttributeSettingForTensorField(field); + processIndexSettingsForTensorField(field); } else if (field.getDataType() instanceof CollectionDataType){ validateDataTypeForCollectionField(field); @@ -37,20 +39,53 @@ public class TensorFieldProcessor extends Processor { } private void validateIndexingScripsForTensorField(SDField field) { - if (field.doesIndexing()) { - fail(search, field, "A field of type 'tensor' cannot be specified as an 'index' field."); + if (field.doesIndexing() && !isTensorTypeThatSupportsHnswIndex(field)) { + fail(search, field, "A tensor of type '" + tensorTypeToString(field) + "' does not support having an 'index'. " + + "Currently, only tensors with 1 indexed dimension supports that."); } } + private boolean isTensorTypeThatSupportsHnswIndex(ImmutableSDField field) { + var type = ((TensorDataType)field.getDataType()).getTensorType(); + // Tensors with 1 indexed dimension supports a hnsw index (used for approximate nearest neighbor search). + if ((type.dimensions().size() == 1) && + type.dimensions().get(0).isIndexed()) { + return true; + } + return false; + } + + private String tensorTypeToString(ImmutableSDField field) { + return ((TensorDataType)field.getDataType()).getTensorType().toString(); + } + private void validateAttributeSettingForTensorField(SDField field) { if (field.doesAttributing()) { - Attribute attribute = field.getAttributes().get(field.getName()); + var attribute = field.getAttributes().get(field.getName()); if (attribute != null && attribute.isFastSearch()) { fail(search, field, "An attribute of type 'tensor' cannot be 'fast-search'."); } } } + private void processIndexSettingsForTensorField(SDField field) { + if (!field.doesIndexing()) { + return; + } + if (isTensorTypeThatSupportsHnswIndex(field)) { + if (!field.doesAttributing()) { + fail(search, field, "A tensor that has an index must also be an attribute."); + } + var index = field.getIndex(field.getName()); + // TODO: Calculate default params based on tensor dimension size + var params = new HnswIndexParams(); + if (index != null && index.getHnswIndexParams().isPresent()) { + params = params.overrideFrom(index.getHnswIndexParams().get()); + } + field.getAttribute().setHnswIndexParams(params); + } + } + private void validateDataTypeForCollectionField(SDField field) { if (((CollectionDataType)field.getDataType()).getNestedType() instanceof TensorDataType) fail(search, field, "A field with collection type of tensor is not supported. Use simple type 'tensor' instead."); |