summaryrefslogtreecommitdiffstats
path: root/config-model/src/main/java
diff options
context:
space:
mode:
Diffstat (limited to 'config-model/src/main/java')
-rw-r--r--config-model/src/main/java/com/yahoo/searchdefinition/Index.java41
-rw-r--r--config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java8
-rw-r--r--config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexSchema.java12
-rw-r--r--config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java8
-rw-r--r--config-model/src/main/java/com/yahoo/searchdefinition/document/HnswIndexParams.java60
-rw-r--r--config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java11
-rw-r--r--config-model/src/main/java/com/yahoo/searchdefinition/processing/TensorFieldProcessor.java45
7 files changed, 165 insertions, 20 deletions
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/Index.java b/config-model/src/main/java/com/yahoo/searchdefinition/Index.java
index e46db1d1b5f..90f061d933d 100644
--- a/config-model/src/main/java/com/yahoo/searchdefinition/Index.java
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/Index.java
@@ -2,6 +2,7 @@
package com.yahoo.searchdefinition;
import com.yahoo.searchdefinition.document.BooleanIndexDefinition;
+import com.yahoo.searchdefinition.document.HnswIndexParams;
import com.yahoo.searchdefinition.document.RankType;
import com.yahoo.searchdefinition.document.Stemming;
@@ -9,8 +10,11 @@ import java.io.Serializable;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashSet;
+import java.util.Objects;
+import java.util.Optional;
import java.util.Set;
+
/**
* An index definition in a search definition.
* Two indices are equal if they have the same name and the same settings, except
@@ -57,6 +61,8 @@ public class Index implements Cloneable, Serializable {
/** The boolean index definition, if set */
private BooleanIndexDefinition boolIndex;
+ private Optional<HnswIndexParams> hnswIndexParams;
+
/** Whether the posting lists of this index field should have interleaved features (num occs, field length) in document id stream. */
private boolean interleavedFeatures = false;
@@ -115,20 +121,25 @@ public class Index implements Cloneable, Serializable {
}
@Override
- public int hashCode() {
- return name.hashCode() + ( prefix ? 17 : 0 );
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ Index index = (Index) o;
+ return prefix == index.prefix &&
+ normalized == index.normalized &&
+ interleavedFeatures == index.interleavedFeatures &&
+ Objects.equals(name, index.name) &&
+ rankType == index.rankType &&
+ Objects.equals(aliases, index.aliases) &&
+ stemming == index.stemming &&
+ type == index.type &&
+ Objects.equals(boolIndex, index.boolIndex) &&
+ Objects.equals(hnswIndexParams, index.hnswIndexParams);
}
@Override
- public boolean equals(Object object) {
- if ( ! (object instanceof Index)) return false;
-
- Index other=(Index)object;
- return
- this.name.equals(other.name) &&
- this.prefix==other.prefix &&
- this.stemming==other.stemming &&
- this.normalized==other.normalized;
+ public int hashCode() {
+ return Objects.hash(name, rankType, prefix, aliases, stemming, normalized, type, boolIndex, hnswIndexParams, interleavedFeatures);
}
public String toString() {
@@ -176,6 +187,14 @@ public class Index implements Cloneable, Serializable {
boolIndex = def;
}
+ public Optional<HnswIndexParams> getHnswIndexParams() {
+ return hnswIndexParams;
+ }
+
+ public void setHnswIndexParams(HnswIndexParams params) {
+ hnswIndexParams = Optional.of(params);
+ }
+
public void setInterleavedFeatures(boolean value) {
interleavedFeatures = value;
}
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java b/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java
index 76dff404568..8de8b239c93 100644
--- a/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java
@@ -240,6 +240,14 @@ public class AttributeFields extends Derived implements AttributesConfig.Produce
aaB.tensortype(attribute.tensorType().get().toString());
}
aaB.imported(imported);
+ if (attribute.hnswIndexParams().isPresent()) {
+ var ib = new AttributesConfig.Attribute.Index.Builder();
+ var params = attribute.hnswIndexParams().get();
+ ib.hnsw.enabled(true);
+ ib.hnsw.maxlinkspernode(params.maxLinksPerNode());
+ ib.hnsw.neighborstoexploreatinsert(params.neighborsToExploreAtInsert());
+ aaB.index(ib);
+ }
return aaB;
}
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexSchema.java b/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexSchema.java
index d8773063053..39a67a69575 100644
--- a/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexSchema.java
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexSchema.java
@@ -5,6 +5,7 @@ import com.yahoo.document.ArrayDataType;
import com.yahoo.document.DataType;
import com.yahoo.document.Field;
import com.yahoo.document.StructuredDataType;
+import com.yahoo.document.TensorDataType;
import com.yahoo.document.WeightedSetDataType;
import com.yahoo.searchdefinition.Search;
import com.yahoo.searchdefinition.document.BooleanIndexDefinition;
@@ -20,7 +21,7 @@ import java.util.List;
import java.util.Map;
/**
- * Deriver of indexschema config containing information of all index fields with name and data type.
+ * Deriver of indexschema config containing information of all text index fields with name and data type.
*
* @author geirst
*/
@@ -44,9 +45,14 @@ public class IndexSchema extends Derived implements IndexschemaConfig.Producer {
super.derive(search);
}
+ private boolean isTensorField(ImmutableSDField field) {
+ return field.getDataType() instanceof TensorDataType;
+ }
+
private void deriveIndexFields(ImmutableSDField field, Search search) {
- if (!field.doesIndexing() &&
- !field.isIndexStructureField())
+ // Note: Indexes for tensor fields are NOT part of the index schema for text fields.
+ if ((!field.doesIndexing() && !field.isIndexStructureField()) ||
+ isTensorField(field))
{
return;
}
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java b/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java
index fbcaf2a3a80..9ed5e4ca2de 100644
--- a/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java
@@ -66,6 +66,8 @@ public final class Attribute implements Cloneable, Serializable {
/** This is set if the type of this is REFERENCE */
private final Optional<StructuredDataType> referenceDocumentType;
+ private Optional<HnswIndexParams> hnswIndexParams;
+
private boolean isPosition = false;
private final Sorting sorting = new Sorting();
@@ -150,6 +152,7 @@ public final class Attribute implements Cloneable, Serializable {
setCollectionType(collectionType);
this.tensorType = tensorType;
this.referenceDocumentType = referenceDocumentType;
+ this.hnswIndexParams = Optional.empty();
}
public Attribute convertToArray() {
@@ -194,6 +197,7 @@ public final class Attribute implements Cloneable, Serializable {
public double densePostingListThreshold() { return densePostingListThreshold; }
public Optional<TensorType> tensorType() { return tensorType; }
public Optional<StructuredDataType> referenceDocumentType() { return referenceDocumentType; }
+ public Optional<HnswIndexParams> hnswIndexParams() { return hnswIndexParams; }
public Sorting getSorting() { return sorting; }
@@ -217,6 +221,7 @@ public final class Attribute implements Cloneable, Serializable {
public void setUpperBound(long upperBound) { this.upperBound = upperBound; }
public void setDensePostingListThreshold(double threshold) { this.densePostingListThreshold = threshold; }
public void setTensorType(TensorType tensorType) { this.tensorType = Optional.of(tensorType); }
+ public void setHnswIndexParams(HnswIndexParams params) { this.hnswIndexParams = Optional.of(params); }
public String getName() { return name; }
public Type getType() { return type; }
@@ -335,7 +340,7 @@ public final class Attribute implements Cloneable, Serializable {
public int hashCode() {
return Objects.hash(
name, type, collectionType, sorting, isPrefetch(), fastAccess, removeIfZero, createIfNonExistent,
- isPosition, huge, enableBitVectors, enableOnlyBitVector, tensorType, referenceDocumentType);
+ isPosition, huge, enableBitVectors, enableOnlyBitVector, tensorType, referenceDocumentType, hnswIndexParams);
}
@Override
@@ -362,6 +367,7 @@ public final class Attribute implements Cloneable, Serializable {
if ( ! this.sorting.equals(other.sorting)) return false;
if (!this.tensorType.equals(other.tensorType)) return false;
if (!this.referenceDocumentType.equals(other.referenceDocumentType)) return false;
+ if (!this.hnswIndexParams.equals(other.hnswIndexParams)) return false;
return true;
}
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/document/HnswIndexParams.java b/config-model/src/main/java/com/yahoo/searchdefinition/document/HnswIndexParams.java
new file mode 100644
index 00000000000..70d0df8be7f
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/document/HnswIndexParams.java
@@ -0,0 +1,60 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchdefinition.document;
+
+import java.util.OptionalInt;
+
+/**
+ * Configuration parameters for a hnsw index used together with a 1-dimensional indexed tensor for approximate nearest neighbor search.
+ *
+ * @author geirst
+ */
+public class HnswIndexParams {
+
+ public static final int DEFAULT_MAX_LINKS_PER_NODE = 16;
+ public static final int DEFAULT_NEIGHBORS_TO_EXPLORE_AT_INSERT = 200;
+
+ private final OptionalInt maxLinksPerNode;
+ private final OptionalInt neighborsToExploreAtInsert;
+
+ public static class Builder {
+ private OptionalInt maxLinksPerNode = OptionalInt.empty();
+ private OptionalInt neighborsToExploreAtInsert = OptionalInt.empty();
+
+ public void setMaxLinksPerNode(int value) {
+ maxLinksPerNode = OptionalInt.of(value);
+ }
+ public void setNeighborsToExploreAtInsert(int value) {
+ neighborsToExploreAtInsert = OptionalInt.of(value);
+ }
+ public HnswIndexParams build() {
+ return new HnswIndexParams(maxLinksPerNode, neighborsToExploreAtInsert);
+ }
+ }
+
+ public HnswIndexParams() {
+ this.maxLinksPerNode = OptionalInt.empty();
+ this.neighborsToExploreAtInsert = OptionalInt.empty();
+ }
+
+ public HnswIndexParams(OptionalInt maxLinksPerNode, OptionalInt neighborsToExploreAtInsert) {
+ this.maxLinksPerNode = maxLinksPerNode;
+ this.neighborsToExploreAtInsert = neighborsToExploreAtInsert;
+ }
+
+ /**
+ * Creates a new instance where values from the given parameter instance are used where they are present,
+ * otherwise we use values from this.
+ */
+ public HnswIndexParams overrideFrom(HnswIndexParams rhs) {
+ return new HnswIndexParams(rhs.maxLinksPerNode.isPresent() ? rhs.maxLinksPerNode : maxLinksPerNode,
+ rhs.neighborsToExploreAtInsert.isPresent() ? rhs.neighborsToExploreAtInsert : neighborsToExploreAtInsert);
+ }
+
+ public int maxLinksPerNode() {
+ return maxLinksPerNode.orElse(DEFAULT_MAX_LINKS_PER_NODE);
+ }
+
+ public int neighborsToExploreAtInsert() {
+ return neighborsToExploreAtInsert.orElse(DEFAULT_NEIGHBORS_TO_EXPLORE_AT_INSERT);
+ }
+}
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java
index 39f543c7db3..7f9da28b9ca 100644
--- a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java
@@ -4,6 +4,7 @@ package com.yahoo.searchdefinition.fieldoperation;
import com.yahoo.searchdefinition.Index;
import com.yahoo.searchdefinition.Index.Type;
import com.yahoo.searchdefinition.document.BooleanIndexDefinition;
+import com.yahoo.searchdefinition.document.HnswIndexParams;
import com.yahoo.searchdefinition.document.SDField;
import com.yahoo.searchdefinition.document.Stemming;
@@ -31,6 +32,8 @@ public class IndexOperation implements FieldOperation {
private OptionalDouble densePostingListThreshold = OptionalDouble.empty();
private Optional<Boolean> enableBm25 = Optional.empty();
+ private Optional<HnswIndexParams.Builder> hnswIndexParams = Optional.empty();
+
public String getIndexName() {
return indexName;
}
@@ -91,6 +94,9 @@ public class IndexOperation implements FieldOperation {
if (enableBm25.isPresent()) {
index.setInterleavedFeatures(enableBm25.get());
}
+ if (hnswIndexParams.isPresent()) {
+ index.setHnswIndexParams(hnswIndexParams.get().build());
+ }
}
public Type getType() {
@@ -116,8 +122,13 @@ public class IndexOperation implements FieldOperation {
public void setDensePostingListThreshold(double densePostingListThreshold) {
this.densePostingListThreshold = OptionalDouble.of(densePostingListThreshold);
}
+
public void setEnableBm25(boolean value) {
enableBm25 = Optional.of(value);
}
+ public void setHnswIndexParams(HnswIndexParams.Builder params) {
+ this.hnswIndexParams = Optional.of(params);
+ }
+
}
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/processing/TensorFieldProcessor.java b/config-model/src/main/java/com/yahoo/searchdefinition/processing/TensorFieldProcessor.java
index 8e54d7c00d6..9cd7fb24e42 100644
--- a/config-model/src/main/java/com/yahoo/searchdefinition/processing/TensorFieldProcessor.java
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/processing/TensorFieldProcessor.java
@@ -6,7 +6,8 @@ import com.yahoo.document.CollectionDataType;
import com.yahoo.document.TensorDataType;
import com.yahoo.searchdefinition.RankProfileRegistry;
import com.yahoo.searchdefinition.Search;
-import com.yahoo.searchdefinition.document.Attribute;
+import com.yahoo.searchdefinition.document.HnswIndexParams;
+import com.yahoo.searchdefinition.document.ImmutableSDField;
import com.yahoo.searchdefinition.document.SDField;
import com.yahoo.vespa.model.container.search.QueryProfiles;
@@ -25,10 +26,11 @@ public class TensorFieldProcessor extends Processor {
public void process(boolean validate, boolean documentsOnly) {
if ( ! validate) return;
- for (SDField field : search.allConcreteFields()) {
+ for (var field : search.allConcreteFields()) {
if ( field.getDataType() instanceof TensorDataType ) {
validateIndexingScripsForTensorField(field);
validateAttributeSettingForTensorField(field);
+ processIndexSettingsForTensorField(field);
}
else if (field.getDataType() instanceof CollectionDataType){
validateDataTypeForCollectionField(field);
@@ -37,20 +39,53 @@ public class TensorFieldProcessor extends Processor {
}
private void validateIndexingScripsForTensorField(SDField field) {
- if (field.doesIndexing()) {
- fail(search, field, "A field of type 'tensor' cannot be specified as an 'index' field.");
+ if (field.doesIndexing() && !isTensorTypeThatSupportsHnswIndex(field)) {
+ fail(search, field, "A tensor of type '" + tensorTypeToString(field) + "' does not support having an 'index'. " +
+ "Currently, only tensors with 1 indexed dimension supports that.");
}
}
+ private boolean isTensorTypeThatSupportsHnswIndex(ImmutableSDField field) {
+ var type = ((TensorDataType)field.getDataType()).getTensorType();
+ // Tensors with 1 indexed dimension supports a hnsw index (used for approximate nearest neighbor search).
+ if ((type.dimensions().size() == 1) &&
+ type.dimensions().get(0).isIndexed()) {
+ return true;
+ }
+ return false;
+ }
+
+ private String tensorTypeToString(ImmutableSDField field) {
+ return ((TensorDataType)field.getDataType()).getTensorType().toString();
+ }
+
private void validateAttributeSettingForTensorField(SDField field) {
if (field.doesAttributing()) {
- Attribute attribute = field.getAttributes().get(field.getName());
+ var attribute = field.getAttributes().get(field.getName());
if (attribute != null && attribute.isFastSearch()) {
fail(search, field, "An attribute of type 'tensor' cannot be 'fast-search'.");
}
}
}
+ private void processIndexSettingsForTensorField(SDField field) {
+ if (!field.doesIndexing()) {
+ return;
+ }
+ if (isTensorTypeThatSupportsHnswIndex(field)) {
+ if (!field.doesAttributing()) {
+ fail(search, field, "A tensor that has an index must also be an attribute.");
+ }
+ var index = field.getIndex(field.getName());
+ // TODO: Calculate default params based on tensor dimension size
+ var params = new HnswIndexParams();
+ if (index != null && index.getHnswIndexParams().isPresent()) {
+ params = params.overrideFrom(index.getHnswIndexParams().get());
+ }
+ field.getAttribute().setHnswIndexParams(params);
+ }
+ }
+
private void validateDataTypeForCollectionField(SDField field) {
if (((CollectionDataType)field.getDataType()).getNestedType() instanceof TensorDataType)
fail(search, field, "A field with collection type of tensor is not supported. Use simple type 'tensor' instead.");