aboutsummaryrefslogtreecommitdiffstats
path: root/config-model/src/main/java/com/yahoo/schema/document/Attribute.java
diff options
context:
space:
mode:
Diffstat (limited to 'config-model/src/main/java/com/yahoo/schema/document/Attribute.java')
-rw-r--r--config-model/src/main/java/com/yahoo/schema/document/Attribute.java435
1 files changed, 435 insertions, 0 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/document/Attribute.java b/config-model/src/main/java/com/yahoo/schema/document/Attribute.java
new file mode 100644
index 00000000000..f2279a52855
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/document/Attribute.java
@@ -0,0 +1,435 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.document;
+
+import com.yahoo.document.ArrayDataType;
+import com.yahoo.document.CollectionDataType;
+import com.yahoo.document.DataType;
+import com.yahoo.document.DocumentType;
+import com.yahoo.document.PrimitiveDataType;
+import com.yahoo.documentmodel.NewDocumentReferenceDataType;
+import com.yahoo.document.StructuredDataType;
+import com.yahoo.document.TensorDataType;
+import com.yahoo.document.WeightedSetDataType;
+import com.yahoo.document.datatypes.BoolFieldValue;
+import com.yahoo.document.datatypes.ByteFieldValue;
+import com.yahoo.document.datatypes.DoubleFieldValue;
+import com.yahoo.document.datatypes.FieldValue;
+import com.yahoo.document.datatypes.FloatFieldValue;
+import com.yahoo.document.datatypes.IntegerFieldValue;
+import com.yahoo.document.datatypes.LongFieldValue;
+import com.yahoo.document.datatypes.PredicateFieldValue;
+import com.yahoo.document.datatypes.Raw;
+import com.yahoo.document.datatypes.Float16FieldValue;
+import com.yahoo.document.datatypes.StringFieldValue;
+import com.yahoo.document.datatypes.TensorFieldValue;
+import com.yahoo.tensor.TensorType;
+
+import java.io.Serializable;
+import java.util.function.Supplier;
+import java.util.LinkedHashSet;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+
+/**
+ * A search-time document attribute (per-document in-memory value).
+ * This belongs to the field defining the attribute.
+ *
+ * @author bratseth
+ */
+public final class Attribute implements Cloneable, Serializable {
+
+ public enum DistanceMetric { EUCLIDEAN, ANGULAR, GEODEGREES, INNERPRODUCT, HAMMING }
+
+ // Remember to change hashCode and equals when you add new fields
+
+ private String name;
+
+ private Type type;
+ private CollectionType collectionType;
+
+ private boolean removeIfZero = false;
+ private boolean createIfNonExistent = false;
+ private boolean enableBitVectors = false;
+ private boolean enableOnlyBitVector = false;
+
+ private boolean fastRank = false;
+ private boolean fastSearch = false;
+ private boolean fastAccess = false;
+ private boolean huge = false;
+ private boolean mutable = false;
+ private boolean paged = false;
+ private int arity = BooleanIndexDefinition.DEFAULT_ARITY;
+ private long lowerBound = BooleanIndexDefinition.DEFAULT_LOWER_BOUND;
+ private long upperBound = BooleanIndexDefinition.DEFAULT_UPPER_BOUND;
+ private double densePostingListThreshold = BooleanIndexDefinition.DEFAULT_DENSE_POSTING_LIST_THRESHOLD;
+
+ /** This is set if the type of this is TENSOR */
+ private Optional<TensorType> tensorType = Optional.empty();
+
+ /** This is set if the type of this is REFERENCE */
+ private final Optional<StructuredDataType> referenceDocumentType;
+
+ private Optional<DistanceMetric> distanceMetric = Optional.empty();
+
+ private Optional<HnswIndexParams> hnswIndexParams = Optional.empty();
+
+ private boolean isPosition = false;
+ private final Sorting sorting = new Sorting();
+
+ /** The aliases for this attribute */
+ private final Set<String> aliases = new LinkedHashSet<>();
+
+ private Dictionary dictionary = null;
+ private Case casing = Case.UNCASED;
+
+ /**
+ * True if this attribute should be returned during first pass of search.
+ * Null means make the default decision for this kind of attribute
+ */
+ private Boolean prefetch = null;
+
+ /** The attribute type enumeration */
+ public enum Type {
+ BYTE("byte", "INT8"),
+ SHORT("short", "INT16"),
+ INTEGER("integer", "INT32"),
+ LONG("long", "INT64"),
+ FLOAT16("float16", "FLOAT16"),
+ FLOAT("float", "FLOAT"),
+ DOUBLE("double", "DOUBLE"),
+ STRING("string", "STRING"),
+ BOOL("bool", "BOOL"),
+ PREDICATE("predicate", "PREDICATE"),
+ TENSOR("tensor", "TENSOR"),
+ REFERENCE("reference", "REFERENCE");
+
+ private final String myName; // different from what name() returns.
+ private final String exportAttributeTypeName;
+
+ Type(String name, String exportAttributeTypeName) {
+ this.myName=name;
+ this.exportAttributeTypeName = exportAttributeTypeName;
+ }
+
+ public String getName() { return myName; }
+ public String getExportAttributeTypeName() { return exportAttributeTypeName; }
+
+ @Override
+ public String toString() {
+ return "type: " + myName;
+ }
+ }
+
+ /** The attribute collection type enumeration */
+ public enum CollectionType {
+
+ SINGLE("SINGLE"),
+ ARRAY("ARRAY"),
+ WEIGHTEDSET ("WEIGHTEDSET");
+
+ private final String name;
+
+ CollectionType(String name) {
+ this.name=name;
+ }
+
+ public String getName() { return name; }
+
+ @Override
+ public String toString() {
+ return "collectiontype: " + name;
+ }
+
+ }
+
+ /** Creates an attribute with default settings */
+ public Attribute(String name, DataType fieldType) {
+ this(name, convertDataType(fieldType), convertCollectionType(fieldType), convertTensorType(fieldType), convertTargetType(fieldType));
+ setRemoveIfZero(fieldType instanceof WeightedSetDataType ? ((WeightedSetDataType)fieldType).removeIfZero() : false);
+ setCreateIfNonExistent(fieldType instanceof WeightedSetDataType ? ((WeightedSetDataType)fieldType).createIfNonExistent() : false);
+ }
+
+ public Attribute(String name, Type type, CollectionType collectionType) {
+ this(name, type, collectionType, Optional.empty(), Optional.empty());
+ }
+
+ public Attribute(String name,
+ Type type,
+ CollectionType collectionType,
+ Optional<TensorType> tensorType,
+ Optional<StructuredDataType> referenceDocumentType) {
+ this.name=name;
+ setType(type);
+ setCollectionType(collectionType);
+ this.tensorType = tensorType;
+ this.referenceDocumentType = referenceDocumentType;
+ }
+
+ public Attribute convertToArray() {
+ Attribute result = clone();
+ result.collectionType = CollectionType.ARRAY;
+ return result;
+ }
+
+ /**
+ * <p>Returns whether this attribute should be included in the "attributeprefetch" summary
+ * which is returned to the Qrs by prefetchAttributes, used by blending, uniquing etc.
+ *
+ * <p>Single value attributes are prefetched by default if summary is true.
+ * Multi value attributes are not.</p>
+ */
+ public boolean isPrefetch() {
+ if (prefetch!=null) return prefetch.booleanValue();
+
+ if (CollectionType.SINGLE.equals(collectionType)) {
+ return true;
+ }
+
+ return false;
+ }
+
+ /** Returns the prefetch value of this, null if the default is used. */
+ public Boolean getPrefetchValue() { return prefetch; }
+
+ public boolean isRemoveIfZero() { return removeIfZero; }
+ public boolean isCreateIfNonExistent() { return createIfNonExistent; }
+ public boolean isEnabledBitVectors() { return enableBitVectors; }
+ public boolean isEnabledOnlyBitVector() { return enableOnlyBitVector; }
+ public boolean isFastSearch() { return fastSearch; }
+ public boolean isFastRank() { return fastRank; }
+ public boolean isFastAccess() { return fastAccess; }
+ public boolean isHuge() { return huge; }
+ public boolean isPaged() { return paged; }
+ public boolean isPosition() { return isPosition; }
+ public boolean isMutable() { return mutable; }
+
+ public int arity() { return arity; }
+ public long lowerBound() { return lowerBound; }
+ public long upperBound() { return upperBound; }
+ public double densePostingListThreshold() { return densePostingListThreshold; }
+ public Optional<TensorType> tensorType() { return tensorType; }
+ public Optional<StructuredDataType> referenceDocumentType() { return referenceDocumentType; }
+
+ public static final DistanceMetric DEFAULT_DISTANCE_METRIC = DistanceMetric.EUCLIDEAN;
+ public DistanceMetric distanceMetric() {
+ return distanceMetric.orElse(DEFAULT_DISTANCE_METRIC);
+ }
+ public Optional<HnswIndexParams> hnswIndexParams() { return hnswIndexParams; }
+
+ public Sorting getSorting() { return sorting; }
+ public Dictionary getDictionary() { return dictionary; }
+ public Case getCase() { return casing; }
+
+ public void setRemoveIfZero(boolean remove) { this.removeIfZero = remove; }
+ public void setCreateIfNonExistent(boolean create) { this.createIfNonExistent = create; }
+
+ /**
+ * Sets whether this should be included in the "attributeprefetch" document summary.
+ * True or false to override default, null to use default
+ */
+ public void setPrefetch(Boolean prefetch) { this.prefetch = prefetch; }
+ public void setEnableBitVectors(boolean enableBitVectors) { this.enableBitVectors = enableBitVectors; }
+ public void setEnableOnlyBitVector(boolean enableOnlyBitVector) { this.enableOnlyBitVector = enableOnlyBitVector; }
+ public void setFastRank(boolean value) {
+ Supplier<IllegalArgumentException> badGen = () -> new IllegalArgumentException("fast-rank is only valid for tensor attributes, invalid for: "+this);
+ var tt = tensorType.orElseThrow(badGen);
+ for (var dim : tt.dimensions()) {
+ if (dim.isMapped()) {
+ this.fastRank = value;
+ return;
+ }
+ }
+ throw badGen.get();
+ }
+ public void setFastSearch(boolean fastSearch) { this.fastSearch = fastSearch; }
+ public void setHuge(boolean huge) { this.huge = huge; }
+ public void setPaged(boolean paged) { this.paged = paged; }
+ public void setFastAccess(boolean fastAccess) { this.fastAccess = fastAccess; }
+ public void setPosition(boolean position) { this.isPosition = position; }
+ public void setMutable(boolean mutable) { this.mutable = mutable; }
+ public void setArity(int arity) { this.arity = arity; }
+ public void setLowerBound(long lowerBound) { this.lowerBound = lowerBound; }
+ public void setUpperBound(long upperBound) { this.upperBound = upperBound; }
+ public void setDensePostingListThreshold(double threshold) { this.densePostingListThreshold = threshold; }
+ public void setTensorType(TensorType tensorType) { this.tensorType = Optional.of(tensorType); }
+ public void setDistanceMetric(DistanceMetric metric) { this.distanceMetric = Optional.of(metric); }
+ public void setHnswIndexParams(HnswIndexParams params) { this.hnswIndexParams = Optional.of(params); }
+ public void setDictionary(Dictionary dictionary) { this.dictionary = dictionary; }
+ public void setCase(Case casing) { this.casing = casing; }
+
+ public String getName() { return name; }
+ public Type getType() { return type; }
+ public CollectionType getCollectionType() { return collectionType; }
+
+ public void setName(String name) { this.name=name; }
+ private void setType(Type type) { this.type=type; }
+ public void setCollectionType(CollectionType type) { this.collectionType=type; }
+
+ /** Converts to the right attribute type from a field datatype */
+ public static Type convertDataType(DataType fieldType) {
+ if (fieldType instanceof NewDocumentReferenceDataType) {
+ return Type.REFERENCE;
+ } else if (fieldType instanceof CollectionDataType) {
+ return convertDataType(((CollectionDataType) fieldType).getNestedType());
+ }
+ FieldValue fval = fieldType.createFieldValue();
+ if (fval instanceof StringFieldValue) {
+ return Type.STRING;
+ } else if (fval instanceof IntegerFieldValue) {
+ return Type.INTEGER;
+ } else if (fval instanceof LongFieldValue) {
+ return Type.LONG;
+ } else if (fval instanceof FloatFieldValue) {
+ return Type.FLOAT;
+ } else if (fval instanceof DoubleFieldValue) {
+ return Type.DOUBLE;
+ } else if (fval instanceof BoolFieldValue) {
+ return Type.BOOL;
+ } else if (fval instanceof Float16FieldValue) {
+ return Type.FLOAT16;
+ } else if (fval instanceof ByteFieldValue) {
+ return Type.BYTE;
+ } else if (fval instanceof Raw) {
+ return Type.BYTE;
+ } else if (fval instanceof PredicateFieldValue) {
+ return Type.PREDICATE;
+ } else if (fval instanceof TensorFieldValue) {
+ return Type.TENSOR;
+ } else {
+ throw new IllegalArgumentException("Don't know which attribute type to convert "
+ + fieldType + " [" + fieldType.getClass() + "] to");
+ }
+ }
+
+ /** Converts to the right attribute type from a field datatype */
+ private static CollectionType convertCollectionType(DataType fieldType) {
+ if (fieldType instanceof ArrayDataType) {
+ return CollectionType.ARRAY;
+ } else if (fieldType instanceof WeightedSetDataType) {
+ return CollectionType.WEIGHTEDSET;
+ } else if (fieldType instanceof TensorDataType) {
+ return CollectionType.SINGLE;
+ } else if (fieldType instanceof PrimitiveDataType) {
+ return CollectionType.SINGLE;
+ } else if (fieldType instanceof NewDocumentReferenceDataType) {
+ return CollectionType.SINGLE;
+ } else {
+ throw new IllegalArgumentException("Field " + fieldType + " not supported in convertCollectionType");
+ }
+ }
+
+ private static Optional<TensorType> convertTensorType(DataType fieldType) {
+ if ( ! ( fieldType instanceof TensorDataType)) return Optional.empty();
+ return Optional.of(((TensorDataType)fieldType).getTensorType());
+ }
+
+ private static Optional<StructuredDataType> convertTargetType(DataType fieldType) {
+ return Optional.of(fieldType)
+ .filter(NewDocumentReferenceDataType.class::isInstance)
+ .map(NewDocumentReferenceDataType.class::cast)
+ .map(NewDocumentReferenceDataType::getTargetType);
+ }
+
+ /** Converts to the right field type from an attribute type */
+ private DataType toDataType(Type attributeType) {
+ switch (attributeType) {
+ case STRING : return DataType.STRING;
+ case INTEGER: return DataType.INT;
+ case LONG: return DataType.LONG;
+ case FLOAT16: return DataType.FLOAT16;
+ case FLOAT: return DataType.FLOAT;
+ case DOUBLE: return DataType.DOUBLE;
+ case BOOL: return DataType.BOOL;
+ case BYTE: return DataType.BYTE;
+ case PREDICATE: return DataType.PREDICATE;
+ case TENSOR: return DataType.getTensor(tensorType.orElseThrow(IllegalStateException::new));
+ case REFERENCE: return createReferenceDataType();
+ default: throw new IllegalArgumentException("Unknown attribute type " + attributeType);
+ }
+ }
+
+ @SuppressWarnings("deprecation")
+ private DataType createReferenceDataType() {
+ if (referenceDocumentType.isEmpty()) {
+ throw new IllegalStateException("Referenced document type is not set");
+ }
+ StructuredDataType type = referenceDocumentType.get();
+ if (type instanceof DocumentType) {
+ return new NewDocumentReferenceDataType((DocumentType) type);
+ } else {
+ return NewDocumentReferenceDataType.forDocumentName(type.getName());
+ }
+ }
+
+ public DataType getDataType() {
+ DataType dataType = toDataType(type);
+ if (collectionType == Attribute.CollectionType.ARRAY) {
+ return DataType.getArray(dataType);
+ } else if (collectionType == Attribute.CollectionType.WEIGHTEDSET) {
+ return DataType.getWeightedSet(dataType, createIfNonExistent, removeIfZero);
+ } else {
+ return dataType;
+ }
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(
+ name, type, collectionType, sorting, dictionary, isPrefetch(), fastAccess, removeIfZero,
+ createIfNonExistent, isPosition, huge, mutable, paged, enableBitVectors, enableOnlyBitVector,
+ tensorType, referenceDocumentType, distanceMetric, hnswIndexParams);
+ }
+
+ @Override
+ public boolean equals(Object object) {
+ if (! (object instanceof Attribute)) return false;
+
+ Attribute other = (Attribute)object;
+ if (!this.name.equals(other.name)) return false;
+ return isCompatible(other);
+ }
+
+ /** Returns whether these attributes describes the same entity, even if they have different names */
+ public boolean isCompatible(Attribute other) {
+ if (! this.type.equals(other.type)) return false;
+ if (! this.collectionType.equals(other.collectionType)) return false;
+ if (this.isPrefetch() != other.isPrefetch()) return false;
+ if (this.removeIfZero != other.removeIfZero) return false;
+ if (this.createIfNonExistent != other.createIfNonExistent) return false;
+ if (this.enableBitVectors != other.enableBitVectors) return false;
+ if (this.enableOnlyBitVector != other.enableOnlyBitVector) return false;
+ if (this.fastSearch != other.fastSearch) return false;
+ if (this.huge != other.huge) return false;
+ if (this.mutable != other.mutable) return false;
+ if (this.paged != other.paged) return false;
+ if (! this.sorting.equals(other.sorting)) return false;
+ if (! Objects.equals(dictionary, other.dictionary)) return false;
+ if (! Objects.equals(tensorType, other.tensorType)) return false;
+ if (! Objects.equals(referenceDocumentType, other.referenceDocumentType)) return false;
+ if (! Objects.equals(distanceMetric, other.distanceMetric)) return false;
+ if (! Objects.equals(hnswIndexParams, other.hnswIndexParams)) return false;
+
+ return true;
+ }
+
+ @Override
+ public Attribute clone() {
+ try {
+ return (Attribute)super.clone();
+ }
+ catch (CloneNotSupportedException e) {
+ throw new RuntimeException("Programming error");
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "attribute '" + name + "' (" + type + ")";
+ }
+
+ public Set<String> getAliases() {
+ return aliases;
+ }
+
+}