diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-03-23 14:01:38 +0100 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2021-03-23 14:01:38 +0100 |
commit | 2b319f08c3b399ae29e9ca1e1b47fb1c4cc28725 (patch) | |
tree | 152d3afd2ceb32892a68f398fe4e7ea1993a9832 | |
parent | 402bf7a45096fd8d6193cf92728c5620cc3910bb (diff) |
Add dictionary control for numeric fields.
13 files changed, 305 insertions, 12 deletions
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java b/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java index 3e421f9ba05..4a415fccbcc 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java @@ -6,6 +6,7 @@ import com.yahoo.document.DataType; import com.yahoo.document.PositionDataType; import com.yahoo.searchdefinition.Search; import com.yahoo.searchdefinition.document.Attribute; +import com.yahoo.searchdefinition.document.Dictionary; import com.yahoo.searchdefinition.document.ImmutableSDField; import com.yahoo.searchdefinition.document.Ranking; import com.yahoo.searchdefinition.document.Sorting; @@ -251,9 +252,25 @@ public class AttributeFields extends Derived implements AttributesConfig.Produce ib.hnsw.multithreadedindexing(params.multiThreadedIndexing()); aaB.index(ib); } + Dictionary dictionary = attribute.getDictionary(); + if (dictionary != null) { + aaB.dictionary.type(convert(dictionary.getType())); + } return aaB; } + private static AttributesConfig.Attribute.Dictionary.Type.Enum convert(Dictionary.Type type) { + switch (type) { + case BTREE: + return AttributesConfig.Attribute.Dictionary.Type.BTREE; + case HASH: + return AttributesConfig.Attribute.Dictionary.Type.HASH; + case BTREE_AND_HASH: + return AttributesConfig.Attribute.Dictionary.Type.BTREE_AND_HASH; + } + return AttributesConfig.Attribute.Dictionary.Type.BTREE; + } + public void getConfig(AttributesConfig.Builder builder, FieldSet fs) { for (Attribute attribute : attributes.values()) { if (isAttributeInFieldSet(attribute, fs)) { diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java b/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java index 8cf862a72af..f230a7c10eb 100644 --- 
a/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java @@ -78,6 +78,8 @@ public final class Attribute implements Cloneable, Serializable { /** The aliases for this attribute */ private final Set<String> aliases = new LinkedHashSet<>(); + private Dictionary dictionary = new Dictionary(); + /** * True if this attribute should be returned during first pass of search. * Null means make the default decision for this kind of attribute @@ -208,6 +210,7 @@ public final class Attribute implements Cloneable, Serializable { public Optional<HnswIndexParams> hnswIndexParams() { return hnswIndexParams; } public Sorting getSorting() { return sorting; } + public Dictionary getDictionary() { return dictionary; } public void setRemoveIfZero(boolean remove) { this.removeIfZero = remove; } public void setCreateIfNonExistent(boolean create) { this.createIfNonExistent = create; } @@ -231,6 +234,7 @@ public final class Attribute implements Cloneable, Serializable { public void setTensorType(TensorType tensorType) { this.tensorType = Optional.of(tensorType); } public void setDistanceMetric(DistanceMetric metric) { this.distanceMetric = Optional.of(metric); } public void setHnswIndexParams(HnswIndexParams params) { this.hnswIndexParams = Optional.of(params); } + public void setDictionary(Dictionary dictionary) { this.dictionary = dictionary; } public String getName() { return name; } public Type getType() { return type; } @@ -348,7 +352,7 @@ public final class Attribute implements Cloneable, Serializable { @Override public int hashCode() { return Objects.hash( - name, type, collectionType, sorting, isPrefetch(), fastAccess, removeIfZero, createIfNonExistent, + name, type, collectionType, sorting, dictionary, isPrefetch(), fastAccess, removeIfZero, createIfNonExistent, isPosition, huge, enableBitVectors, enableOnlyBitVector, tensorType, referenceDocumentType, distanceMetric, 
hnswIndexParams); } @@ -370,10 +374,10 @@ public final class Attribute implements Cloneable, Serializable { if (this.createIfNonExistent != other.createIfNonExistent) return false; if (this.enableBitVectors != other.enableBitVectors) return false; if (this.enableOnlyBitVector != other.enableOnlyBitVector) return false; - // if (this.noSearch != other.noSearch) return false; No backend consequences so compatible for now if (this.fastSearch != other.fastSearch) return false; if (this.huge != other.huge) return false; if (! this.sorting.equals(other.sorting)) return false; + if (! Objects.equals(dictionary, other.dictionary)) return false; if (! Objects.equals(tensorType, other.tensorType)) return false; if (! Objects.equals(referenceDocumentType, other.referenceDocumentType)) return false; if (! Objects.equals(distanceMetric, other.distanceMetric)) return false; diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/document/Dictionary.java b/config-model/src/main/java/com/yahoo/searchdefinition/document/Dictionary.java new file mode 100644 index 00000000000..e492d572f27 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/searchdefinition/document/Dictionary.java @@ -0,0 +1,16 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +package com.yahoo.searchdefinition.document; + +/** + * Represents settings for dictionary control + * + * @author baldersheim + */ +public class Dictionary { + public enum Type { BTREE, HASH, BTREE_AND_HASH }; + private final Type type; + public Dictionary() { this(Type.BTREE); } + public Dictionary(Type type) { this.type = type; } + public Type getType() { return type; } +} diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java b/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java index d81394382c2..1bf7ab2fb24 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java @@ -80,6 +80,8 @@ public class SDField extends Field implements TypedKey, FieldOperationContainer, */ private Matching matching = new Matching(); + private Dictionary dictionary = null; + /** Attribute settings, or null if there are none */ private final Map<String, Attribute> attributes = new TreeMap<>(); @@ -533,6 +535,14 @@ public class SDField extends Field implements TypedKey, FieldOperationContainer, public void setMatching(Matching matching) { this.matching=matching; } /** + * Returns the dictionary settings of this field, or null if none have been set. + */ + public Dictionary getDictionary() { return dictionary; } + + + public void setDictionary(Dictionary dictionary) { this.dictionary=dictionary; } + + /** * Set the matching type for this field and all subfields. */ // TODO: When this is not the same as getMatching().setthis we have a potential for inconsistency. 
Find the right diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/AttributeOperation.java b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/AttributeOperation.java index b638932a4a8..56e241adb8e 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/AttributeOperation.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/AttributeOperation.java @@ -90,10 +90,6 @@ public class AttributeOperation implements FieldOperation, FieldOperationContain this.enableOnlyBitVector = enableOnlyBitVector; } - public boolean isDoAlias() { - return doAlias; - } - public void setDoAlias(boolean doAlias) { this.doAlias = doAlias; } @@ -106,9 +102,6 @@ public class AttributeOperation implements FieldOperation, FieldOperationContain this.alias = alias; } - public String getAliasedName() { - return aliasedName; - } public void setAliasedName(String aliasedName) { this.aliasedName = aliasedName; diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/DictionaryOperation.java b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/DictionaryOperation.java new file mode 100644 index 00000000000..ce7c5a71a21 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/DictionaryOperation.java @@ -0,0 +1,35 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +package com.yahoo.searchdefinition.fieldoperation; + +import com.yahoo.searchdefinition.document.Dictionary; +import com.yahoo.searchdefinition.document.SDField; + +/** + * Represents operations controlling setup of dictionary used for queries + * + * @author baldersheim + */ +public class DictionaryOperation implements FieldOperation { + private final Dictionary.Type type; + + public DictionaryOperation(Dictionary.Type type) { + this.type = type; + } + @Override + public void apply(SDField field) { + Dictionary prev = field.getDictionary(); + if (prev == null) { + field.setDictionary(new Dictionary(type)); + } else if ((prev.getType() == Dictionary.Type.BTREE && type == Dictionary.Type.HASH) || + (prev.getType() == Dictionary.Type.HASH && type == Dictionary.Type.BTREE)) + { + field.setDictionary(new Dictionary(Dictionary.Type.BTREE_AND_HASH)); + } else { + if (prev.getType() != type) { + throw new IllegalArgumentException("Can not combine previous dictionary setting " + prev.getType() + + " with current " + type); + } + } + } +} diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/processing/DictionaryProcessor.java b/config-model/src/main/java/com/yahoo/searchdefinition/processing/DictionaryProcessor.java new file mode 100644 index 00000000000..dbc9a5e8e71 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/searchdefinition/processing/DictionaryProcessor.java @@ -0,0 +1,41 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchdefinition.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.NumericDataType; +import com.yahoo.searchdefinition.RankProfileRegistry; +import com.yahoo.searchdefinition.Search; +import com.yahoo.searchdefinition.document.Attribute; +import com.yahoo.searchdefinition.document.Dictionary; +import com.yahoo.searchdefinition.document.SDField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Propagates dictionary settings from field level to attribute level. + * Only applies to numeric fields with fast-search enabled. + * + * @author baldersheim + */ +public class DictionaryProcessor extends Processor { + public DictionaryProcessor(Search search, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(search, deployLogger, rankProfileRegistry, queryProfiles); + } + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : search.allConcreteFields()) { + Dictionary dictionary = field.getDictionary(); + if (dictionary == null) continue; + + Attribute attribute = field.getAttribute(); + if (attribute.getDataType() instanceof NumericDataType ) { + if (attribute.isFastSearch()) { + attribute.setDictionary(dictionary); + } else { + fail(search, field, "You must specify attribute:fast-search to allow dictionary control"); + } + } else { + fail(search, field, "You can only specify 'dictionary:' for numeric fields"); + } + } + } +} diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/processing/PredicateProcessor.java b/config-model/src/main/java/com/yahoo/searchdefinition/processing/PredicateProcessor.java index 79f19efe422..ff0edcd0404 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/processing/PredicateProcessor.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/processing/PredicateProcessor.java @@ -15,7 +15,13 @@ import 
com.yahoo.vespa.documentmodel.DocumentSummary; import com.yahoo.vespa.documentmodel.SummaryField; import com.yahoo.vespa.documentmodel.SummaryTransform; import com.yahoo.vespa.indexinglanguage.ExpressionConverter; -import com.yahoo.vespa.indexinglanguage.expressions.*; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.OptimizePredicateExpression; +import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.indexinglanguage.expressions.SetValueExpression; +import com.yahoo.vespa.indexinglanguage.expressions.SetVarExpression; +import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression; import com.yahoo.vespa.model.container.search.QueryProfiles; import java.util.ArrayList; diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/processing/Processing.java b/config-model/src/main/java/com/yahoo/searchdefinition/processing/Processing.java index 1a3ef9e54b4..136d352ece7 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/processing/Processing.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/processing/Processing.java @@ -37,6 +37,7 @@ public class Processing { AttributesImplicitWord::new, MutableAttributes::new, CreatePositionZCurve::new, + DictionaryProcessor::new, WordMatch::new, ImportedFieldsResolver::new, ImplicitSummaries::new, diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/processing/Processor.java b/config-model/src/main/java/com/yahoo/searchdefinition/processing/Processor.java index 3744af7cc2c..61b5e6f2a64 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/processing/Processor.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/processing/Processor.java @@ -25,7 +25,7 @@ import java.util.logging.Level; public abstract class Processor { protected final Search search; - protected 
DeployLogger deployLogger; + protected final DeployLogger deployLogger; protected final RankProfileRegistry rankProfileRegistry; protected final QueryProfiles queryProfiles; diff --git a/config-model/src/main/javacc/SDParser.jj b/config-model/src/main/javacc/SDParser.jj index 869f671d8ef..2607b2fb02b 100644 --- a/config-model/src/main/javacc/SDParser.jj +++ b/config-model/src/main/javacc/SDParser.jj @@ -252,6 +252,7 @@ TOKEN : | < PROPERTIES: "properties" > | < ATTRIBUTE: "attribute" > | < SORTING: "sorting" > +| < DICTIONARY: "dictionary" > | < ASCENDING: "ascending" > | < DESCENDING: "descending" > | < UCA: "uca" > @@ -267,6 +268,8 @@ TOKEN : | < IDENTICAL: "identical" > | < STEMMING: "stemming" > | < NORMALIZING: "normalizing" > +| < HASH: "hash" > +| < BTREE: "btree" > | < BOLDING: "bolding" > | < BODY: "body" > | < HEADER: "header" > @@ -987,6 +990,7 @@ String fieldBody(SDField field, Search search, SDDocumentType document) : { } attribute(field) | body(field) | bolding(field) | + dictionary(field) | fieldStemming(field) | header(field) | id(field, document) | @@ -1520,6 +1524,34 @@ void bolding(FieldOperationContainer field) : } /** + * This rule consumes a dictionary statement of a field element. + * + * @param field The field to modify. + */ +void dictionary(FieldOperationContainer field) : +{ + Dictionary.Type type; +} +{ + <DICTIONARY> <COLON> type = dictionaryType() + { + field.addOperation(new DictionaryOperation(type)); + } +} + +Dictionary.Type dictionaryType() : +{ + Dictionary.Type type; +} +{ + ( <HASH> { type = Dictionary.Type.HASH; } + | <BTREE> { type = Dictionary.Type.BTREE; } ) + { + return type; + } +} + +/** * This rule consumes a body statement of a field element. * * @param field The field to modify. 
diff --git a/config-model/src/test/java/com/yahoo/searchdefinition/processing/BoldingTestCase.java b/config-model/src/test/java/com/yahoo/searchdefinition/processing/BoldingTestCase.java index 911f8e797e1..5589ad018a7 100644 --- a/config-model/src/test/java/com/yahoo/searchdefinition/processing/BoldingTestCase.java +++ b/config-model/src/test/java/com/yahoo/searchdefinition/processing/BoldingTestCase.java @@ -1,4 +1,4 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.searchdefinition.processing; import com.yahoo.searchdefinition.SearchBuilder; diff --git a/config-model/src/test/java/com/yahoo/searchdefinition/processing/DictionaryTestCase.java b/config-model/src/test/java/com/yahoo/searchdefinition/processing/DictionaryTestCase.java new file mode 100644 index 00000000000..9b3c02cecc3 --- /dev/null +++ b/config-model/src/test/java/com/yahoo/searchdefinition/processing/DictionaryTestCase.java @@ -0,0 +1,138 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.searchdefinition.processing; + +import com.yahoo.searchdefinition.Search; +import com.yahoo.searchdefinition.SearchBuilder; +import com.yahoo.searchdefinition.derived.AttributeFields; +import com.yahoo.searchdefinition.document.Dictionary; +import com.yahoo.searchdefinition.parser.ParseException; +import com.yahoo.vespa.config.search.AttributesConfig; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +/** + * Test configuration of dictionary control. 
+ * + * @author baldersheim + */ +public class DictionaryTestCase { + private static AttributesConfig getConfig(Search search) { + AttributeFields attributes = new AttributeFields(search); + AttributesConfig.Builder builder = new AttributesConfig.Builder(); + attributes.getConfig(builder); + return builder.build(); + } + @Test + public void testDefaultDictionarySettings() throws ParseException { + String def = + "search test {\n" + + " document test {\n" + + " field s1 type string {\n" + + " indexing: attribute | summary\n" + + " }\n" + + "\n" + + " field n1 type int {\n" + + " indexing: summary | attribute\n" + + " }\n" + + " }\n" + + "}\n"; + SearchBuilder sb = SearchBuilder.createFromString(def); + Search search = sb.getSearch(); + assertEquals(Dictionary.Type.BTREE, search.getAttribute("s1").getDictionary().getType()); + assertEquals(Dictionary.Type.BTREE, search.getAttribute("n1").getDictionary().getType()); + } + @Test + public void testNumericBtreeSettings() throws ParseException { + String def = + "search test {\n" + + " document test {\n" + + " field n1 type int {\n" + + " indexing: summary | attribute\n" + + " attribute:fast-search\n" + + " dictionary:btree\n" + + " }\n" + + " }\n" + + "}\n"; + SearchBuilder sb = SearchBuilder.createFromString(def); + Search search = sb.getSearch(); + assertEquals(Dictionary.Type.BTREE, search.getAttribute("n1").getDictionary().getType()); + assertEquals(AttributesConfig.Attribute.Dictionary.Type.BTREE, + getConfig(search).attribute().get(0).dictionary().type()); + } + @Test + public void testNumericHashSettings() throws ParseException { + String def = + "search test {\n" + + " document test {\n" + + " field n1 type int {\n" + + " indexing: summary | attribute\n" + + " attribute:fast-search\n" + + " dictionary:hash\n" + + " }\n" + + " }\n" + + "}\n"; + SearchBuilder sb = SearchBuilder.createFromString(def); + Search search = sb.getSearch(); + assertEquals(Dictionary.Type.HASH, 
search.getAttribute("n1").getDictionary().getType()); + assertEquals(AttributesConfig.Attribute.Dictionary.Type.HASH, + getConfig(search).attribute().get(0).dictionary().type()); + } + @Test + public void testNumericBtreeAndHashSettings() throws ParseException { + String def = + "search test {\n" + + " document test {\n" + + " field n1 type int {\n" + + " indexing: summary | attribute\n" + + " attribute:fast-search\n" + + " dictionary:hash\n" + + " dictionary:btree\n" + + " }\n" + + " }\n" + + "}\n"; + SearchBuilder sb = SearchBuilder.createFromString(def); + Search search = sb.getSearch(); + assertEquals(Dictionary.Type.BTREE_AND_HASH, search.getAttribute("n1").getDictionary().getType()); + assertEquals(AttributesConfig.Attribute.Dictionary.Type.BTREE_AND_HASH, + getConfig(search).attribute().get(0).dictionary().type()); + } + @Test + public void testNonNumericFieldsFailsDictionaryControl() throws ParseException { + String def = + "search test {\n" + + " document test {\n" + + " field n1 type string {\n" + + " indexing: summary | attribute\n" + + " dictionary:btree\n" + + " }\n" + + " }\n" + + "}\n"; + try { + SearchBuilder sb = SearchBuilder.createFromString(def); + fail("Controlling dictionary for non-numeric fields are not yet supported."); + } catch (IllegalArgumentException e) { + assertEquals("For search 'test', field 'n1': You can only specify 'dictionary:' for numeric fields", e.getMessage()); + } + } + @Test + public void testNonFastSearchFieldsFailsDictionaryControl() throws ParseException { + String def = + "search test {\n" + + " document test {\n" + + " field n1 type int {\n" + + " indexing: summary | attribute\n" + + " dictionary:btree\n" + + " }\n" + + " }\n" + + "}\n"; + try { + SearchBuilder sb = SearchBuilder.createFromString(def); + fail("Controlling dictionary for non-fast-search fields are not allowed."); + } catch (IllegalArgumentException e) { + assertEquals("For search 'test', field 'n1': You must specify attribute:fast-search to allow 
dictionary control", e.getMessage()); + } + } +} |