diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-04-12 09:25:41 +0200 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2021-04-12 09:25:46 +0200 |
commit | 787fa05cebd66820136372164b7e1b2805f9f061 (patch) | |
tree | 264c403f21b98bc5d4d1d65f0b49f378b4184b3b /config-model | |
parent | 015ade7cd232f217dd964da037ab202731b37cef (diff) |
Control cased/uncased in dictionary setting
Diffstat (limited to 'config-model')
8 files changed, 212 insertions, 65 deletions
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java b/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java index 4a415fccbcc..4277f503440 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/derived/AttributeFields.java @@ -255,6 +255,7 @@ public class AttributeFields extends Derived implements AttributesConfig.Produce Dictionary dictionary = attribute.getDictionary(); if (dictionary != null) { aaB.dictionary.type(convert(dictionary.getType())); + aaB.dictionary.match(convert(dictionary.getMatch())); } return aaB; } @@ -270,6 +271,15 @@ public class AttributeFields extends Derived implements AttributesConfig.Produce } return AttributesConfig.Attribute.Dictionary.Type.BTREE; } + private static AttributesConfig.Attribute.Dictionary.Match.Enum convert(Dictionary.Match type) { + switch (type) { + case CASED: + return AttributesConfig.Attribute.Dictionary.Match.CASED; + case UNCASED: + return AttributesConfig.Attribute.Dictionary.Match.UNCASED; + } + return AttributesConfig.Attribute.Dictionary.Match.UNCASED; + } public void getConfig(AttributesConfig.Builder builder, FieldSet fs) { for (Attribute attribute : attributes.values()) { diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java b/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java index f230a7c10eb..8841eee79cc 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/document/Attribute.java @@ -78,7 +78,7 @@ public final class Attribute implements Cloneable, Serializable { /** The aliases for this attribute */ private final Set<String> aliases = new LinkedHashSet<>(); - private Dictionary dictionary = new Dictionary(); + private Dictionary dictionary = null; /** * True if this attribute should be returned during first pass of search. diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/document/Dictionary.java b/config-model/src/main/java/com/yahoo/searchdefinition/document/Dictionary.java index e492d572f27..8f22b344e44 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/document/Dictionary.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/document/Dictionary.java @@ -9,8 +9,28 @@ package com.yahoo.searchdefinition.document; */ public class Dictionary { public enum Type { BTREE, HASH, BTREE_AND_HASH }; - private final Type type; - public Dictionary() { this(Type.BTREE); } - public Dictionary(Type type) { this.type = type; } - public Type getType() { return type; } + public enum Match { CASED, UNCASED }; + private Type type = null; + private Match match = null; + + public void updateType(Type type) { + if (this.type == null) { + this.type = type; + } else if ((this.type == Type.BTREE) && (type == Type.HASH)) { + this.type = Type.BTREE_AND_HASH; + } else if ((this.type == Type.HASH) && (type == Type.BTREE)) { + this.type = Type.BTREE_AND_HASH; + } else { + throw new IllegalArgumentException("Can not combine previous dictionary setting " + this.type + + " with current " + type); + } + } + public void updateMatch(Match match) { + if (this.match != null) { + throw new IllegalArgumentException("dictionary match mode has already been set to " + this.match); + } + this.match = match; + } + public Type getType() { return (type != null) ? type : Type.BTREE; } + public Match getMatch() { return (match != null) ? match : Match.UNCASED; } } diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java b/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java index 76b707fa19b..ebfdf6b1f6e 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java @@ -538,9 +538,12 @@ public class SDField extends Field implements TypedKey, FieldOperationContainer, * Returns Dictionary settings. */ public Dictionary getDictionary() { return dictionary; } - - - public void setDictionary(Dictionary dictionary) { this.dictionary=dictionary; } + public Dictionary getOrSetDictionary() { + if (dictionary == null) { + dictionary = new Dictionary(); + } + return dictionary; + } /** * Set the matching type for this field and all subfields. diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/DictionaryOperation.java b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/DictionaryOperation.java index ce7c5a71a21..70b0706df29 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/DictionaryOperation.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/DictionaryOperation.java @@ -11,25 +11,30 @@ import com.yahoo.searchdefinition.document.SDField; * @author baldersheim */ public class DictionaryOperation implements FieldOperation { - private final Dictionary.Type type; + public enum Operation { HASH, BTREE, CASED, UNCASED } + private final Operation operation; - public DictionaryOperation(Dictionary.Type type) { - this.type = type; + public DictionaryOperation(Operation type) { + this.operation = type; } @Override public void apply(SDField field) { - Dictionary prev = field.getDictionary(); - if (prev == null) { - field.setDictionary(new Dictionary(type)); - } else if ((prev.getType() == Dictionary.Type.BTREE && type == Dictionary.Type.HASH) || - (prev.getType() == Dictionary.Type.HASH && type == Dictionary.Type.BTREE)) - { - field.setDictionary(new Dictionary(Dictionary.Type.BTREE_AND_HASH)); - } else { - if (prev.getType() != type) { - throw new IllegalArgumentException("Can not combine previous dictionary setting " + prev.getType() + - " with current " + type); - } + Dictionary dictionary = field.getOrSetDictionary(); + switch (operation) { + case HASH: + dictionary.updateType(Dictionary.Type.HASH); + break; + case BTREE: + dictionary.updateType(Dictionary.Type.BTREE); + break; + case CASED: + dictionary.updateMatch(Dictionary.Match.CASED); + break; + case UNCASED: + dictionary.updateMatch(Dictionary.Match.UNCASED); + break; + default: + throw new IllegalArgumentException("Unhandled operation " + operation); } } } diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/processing/DictionaryProcessor.java b/config-model/src/main/java/com/yahoo/searchdefinition/processing/DictionaryProcessor.java index fd567ec2d54..66df78466e6 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/processing/DictionaryProcessor.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/processing/DictionaryProcessor.java @@ -3,6 +3,7 @@ package com.yahoo.searchdefinition.processing; import com.yahoo.config.application.api.DeployLogger; import com.yahoo.document.NumericDataType; +import com.yahoo.document.PrimitiveDataType; import com.yahoo.searchdefinition.RankProfileRegistry; import com.yahoo.searchdefinition.Search; import com.yahoo.searchdefinition.document.Attribute; @@ -25,16 +26,18 @@ public class DictionaryProcessor extends Processor { for (SDField field : search.allConcreteFields()) { Dictionary dictionary = field.getDictionary(); if (dictionary == null) continue; - Attribute attribute = field.getAttribute(); + if (attribute == null) continue; if (attribute.getDataType().getPrimitiveType() instanceof NumericDataType ) { if (attribute.isFastSearch()) { attribute.setDictionary(dictionary); } else { fail(search, field, "You must specify 'attribute:fast-search' to allow dictionary control"); } + } else if (attribute.getDataType().getPrimitiveType() == PrimitiveDataType.STRING) { + attribute.setDictionary(dictionary); } else { - fail(search, field, "You can only specify 'dictionary:' for numeric fields"); + fail(search, field, "You can only specify 'dictionary:' for numeric or string fields"); } } } diff --git a/config-model/src/main/javacc/SDParser.jj b/config-model/src/main/javacc/SDParser.jj index f8a648006e9..0e47050d34d 100644 --- a/config-model/src/main/javacc/SDParser.jj +++ b/config-model/src/main/javacc/SDParser.jj @@ -270,6 +270,8 @@ TOKEN : | < NORMALIZING: "normalizing" > | < HASH: "hash" > | < BTREE: "btree" > +| < CASED: "cased" > +| < UNCASED: "uncased" > | < BOLDING: "bolding" > | < BODY: "body" > | < HEADER: "header" > @@ -1530,24 +1532,25 @@ void bolding(FieldOperationContainer field) : */ void dictionary(FieldOperationContainer field) : { - Dictionary.Type type; } { - <DICTIONARY> <COLON> type = dictionaryType() + <DICTIONARY> + ( (<COLON> dictionarySetting(field)) + | (lbrace() (dictionarySetting(field) (<NL>)*)* <RBRACE>)) { - field.addOperation(new DictionaryOperation(type)); } } -Dictionary.Type dictionaryType() : +void dictionarySetting(FieldOperationContainer field) : { Dictionary.Type type; } { - ( <HASH> { type = Dictionary.Type.HASH; } - | <BTREE> { type = Dictionary.Type.BTREE; } ) + ( <HASH> { field.addOperation(new DictionaryOperation(DictionaryOperation.Operation.HASH)); } + | <BTREE> { field.addOperation(new DictionaryOperation(DictionaryOperation.Operation.BTREE)); } + | <CASED> { field.addOperation(new DictionaryOperation(DictionaryOperation.Operation.CASED)); } + | <UNCASED> { field.addOperation(new DictionaryOperation(DictionaryOperation.Operation.UNCASED)); }) { - return type; } } @@ -2642,6 +2645,7 @@ String identifier() : { } | <BODY> | <BOLDING> | <BTREE> + | <CASED> | <COMPRESSION> | <COMPRESSIONLEVEL> | <COMPRESSIONTHRESHOLD> @@ -2747,6 +2751,7 @@ String identifier() : { } | <TRUE> | <TYPE> | <UCA> + | <UNCASED> | <URI> | <UPPERBOUND> | <USEDOCUMENT> diff --git a/config-model/src/test/java/com/yahoo/searchdefinition/processing/DictionaryTestCase.java b/config-model/src/test/java/com/yahoo/searchdefinition/processing/DictionaryTestCase.java index ba51caca0f7..7bda2665272 100644 --- a/config-model/src/test/java/com/yahoo/searchdefinition/processing/DictionaryTestCase.java +++ b/config-model/src/test/java/com/yahoo/searchdefinition/processing/DictionaryTestCase.java @@ -12,6 +12,7 @@ import com.yahoo.vespa.config.search.AttributesConfig; import org.junit.Test; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; import static org.junit.Assert.fail; /** @@ -44,14 +45,22 @@ public class DictionaryTestCase { " }", "}"); Search search = createSearch(def); - assertEquals(Dictionary.Type.BTREE, search.getAttribute("s1").getDictionary().getType()); - assertEquals(Dictionary.Type.BTREE, search.getAttribute("n1").getDictionary().getType()); + assertNull(search.getAttribute("s1").getDictionary()); + assertNull(search.getAttribute("n1").getDictionary()); + assertEquals(AttributesConfig.Attribute.Dictionary.Type.BTREE, + getConfig(search).attribute().get(0).dictionary().type()); + assertEquals(AttributesConfig.Attribute.Dictionary.Type.BTREE, + getConfig(search).attribute().get(1).dictionary().type()); + assertEquals(AttributesConfig.Attribute.Dictionary.Match.CASE_INSENSITIVE, + getConfig(search).attribute().get(0).dictionary().match()); + assertEquals(AttributesConfig.Attribute.Dictionary.Match.CASE_INSENSITIVE, + getConfig(search).attribute().get(1).dictionary().match()); } - void verifyNumericDictionaryControl(Dictionary.Type expected, - AttributesConfig.Attribute.Dictionary.Type.Enum expectedConfig, - String type, - String ... cfg) throws ParseException + Search verifyDictionaryControl(Dictionary.Type expected, + AttributesConfig.Attribute.Dictionary.Type.Enum expectedConfig, + String type, + String ... cfg) throws ParseException { String def = TestUtil.joinLines( "search test {", @@ -65,74 +74,166 @@ public class DictionaryTestCase { "}"); Search search = createSearch(def); assertEquals(expected, search.getAttribute("n1").getDictionary().getType()); - assertEquals(expectedConfig, - getConfig(search).attribute().get(0).dictionary().type()); + assertEquals(expectedConfig, getConfig(search).attribute().get(0).dictionary().type()); + return search; + } + + void verifyStringDictionaryControl(Dictionary.Type expectedType,Dictionary.Match expectedCase, + AttributesConfig.Attribute.Dictionary.Type.Enum expectedTypeCfg, + AttributesConfig.Attribute.Dictionary.Match.Enum expectedCaseCfg, + String type, + String ... cfg) throws ParseException + { + + Search search = verifyDictionaryControl(expectedType, expectedTypeCfg, type, cfg); + assertEquals(expectedCase, search.getAttribute("n1").getDictionary().getMatch()); + assertEquals(expectedCaseCfg, getConfig(search).attribute().get(0).dictionary().match()); + } + + @Test + public void testCasedBtreeSettings() throws ParseException { + verifyDictionaryControl(Dictionary.Type.BTREE, + AttributesConfig.Attribute.Dictionary.Type.BTREE, + "int", + "dictionary:cased"); } @Test public void testNumericBtreeSettings() throws ParseException { - verifyNumericDictionaryControl(Dictionary.Type.BTREE, + verifyDictionaryControl(Dictionary.Type.BTREE, AttributesConfig.Attribute.Dictionary.Type.BTREE, "int", "dictionary:btree"); } @Test public void testNumericHashSettings() throws ParseException { - verifyNumericDictionaryControl(Dictionary.Type.HASH, + verifyDictionaryControl(Dictionary.Type.HASH, AttributesConfig.Attribute.Dictionary.Type.HASH, "int", "dictionary:hash"); } @Test public void testNumericBtreeAndHashSettings() throws ParseException { - verifyNumericDictionaryControl(Dictionary.Type.BTREE_AND_HASH, + verifyDictionaryControl(Dictionary.Type.BTREE_AND_HASH, AttributesConfig.Attribute.Dictionary.Type.BTREE_AND_HASH, "int", "dictionary:btree", "dictionary:hash"); } @Test public void testNumericArrayBtreeAndHashSettings() throws ParseException { - verifyNumericDictionaryControl(Dictionary.Type.BTREE_AND_HASH, + verifyDictionaryControl(Dictionary.Type.BTREE_AND_HASH, AttributesConfig.Attribute.Dictionary.Type.BTREE_AND_HASH, "array<int>", "dictionary:btree", "dictionary:hash"); } @Test public void testNumericWSetBtreeAndHashSettings() throws ParseException { - verifyNumericDictionaryControl(Dictionary.Type.BTREE_AND_HASH, + verifyDictionaryControl(Dictionary.Type.BTREE_AND_HASH, AttributesConfig.Attribute.Dictionary.Type.BTREE_AND_HASH, "weightedset<int>", "dictionary:btree", "dictionary:hash"); } @Test + public void testStringBtreeSettings() throws ParseException { + verifyStringDictionaryControl(Dictionary.Type.BTREE, Dictionary.Match.UNCASED, + AttributesConfig.Attribute.Dictionary.Type.BTREE, + AttributesConfig.Attribute.Dictionary.Match.UNCASED, + "string", + "dictionary:btree"); + } + @Test + public void testStringBtreeUnCasedSettings() throws ParseException { + verifyStringDictionaryControl(Dictionary.Type.BTREE, Dictionary.Match.UNCASED, + AttributesConfig.Attribute.Dictionary.Type.BTREE, + AttributesConfig.Attribute.Dictionary.Match.UNCASED, + "string", + "dictionary { btree\nuncased\n}"); + } + @Test + public void testStringBtreeCasedSettings() throws ParseException { + verifyStringDictionaryControl(Dictionary.Type.BTREE, Dictionary.Match.CASED, + AttributesConfig.Attribute.Dictionary.Type.BTREE, + AttributesConfig.Attribute.Dictionary.Match.CASED, + "string", + "dictionary { btree\ncased\n}"); + } + @Test + public void testStringHashSettings() throws ParseException { + verifyStringDictionaryControl(Dictionary.Type.HASH, Dictionary.Match.UNCASED, + AttributesConfig.Attribute.Dictionary.Type.HASH, + AttributesConfig.Attribute.Dictionary.Match.UNCASED, + "string", + "dictionary:hash"); + } + @Test + public void testStringHashUnCasedSettings() throws ParseException { + verifyStringDictionaryControl(Dictionary.Type.HASH, Dictionary.Match.UNCASED, + AttributesConfig.Attribute.Dictionary.Type.HASH, + AttributesConfig.Attribute.Dictionary.Match.UNCASED, + "string", + "dictionary { hash\nuncased\n}"); + } + @Test + public void testStringHashCasedSettings() throws ParseException { + verifyStringDictionaryControl(Dictionary.Type.HASH, Dictionary.Match.CASED, + AttributesConfig.Attribute.Dictionary.Type.HASH, + AttributesConfig.Attribute.Dictionary.Match.CASED, + "string", + "dictionary { hash\ncased\n}"); + } + @Test + public void testStringBtreeHashSettings() throws ParseException { + verifyStringDictionaryControl(Dictionary.Type.BTREE_AND_HASH, Dictionary.Match.UNCASED, + AttributesConfig.Attribute.Dictionary.Type.BTREE_AND_HASH, + AttributesConfig.Attribute.Dictionary.Match.UNCASED, + "string", + "dictionary{hash\nbtree\n}"); + } + @Test + public void testStringBtreeHashUnCasedSettings() throws ParseException { + verifyStringDictionaryControl(Dictionary.Type.BTREE_AND_HASH, Dictionary.Match.UNCASED, + AttributesConfig.Attribute.Dictionary.Type.BTREE_AND_HASH, + AttributesConfig.Attribute.Dictionary.Match.UNCASED, + "string", + "dictionary { hash\nbtree\nuncased\n}"); + } + @Test + public void testStringBtreeHashCasedSettings() throws ParseException { + verifyStringDictionaryControl(Dictionary.Type.BTREE_AND_HASH, Dictionary.Match.CASED, + AttributesConfig.Attribute.Dictionary.Type.BTREE_AND_HASH, + AttributesConfig.Attribute.Dictionary.Match.CASED, + "string", + "dictionary { btree\nhash\ncased\n}"); + } + @Test public void testNonNumericFieldsFailsDictionaryControl() throws ParseException { - String def = - "search test {\n" + - " document test {\n" + - " field n1 type string {\n" + - " indexing: summary | attribute\n" + - " dictionary:btree\n" + - " }\n" + - " }\n" + - "}\n"; + String def = TestUtil.joinLines( + "search test {", + " document test {", + " field n1 type bool {", + " indexing: summary | attribute", + " dictionary:btree", + " }", + " }", + "}"); try { SearchBuilder sb = SearchBuilder.createFromString(def); fail("Controlling dictionary for non-numeric fields are not yet supported."); } catch (IllegalArgumentException e) { - assertEquals("For search 'test', field 'n1': You can only specify 'dictionary:' for numeric fields", e.getMessage()); + assertEquals("For search 'test', field 'n1': You can only specify 'dictionary:' for numeric or string fields", e.getMessage()); } } @Test - public void testNonFastSearchFieldsFailsDictionaryControl() throws ParseException { - String def = - "search test {\n" + - " document test {\n" + - " field n1 type int {\n" + - " indexing: summary | attribute\n" + - " dictionary:btree\n" + - " }\n" + - " }\n" + - "}\n"; + public void testNonFastSearchNumericFieldsFailsDictionaryControl() throws ParseException { + String def = TestUtil.joinLines( + "search test {", + " document test {", + " field n1 type int {", + " indexing: summary | attribute", + " dictionary:btree", + " }", + " }", + "}"); try { SearchBuilder sb = SearchBuilder.createFromString(def); fail("Controlling dictionary for non-fast-search fields are not allowed."); |