diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-04-12 17:15:14 +0200 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2021-04-12 17:16:35 +0200 |
commit | 60e36e0f69c9f52b21a147871a74742840b4e778 (patch) | |
tree | 870d7fa435a2a9bda6c11c98f990893cdfda4860 | |
parent | b4d348347a4adaa46f710108d56a6b79fb5ce9fb (diff) |
Add lowercase commands to string attributes too.
10 files changed, 90 insertions, 7 deletions
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexInfo.java b/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexInfo.java index ae06d34dfb8..084500b61e3 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexInfo.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexInfo.java @@ -3,12 +3,17 @@ package com.yahoo.searchdefinition.derived; import com.yahoo.document.CollectionDataType; import com.yahoo.document.DataType; +import com.yahoo.document.Field; +import com.yahoo.document.MapDataType; import com.yahoo.document.NumericDataType; import com.yahoo.document.PositionDataType; +import com.yahoo.document.PrimitiveDataType; +import com.yahoo.document.StructuredDataType; import com.yahoo.searchdefinition.Index; import com.yahoo.searchdefinition.Search; import com.yahoo.searchdefinition.document.Attribute; import com.yahoo.searchdefinition.document.BooleanIndexDefinition; +import com.yahoo.searchdefinition.document.Case; import com.yahoo.searchdefinition.document.FieldSet; import com.yahoo.searchdefinition.document.ImmutableSDField; import com.yahoo.searchdefinition.document.Matching; @@ -47,9 +52,9 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { private static final String CMD_PREDICATE_BOUNDS = "predicate-bounds"; private static final String CMD_NUMERICAL = "numerical"; private static final String CMD_PHRASE_SEGMENTING = "phrase-segmenting"; - private Set<IndexCommand> commands = new java.util.LinkedHashSet<>(); - private Map<String, String> aliases = new java.util.LinkedHashMap<>(); - private Map<String, FieldSet> fieldSets; + private final Set<IndexCommand> commands = new java.util.LinkedHashSet<>(); + private final Map<String, String> aliases = new java.util.LinkedHashMap<>(); + private final Map<String, FieldSet> fieldSets; private Search search; public IndexInfo(Search search) { @@ -132,7 +137,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { addIndexCommand(field, CMD_INDEX); // List the indices - if (field.doesIndexing() || field.doesLowerCasing()) { + if (needLowerCase(field)) { addIndexCommand(field, CMD_LOWERCASE); } @@ -172,6 +177,28 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { } + private static boolean isAnyChildString(DataType dataType) { + PrimitiveDataType primitive = dataType.getPrimitiveType(); + if (primitive == PrimitiveDataType.STRING) return true; + if (primitive != null) return false; + if (dataType instanceof StructuredDataType) { + StructuredDataType structured = (StructuredDataType) dataType; + for (Field field : structured.getFields()) { + if (isAnyChildString(field.getDataType())) return true; + } + } else if (dataType instanceof MapDataType) { + MapDataType mapType = (MapDataType) dataType; + return isAnyChildString(mapType.getKeyType()) || isAnyChildString(mapType.getValueType()); + } + return false; + } + + private static boolean needLowerCase(ImmutableSDField field) { + return field.doesIndexing() + || field.doesLowerCasing() + || (field.doesAttributing() && isAnyChildString(field.getDataType()) && field.getMatching().getCase().equals(Case.UNCASED)); + } + static String stemCmd(ImmutableSDField field, Search search) { return CMD_STEM + ":" + field.getStemming(search).toStemMode(); } @@ -316,7 +343,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { if (field.doesAttributing()) { anyAttributing = true; } - if (field.doesIndexing() || field.doesLowerCasing()) { + if (needLowerCase(field)) { anyLowerCasing = true; } if (stemming(field)) { diff --git a/config-model/src/test/derived/attributeprefetch/index-info.cfg b/config-model/src/test/derived/attributeprefetch/index-info.cfg index dfce12f9b9f..e1b20b33e17 100644 --- a/config-model/src/test/derived/attributeprefetch/index-info.cfg +++ b/config-model/src/test/derived/attributeprefetch/index-info.cfg @@ -146,12 +146,16 @@ indexinfo[].command[].command "type WeightedSet<double>" indexinfo[].command[].indexname "singlestring" indexinfo[].command[].command "index" indexinfo[].command[].indexname "singlestring" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "singlestring" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "singlestring" indexinfo[].command[].command "type string" indexinfo[].command[].indexname "multistring" indexinfo[].command[].command "index" indexinfo[].command[].indexname "multistring" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "multistring" indexinfo[].command[].command "multivalue" indexinfo[].command[].indexname "multistring" indexinfo[].command[].command "attribute" @@ -160,6 +164,8 @@ indexinfo[].command[].command "type Array<string>" indexinfo[].command[].indexname "wsstring" indexinfo[].command[].command "index" indexinfo[].command[].indexname "wsstring" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "wsstring" indexinfo[].command[].command "multivalue" indexinfo[].command[].indexname "wsstring" indexinfo[].command[].command "attribute" diff --git a/config-model/src/test/derived/attributes/index-info.cfg b/config-model/src/test/derived/attributes/index-info.cfg index 77a52fa47ba..aa400c7de0a 100644 --- a/config-model/src/test/derived/attributes/index-info.cfg +++ b/config-model/src/test/derived/attributes/index-info.cfg @@ -6,18 +6,24 @@ indexinfo[].command[].command "word" indexinfo[].command[].indexname "a1" indexinfo[].command[].command "index" indexinfo[].command[].indexname "a1" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "a1" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "a1" indexinfo[].command[].command "type string" indexinfo[].command[].indexname "a2" indexinfo[].command[].command "index" indexinfo[].command[].indexname "a2" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "a2" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "a2" indexinfo[].command[].command "type string" indexinfo[].command[].indexname "a3" indexinfo[].command[].command "index" indexinfo[].command[].indexname "a3" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "a3" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "a3" indexinfo[].command[].command "type string" @@ -60,6 +66,8 @@ indexinfo[].command[].command "type string" indexinfo[].command[].indexname "b1" indexinfo[].command[].command "index" indexinfo[].command[].indexname "b1" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "b1" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "b1" indexinfo[].command[].command "type string" @@ -164,6 +172,8 @@ indexinfo[].command[].command "type int" indexinfo[].command[].indexname "a7_arr" indexinfo[].command[].command "index" indexinfo[].command[].indexname "a7_arr" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "a7_arr" indexinfo[].command[].command "multivalue" indexinfo[].command[].indexname "a7_arr" indexinfo[].command[].command "attribute" @@ -174,6 +184,8 @@ indexinfo[].command[].command "word" indexinfo[].command[].indexname "a8_arr" indexinfo[].command[].command "index" indexinfo[].command[].indexname "a8_arr" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "a8_arr" indexinfo[].command[].command "multivalue" indexinfo[].command[].indexname "a8_arr" indexinfo[].command[].command "attribute" diff --git a/config-model/src/test/derived/combinedattributeandindexsearch/index-info.cfg b/config-model/src/test/derived/combinedattributeandindexsearch/index-info.cfg index e7bf410250f..a43c8a0b992 100644 --- a/config-model/src/test/derived/combinedattributeandindexsearch/index-info.cfg +++ b/config-model/src/test/derived/combinedattributeandindexsearch/index-info.cfg @@ -30,12 +30,16 @@ indexinfo[].command[].command "type string" indexinfo[].command[].indexname "attribute1" indexinfo[].command[].command "index" indexinfo[].command[].indexname "attribute1" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "attribute1" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "attribute1" indexinfo[].command[].command "type string" indexinfo[].command[].indexname "attribute2" indexinfo[].command[].command "index" indexinfo[].command[].indexname "attribute2" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "attribute2" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "attribute2" indexinfo[].command[].command "type string" diff --git a/config-model/src/test/derived/importedfields/index-info.cfg b/config-model/src/test/derived/importedfields/index-info.cfg index ec44e4f1b11..af67f710ced 100644 --- a/config-model/src/test/derived/importedfields/index-info.cfg +++ b/config-model/src/test/derived/importedfields/index-info.cfg @@ -38,6 +38,8 @@ indexinfo[].command[].command "type int" indexinfo[].command[].indexname "my_string_field" indexinfo[].command[].command "index" indexinfo[].command[].indexname "my_string_field" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "my_string_field" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "my_string_field" indexinfo[].command[].command "type string" @@ -72,6 +74,8 @@ indexinfo[].command[].command "numerical" indexinfo[].command[].indexname "my_ancient_int_field" indexinfo[].command[].command "type int" indexinfo[].command[].indexname "myfieldset" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "myfieldset" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "myfieldset" indexinfo[].command[].command "index" diff --git a/config-model/src/test/derived/indexinfo_lowercase/index-info.cfg b/config-model/src/test/derived/indexinfo_lowercase/index-info.cfg index ac640c09e8c..dc62fc1c101 100644 --- a/config-model/src/test/derived/indexinfo_lowercase/index-info.cfg +++ b/config-model/src/test/derived/indexinfo_lowercase/index-info.cfg @@ -18,6 +18,8 @@ indexinfo[].command[].command "type string" indexinfo[].command[].indexname "nc_attribute" indexinfo[].command[].command "index" indexinfo[].command[].indexname "nc_attribute" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "nc_attribute" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "nc_attribute" indexinfo[].command[].command "type string" @@ -130,6 +132,8 @@ indexinfo[].command[].command "stem:BEST" indexinfo[].command[].indexname "lc_set7" indexinfo[].command[].command "normalize" indexinfo[].command[].indexname "nc_set1" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "nc_set1" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "nc_set1" indexinfo[].command[].command "index" @@ -160,6 +164,8 @@ indexinfo[].command[].command "stem:BEST" indexinfo[].command[].indexname "nc_set4" indexinfo[].command[].command "normalize" indexinfo[].command[].indexname "nc_set5" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "nc_set5" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "nc_set5" indexinfo[].command[].command "index" diff --git a/config-model/src/test/derived/indexschema/index-info.cfg b/config-model/src/test/derived/indexschema/index-info.cfg index 8a28038a18d..fd58423b868 100644 --- a/config-model/src/test/derived/indexschema/index-info.cfg +++ b/config-model/src/test/derived/indexschema/index-info.cfg @@ -76,6 +76,8 @@ indexinfo[].command[].command "type position" indexinfo[].command[].indexname "se" indexinfo[].command[].command "index" indexinfo[].command[].indexname "se" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "se" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "se" indexinfo[].command[].command "type string" @@ -112,11 +114,11 @@ indexinfo[].command[].command "type WeightedSet<string>" indexinfo[].command[].indexname "sh" indexinfo[].command[].command "index" indexinfo[].command[].indexname "sh" -indexinfo[].command[].command "lowercase" -indexinfo[].command[].indexname "sh" indexinfo[].command[].command "plain-tokens" indexinfo[].command[].indexname "sh" indexinfo[].command[].command "fullurl" +indexinfo[].command[].indexname "sh" +indexinfo[].command[].command "lowercase" indexinfo[].command[].indexname "sh.sh" indexinfo[].command[].command "fullurl" indexinfo[].command[].indexname "sh.sh" @@ -368,6 +370,8 @@ indexinfo[].command[].command "stem:BEST" indexinfo[].command[].indexname "fs4" indexinfo[].command[].command "normalize" indexinfo[].command[].indexname "onlyattrib" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "onlyattrib" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "onlyattrib" indexinfo[].command[].command "index" diff --git a/config-model/src/test/derived/music/index-info.cfg b/config-model/src/test/derived/music/index-info.cfg index 4d44bc7acbe..306aa157623 100644 --- a/config-model/src/test/derived/music/index-info.cfg +++ b/config-model/src/test/derived/music/index-info.cfg @@ -268,6 +268,8 @@ indexinfo[].command[].command "type string" indexinfo[].command[].indexname "hiphopvalue_arr" indexinfo[].command[].command "index" indexinfo[].command[].indexname "hiphopvalue_arr" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "hiphopvalue_arr" indexinfo[].command[].command "multivalue" indexinfo[].command[].indexname "hiphopvalue_arr" indexinfo[].command[].command "attribute" @@ -278,6 +280,8 @@ indexinfo[].command[].command "word" indexinfo[].command[].indexname "metalvalue_arr" indexinfo[].command[].command "index" indexinfo[].command[].indexname "metalvalue_arr" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "metalvalue_arr" indexinfo[].command[].command "multivalue" indexinfo[].command[].indexname "metalvalue_arr" indexinfo[].command[].command "attribute" diff --git a/config-model/src/test/derived/prefixexactattribute/index-info.cfg b/config-model/src/test/derived/prefixexactattribute/index-info.cfg index 941c5b598cf..0203919140f 100644 --- a/config-model/src/test/derived/prefixexactattribute/index-info.cfg +++ b/config-model/src/test/derived/prefixexactattribute/index-info.cfg @@ -18,6 +18,8 @@ indexinfo[].command[].command "type string" indexinfo[].command[].indexname "attributefield1" indexinfo[].command[].command "index" indexinfo[].command[].indexname "attributefield1" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "attributefield1" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "attributefield1" indexinfo[].command[].command "type string" @@ -26,6 +28,8 @@ indexinfo[].command[].command "exact @" indexinfo[].command[].indexname "attributefield2" indexinfo[].command[].command "index" indexinfo[].command[].indexname "attributefield2" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "attributefield2" indexinfo[].command[].command "attribute" indexinfo[].command[].indexname "attributefield2" indexinfo[].command[].command "type string" diff --git a/config-model/src/test/derived/types/index-info.cfg b/config-model/src/test/derived/types/index-info.cfg index 3bcf43060fc..2db4ead180b 100644 --- a/config-model/src/test/derived/types/index-info.cfg +++ b/config-model/src/test/derived/types/index-info.cfg @@ -48,6 +48,8 @@ indexinfo[].command[].command "type Array<int>" indexinfo[].command[].indexname "setfield" indexinfo[].command[].command "index" indexinfo[].command[].indexname "setfield" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "setfield" indexinfo[].command[].command "multivalue" indexinfo[].command[].indexname "setfield" indexinfo[].command[].command "attribute" @@ -56,6 +58,8 @@ indexinfo[].command[].command "type WeightedSet<string>" indexinfo[].command[].indexname "setfield2" indexinfo[].command[].command "index" indexinfo[].command[].indexname "setfield2" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "setfield2" indexinfo[].command[].command "multivalue" indexinfo[].command[].indexname "setfield2" indexinfo[].command[].command "attribute" @@ -66,6 +70,8 @@ indexinfo[].command[].command "word" indexinfo[].command[].indexname "setfield3" indexinfo[].command[].command "index" indexinfo[].command[].indexname "setfield3" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "setfield3" indexinfo[].command[].command "multivalue" indexinfo[].command[].indexname "setfield3" indexinfo[].command[].command "attribute" @@ -74,6 +80,8 @@ indexinfo[].command[].command "type WeightedSet<string>" indexinfo[].command[].indexname "setfield4" indexinfo[].command[].command "index" indexinfo[].command[].indexname "setfield4" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "setfield4" indexinfo[].command[].command "multivalue" indexinfo[].command[].indexname "setfield4" indexinfo[].command[].command "attribute" @@ -82,6 +90,8 @@ indexinfo[].command[].command "type WeightedSet<string>" indexinfo[].command[].indexname "tagfield" indexinfo[].command[].command "index" indexinfo[].command[].indexname "tagfield" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "tagfield" indexinfo[].command[].command "multivalue" indexinfo[].command[].indexname "tagfield" indexinfo[].command[].command "attribute" @@ -672,6 +682,8 @@ indexinfo[].command[].command "type WeightedSet<string>" indexinfo[].command[].indexname "album1" indexinfo[].command[].command "index" indexinfo[].command[].indexname "album1" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "album1" indexinfo[].command[].command "multivalue" indexinfo[].command[].indexname "album1" indexinfo[].command[].command "attribute" |