diff options
Diffstat (limited to 'config-model')
8 files changed, 173 insertions, 110 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java b/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java index 7532dec5187..34f485b7f02 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java @@ -25,6 +25,7 @@ import com.yahoo.vespa.documentmodel.SummaryField; import com.yahoo.search.config.IndexInfoConfig; import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.Set; @@ -238,12 +239,8 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { if (DataType.URI.equals(fieldType)) { return true; } - if (fieldType instanceof CollectionDataType && - DataType.URI.equals(((CollectionDataType)fieldType).getNestedType())) - { - return true; - } - return false; + return (fieldType instanceof CollectionDataType collectionFieldType) && + DataType.URI.equals(collectionFieldType.getNestedType()); } private void addUriIndexCommands(ImmutableSDField field) { @@ -310,7 +307,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { private boolean notInCommands(String index) { for (IndexCommand command : commands) { - if (command.getIndex().equals(index)) { + if (command.index().equals(index)) { return false; } } @@ -322,10 +319,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { IndexInfoConfig.Indexinfo.Builder iiB = new IndexInfoConfig.Indexinfo.Builder(); iiB.name(getName()); for (IndexCommand command : commands) { - iiB.command( - new IndexInfoConfig.Indexinfo.Command.Builder() - .indexname(command.getIndex()) - .command(command.getCommand())); + addIndexCommand(iiB, command.index(), command.command()); } // Make user defined field sets searchable for (FieldSet fieldSet : fieldSets.values()) { @@ -335,18 +329,16 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { } for (Map.Entry<String, String> e : aliases.entrySet()) { - iiB.alias( - new IndexInfoConfig.Indexinfo.Alias.Builder() - .alias(e.getKey()) - .indexname(e.getValue())); + iiB.alias(new IndexInfoConfig.Indexinfo.Alias.Builder().alias(e.getKey()).indexname(e.getValue())); } builder.indexinfo(iiB); } // TODO: Move this to the FieldSetSettings processor (and rename it) as that already has to look at this. private void addFieldSetCommands(IndexInfoConfig.Indexinfo.Builder iiB, FieldSet fieldSet) { - for (String qc : fieldSet.queryCommands()) - iiB.command(new IndexInfoConfig.Indexinfo.Command.Builder().indexname(fieldSet.getName()).command(qc)); + for (String qc : fieldSet.queryCommands()) { + addIndexCommand(iiB, fieldSet.getName(), qc); + } boolean anyIndexing = false; boolean anyAttributing = false; boolean anyLowerCasing = false; @@ -397,57 +389,29 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { fieldSetMatching = new Matching(); } if (anyLowerCasing) { - iiB.command( - new IndexInfoConfig.Indexinfo.Command.Builder() - .indexname(fieldSet.getName()) - .command(CMD_LOWERCASE)); + addIndexCommand(iiB, fieldSet.getName(), CMD_LOWERCASE); } if (hasMultiValueField(fieldSet)) { - iiB.command( - new IndexInfoConfig.Indexinfo.Command.Builder() - .indexname(fieldSet.getName()) - .command(CMD_MULTIVALUE)); + addIndexCommand(iiB, fieldSet.getName(), CMD_MULTIVALUE); } if (anyIndexing) { - iiB.command( - new IndexInfoConfig.Indexinfo.Command.Builder() - .indexname(fieldSet.getName()) - .command(CMD_INDEX)); + addIndexCommand(iiB, fieldSet.getName(), CMD_INDEX); if ( ! isExactMatch(fieldSetMatching)) { if (fieldSetMatching == null || fieldSetMatching.getType().equals(MatchType.TEXT)) { - iiB.command( - new IndexInfoConfig.Indexinfo.Command.Builder() - .indexname(fieldSet.getName()) - .command(CMD_PLAIN_TOKENS)); + addIndexCommand(iiB, fieldSet.getName(), CMD_PLAIN_TOKENS); } if (anyStemming) { - iiB.command( - new IndexInfoConfig.Indexinfo.Command.Builder() - .indexname(fieldSet.getName()) - .command(stemmingCommand)); + addIndexCommand(iiB, fieldSet.getName(), stemmingCommand); } if (anyNormalizing) - iiB.command( - new IndexInfoConfig.Indexinfo.Command.Builder() - .indexname(fieldSet.getName()) - .command(CMD_NORMALIZE)); + addIndexCommand(iiB, fieldSet.getName(), CMD_NORMALIZE); if (phraseSegmentingCommand != null) - iiB.command( - new IndexInfoConfig.Indexinfo.Command.Builder() - .indexname(fieldSet.getName()) - .command(phraseSegmentingCommand)); + addIndexCommand(iiB, fieldSet.getName(), phraseSegmentingCommand); } } else { // Assume only attribute fields - iiB - .command( - new IndexInfoConfig.Indexinfo.Command.Builder() - .indexname(fieldSet.getName()) - .command(CMD_ATTRIBUTE)) - .command( - new IndexInfoConfig.Indexinfo.Command.Builder() - .indexname(fieldSet.getName()) - .command(CMD_INDEX)); + addIndexCommand(iiB, fieldSet.getName(), CMD_ATTRIBUTE); + addIndexCommand(iiB, fieldSet.getName(), CMD_INDEX); } if (anyString) { addIndexCommand(iiB, fieldSet.getName(), CMD_STRING); @@ -460,20 +424,11 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { if (fieldSetMatching.getType().equals(MatchType.EXACT)) { String term = fieldSetMatching.getExactMatchTerminator(); if (term==null) term=ExactMatch.DEFAULT_EXACT_TERMINATOR; - iiB.command( - new IndexInfoConfig.Indexinfo.Command.Builder() - .indexname(fieldSet.getName()) - .command("exact "+term)); + addIndexCommand(iiB, fieldSet.getName(), "exact "+term); } else if (fieldSetMatching.getType().equals(MatchType.WORD)) { - iiB.command( - new IndexInfoConfig.Indexinfo.Command.Builder() - .indexname(fieldSet.getName()) - .command(CMD_WORD)); + addIndexCommand(iiB, fieldSet.getName(), CMD_WORD); } else if (fieldSetMatching.getType().equals(MatchType.GRAM)) { - iiB.command( - new IndexInfoConfig.Indexinfo.Command.Builder() - .indexname(fieldSet.getName()) - .command("ngram " + fieldSetMatching.getGramSize().orElse(NGramMatch.DEFAULT_GRAM_SIZE))); + addIndexCommand(iiB, fieldSet.getName(), "ngram " + fieldSetMatching.getGramSize().orElse(NGramMatch.DEFAULT_GRAM_SIZE)); } else if (fieldSetMatching.getType().equals(MatchType.TEXT)) { } @@ -495,10 +450,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { active = field.getIndex(field.getName()).getStemming(); } } - if (active != null) { - return active; - } - return Stemming.BEST; // assume default + return Objects.requireNonNullElse(active, Stemming.BEST); } private boolean stemming(ImmutableSDField field) { @@ -514,9 +466,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { private boolean isExactMatch(Matching m) { if (m == null) return false; - if (m.getType().equals(MatchType.EXACT)) return true; - if (m.getType().equals(MatchType.WORD)) return true; - return false; + return m.getType().equals(MatchType.EXACT) || m.getType().equals(MatchType.WORD); } @Override @@ -528,34 +478,13 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { * An index command. Null commands are also represented, to detect consistency issues. This is an (immutable) value * object. */ - public static class IndexCommand { - - private final String index; - - private final String command; - - public IndexCommand(String index, String command) { - this.index = index; - this.command = command; - } - - public String getIndex() { - return index; - } - - public String getCommand() { - return command; - } + public record IndexCommand(String index, String command) { /** * Returns true if this is the null command (do nothing) */ public boolean isNull() { - return command.equals(""); - } - - public int hashCode() { - return index.hashCode() + 17 * command.hashCode(); + return command.isEmpty(); } public boolean equals(Object object) { @@ -564,7 +493,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { } return other.index.equals(this.index) && - other.command.equals(this.command); + other.command.equals(this.command); } public String toString() { @@ -616,9 +545,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { return false; } - if (Stemming.NONE.equals(indexStemming)) { - // Add nothing - } else { + if ( ! Stemming.NONE.equals(indexStemming)) { owner.addIndexCommand(indexName, CMD_STEM + ":" + indexStemming.toStemMode()); } return true; diff --git a/config-model/src/main/java/com/yahoo/schema/derived/VsmFields.java b/config-model/src/main/java/com/yahoo/schema/derived/VsmFields.java index cb806d8596e..564161b725d 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/VsmFields.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/VsmFields.java @@ -14,6 +14,7 @@ import com.yahoo.document.datatypes.TensorFieldValue; import com.yahoo.schema.FieldSets; import com.yahoo.schema.Schema; import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.Case; import com.yahoo.schema.document.FieldSet; import com.yahoo.schema.document.GeoPos; import com.yahoo.schema.document.Matching; @@ -144,7 +145,7 @@ public class VsmFields extends Derived implements VsmfieldsConfig.Producer { public static Type GEO_POSITION = new Type("GEOPOS"); public static Type NEAREST_NEIGHBOR = new Type("NEAREST_NEIGHBOR"); - private String searchMethod; + private final String searchMethod; private Type(String searchMethod) { this.searchMethod = searchMethod; @@ -261,10 +262,17 @@ public class VsmFields extends Derived implements VsmfieldsConfig.Producer { return getMatchingName(); } + private static VsmfieldsConfig.Fieldspec.Normalize.Enum toNormalize(Matching matching) { + if (matching.getType() == MatchType.EXACT) return VsmfieldsConfig.Fieldspec.Normalize.Enum.LOWERCASE; + if (matching.getCase() == Case.CASED) return VsmfieldsConfig.Fieldspec.Normalize.Enum.NONE; + return VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE_AND_FOLD; + } + public VsmfieldsConfig.Fieldspec.Builder getFieldSpecConfig() { var fB = new VsmfieldsConfig.Fieldspec.Builder(); fB.name(getName()) .searchmethod(VsmfieldsConfig.Fieldspec.Searchmethod.Enum.valueOf(type.getSearchMethod())) + .normalize(toNormalize(matching)) .arg1(getArg1()) .fieldtype(isAttribute ? VsmfieldsConfig.Fieldspec.Fieldtype.ATTRIBUTE diff --git a/config-model/src/main/java/com/yahoo/schema/document/Matching.java b/config-model/src/main/java/com/yahoo/schema/document/Matching.java index 0b542f134ad..9d68553fa80 100644 --- a/config-model/src/main/java/com/yahoo/schema/document/Matching.java +++ b/config-model/src/main/java/com/yahoo/schema/document/Matching.java @@ -46,12 +46,16 @@ public class Matching implements Cloneable, Serializable { public MatchType getType() { return type; } public Case getCase() { return casing; } - public void setType(MatchType type) { + public Matching setType(MatchType type) { this.type = type; typeUserSet = true; + return this; } - public void setCase(Case casing) { this.casing = casing; } + public Matching setCase(Case casing) { + this.casing = casing; + return this; + } public Integer maxLength() { return maxLength; } public Matching maxLength(int maxLength) { this.maxLength = maxLength; return this; } diff --git a/config-model/src/test/derived/indexschema/vsmfields.cfg b/config-model/src/test/derived/indexschema/vsmfields.cfg index 31db622183e..a2152f9787f 100644 --- a/config-model/src/test/derived/indexschema/vsmfields.cfg +++ b/config-model/src/test/derived/indexschema/vsmfields.cfg @@ -3,121 +3,145 @@ searchall 1 fieldspec[].name "sa" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "sb" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "sc" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "sd" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "pos" fieldspec[].searchmethod GEOPOS fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "se" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "word" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "sf" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "sg" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "sh" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "si" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "exact1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "exact" +fieldspec[].normalize LOWERCASE fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "exact2" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "exact" +fieldspec[].normalize LOWERCASE fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "bm25_field" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "ia" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "ib" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "ic" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "nostemstring1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "nostemstring2" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "nostemstring3" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "nostemstring4" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "fs9" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "f10.text" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "sd_literal" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "pos_zcurve" fieldspec[].searchmethod INT64 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE documenttype[].name "indexschema" diff --git a/config-model/src/test/derived/nearestneighbor_streaming/vsmfields.cfg b/config-model/src/test/derived/nearestneighbor_streaming/vsmfields.cfg index f8b1cf62048..ec06d01f05a 100644 --- a/config-model/src/test/derived/nearestneighbor_streaming/vsmfields.cfg +++ b/config-model/src/test/derived/nearestneighbor_streaming/vsmfields.cfg @@ -3,21 +3,25 @@ searchall 1 fieldspec[].name "vec_a" fieldspec[].searchmethod NEAREST_NEIGHBOR fieldspec[].arg1 "EUCLIDEAN" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "vec_b" fieldspec[].searchmethod NEAREST_NEIGHBOR fieldspec[].arg1 "ANGULAR" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "vec_c" fieldspec[].searchmethod NEAREST_NEIGHBOR fieldspec[].arg1 "INNERPRODUCT" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "vec_d" fieldspec[].searchmethod NONE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE documenttype[].name "test" diff --git a/config-model/src/test/derived/prefixexactattribute/vsmfields.cfg b/config-model/src/test/derived/prefixexactattribute/vsmfields.cfg index 29bcde9faad..75192ef3121 100644 --- a/config-model/src/test/derived/prefixexactattribute/vsmfields.cfg +++ b/config-model/src/test/derived/prefixexactattribute/vsmfields.cfg @@ -3,26 +3,31 @@ searchall 1 fieldspec[].name "indexfield0" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "prefix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 79 fieldspec[].fieldtype INDEX fieldspec[].name "attributefield1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "exact" +fieldspec[].normalize LOWERCASE fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "attributefield2" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "exact" +fieldspec[].normalize LOWERCASE fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "indexfield1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "exact" +fieldspec[].normalize LOWERCASE fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "indexfield2" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "exact" +fieldspec[].normalize LOWERCASE fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX documenttype[].name "prefixexactattribute" diff --git a/config-model/src/test/derived/streamingstruct/vsmfields.cfg b/config-model/src/test/derived/streamingstruct/vsmfields.cfg index 7178f9d41ea..b5a234e8095 100644 --- a/config-model/src/test/derived/streamingstruct/vsmfields.cfg +++ b/config-model/src/test/derived/streamingstruct/vsmfields.cfg @@ -3,281 +3,337 @@ searchall 1 fieldspec[].name "coupleof" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "normalfields" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "a.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "a.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "substring" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "a.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "a.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "m.key" fieldspec[].searchmethod INT64 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "m.value" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "b.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "b.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "substring" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "b.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "b.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "c.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "word" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "c.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "prefix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "c.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "c2.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "word" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "c2.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "suffix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "c2.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "c2.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "c3.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "word" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "c3.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "prefix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "c3.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "c3.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "n.nf1.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "n.nf1.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "prefix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "n.nf1.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "n.nf1s.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "substring" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "n.nf1s.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "substring" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "n.nf1s.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "n.nf1s.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "n.nf2" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array1.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array1.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "substring" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array1.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array1.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array2.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array2.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "substring" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array2.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array2.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array3.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "word" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "array3.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "prefix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array3.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "subject.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "d.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "prefix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "d.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "prefix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "d.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "d.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "e.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "substring" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "e.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "substring" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "e.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "e.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "f.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "suffix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "f.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "suffix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "f.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "f.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "g" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX documenttype[].name "streamingstruct" diff --git a/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java b/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java index 6423b621ab9..601c014bdc1 100644 --- a/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java @@ -4,8 +4,12 @@ package com.yahoo.schema.derived; import com.yahoo.config.model.application.provider.MockFileRegistry; import com.yahoo.config.model.deploy.TestProperties; import com.yahoo.config.model.test.MockApplicationPackage; +import com.yahoo.document.DataType; import com.yahoo.documentmodel.NewDocumentReferenceDataType; import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Case; +import com.yahoo.schema.document.MatchType; +import com.yahoo.schema.document.Matching; import com.yahoo.schema.document.SDDocumentType; import com.yahoo.schema.document.SDField; import com.yahoo.schema.document.TemporarySDField; @@ -19,24 +23,55 @@ import static org.junit.jupiter.api.Assertions.assertEquals; */ public class VsmFieldsTestCase { - @SuppressWarnings("deprecation") - @Test - void reference_type_field_is_unsearchable() { + private static Schema createSchema() { Schema schema = new Schema("test", MockApplicationPackage.createEmpty(), new MockFileRegistry(), new TestableDeployLogger(), new TestProperties()); var sdoc = new SDDocumentType("test"); schema.addDocument(sdoc); - SDField refField = new TemporarySDField(sdoc, "ref_field", NewDocumentReferenceDataType.forDocumentName("parent_type")); - refField.parseIndexingScript("{ summary }"); - schema.getDocument().addField(refField); + return schema; + } + private static VsmfieldsConfig vsmfieldsConfig(Schema schema) { VsmFields vsmFields = new VsmFields(schema); VsmfieldsConfig.Builder cfgBuilder = new VsmfieldsConfig.Builder(); vsmFields.getConfig(cfgBuilder); - VsmfieldsConfig cfg = cfgBuilder.build(); + return cfgBuilder.build(); + } + + @Test + void reference_type_field_is_unsearchable() { + Schema schema = createSchema(); + SDField field = new TemporarySDField(schema.getDocument(), "ref_field", NewDocumentReferenceDataType.forDocumentName("parent_type")); + field.parseIndexingScript("{ summary }"); + schema.getDocument().addField(field); + VsmfieldsConfig cfg = vsmfieldsConfig(schema); assertEquals(1, cfg.fieldspec().size()); VsmfieldsConfig.Fieldspec fieldSpec = cfg.fieldspec().get(0); assertEquals("ref_field", fieldSpec.name()); assertEquals(VsmfieldsConfig.Fieldspec.Searchmethod.NONE, fieldSpec.searchmethod()); } + + private void testIndexMatching(Matching matching, VsmfieldsConfig.Fieldspec.Normalize.Enum normalize, String arg1) { + Schema schema = createSchema(); + SDField field = new TemporarySDField(schema.getDocument(), "f", DataType.STRING); + field.parseIndexingScript("{ index }"); + field.setMatching(matching); + schema.getDocument().addField(field); + VsmfieldsConfig cfg = vsmfieldsConfig(schema); + VsmfieldsConfig.Fieldspec fieldSpec = cfg.fieldspec().get(0); + assertEquals("f", fieldSpec.name()); + assertEquals(VsmfieldsConfig.Fieldspec.Searchmethod.AUTOUTF8, fieldSpec.searchmethod()); + assertEquals(normalize, fieldSpec.normalize()); + assertEquals(arg1, fieldSpec.arg1()); + } + + @Test + void test_exact_string() { + testIndexMatching(new Matching(MatchType.TEXT), + VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE_AND_FOLD, ""); + testIndexMatching(new Matching(MatchType.TEXT).setCase(Case.CASED), + VsmfieldsConfig.Fieldspec.Normalize.NONE, ""); + testIndexMatching(new Matching(MatchType.EXACT).setCase(Case.CASED), + VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE, "exact"); + } } |