diff options
| | | |
|---|---|---|
| author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-10 10:17:50 +0100 |
| committer | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-10 10:17:50 +0100 |
| commit | 3f7017773ce147a2d65a9835acdfd682dfafd54a (patch) | |
| tree | f3f67620ecb19db0ef7a6ce0150abcfe407d9199 /config-model | |
| parent | 02c5bce07737a899726097e577c6dd1121ca5a7c (diff) | |
Generate correct vsmfields config for cased search.
Diffstat (limited to 'config-model')
8 files changed, 184 insertions, 48 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java b/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java index 7532dec5187..0ee675cb36d 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java @@ -310,7 +310,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { private boolean notInCommands(String index) { for (IndexCommand command : commands) { - if (command.getIndex().equals(index)) { + if (command.index().equals(index)) { return false; } } @@ -324,8 +324,8 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { for (IndexCommand command : commands) { iiB.command( new IndexInfoConfig.Indexinfo.Command.Builder() - .indexname(command.getIndex()) - .command(command.getCommand())); + .indexname(command.index()) + .command(command.command())); } // Make user defined field sets searchable for (FieldSet fieldSet : fieldSets.values()) { @@ -525,54 +525,33 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { } /** - * An index command. Null commands are also represented, to detect consistency issues. This is an (immutable) value - * object. - */ - public static class IndexCommand { - - private final String index; - - private final String command; - - public IndexCommand(String index, String command) { - this.index = index; - this.command = command; - } - - public String getIndex() { - return index; - } - - public String getCommand() { - return command; - } + * An index command. Null commands are also represented, to detect consistency issues. This is an (immutable) value + * object. 
+ */ + public record IndexCommand(String index, String command) { /** * Returns true if this is the null command (do nothing) */ public boolean isNull() { - return command.equals(""); - } - - public int hashCode() { - return index.hashCode() + 17 * command.hashCode(); + return command.isEmpty(); } public boolean equals(Object object) { - if (!(object instanceof IndexCommand other)) { - return false; + if (!(object instanceof IndexCommand other)) { + return false; + } + + return other.index.equals(this.index) && + other.command.equals(this.command); } - return other.index.equals(this.index) && - other.command.equals(this.command); - } + public String toString() { + return "index command " + command + " on index " + index; + } - public String toString() { - return "index command " + command + " on index " + index; } - } - /** * A command which may override the command setting of a field for a particular index */ diff --git a/config-model/src/main/java/com/yahoo/schema/derived/VsmFields.java b/config-model/src/main/java/com/yahoo/schema/derived/VsmFields.java index cb806d8596e..564161b725d 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/VsmFields.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/VsmFields.java @@ -14,6 +14,7 @@ import com.yahoo.document.datatypes.TensorFieldValue; import com.yahoo.schema.FieldSets; import com.yahoo.schema.Schema; import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.Case; import com.yahoo.schema.document.FieldSet; import com.yahoo.schema.document.GeoPos; import com.yahoo.schema.document.Matching; @@ -144,7 +145,7 @@ public class VsmFields extends Derived implements VsmfieldsConfig.Producer { public static Type GEO_POSITION = new Type("GEOPOS"); public static Type NEAREST_NEIGHBOR = new Type("NEAREST_NEIGHBOR"); - private String searchMethod; + private final String searchMethod; private Type(String searchMethod) { this.searchMethod = searchMethod; @@ -261,10 +262,17 @@ public class 
VsmFields extends Derived implements VsmfieldsConfig.Producer { return getMatchingName(); } + private static VsmfieldsConfig.Fieldspec.Normalize.Enum toNormalize(Matching matching) { + if (matching.getType() == MatchType.EXACT) return VsmfieldsConfig.Fieldspec.Normalize.Enum.LOWERCASE; + if (matching.getCase() == Case.CASED) return VsmfieldsConfig.Fieldspec.Normalize.Enum.NONE; + return VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE_AND_FOLD; + } + public VsmfieldsConfig.Fieldspec.Builder getFieldSpecConfig() { var fB = new VsmfieldsConfig.Fieldspec.Builder(); fB.name(getName()) .searchmethod(VsmfieldsConfig.Fieldspec.Searchmethod.Enum.valueOf(type.getSearchMethod())) + .normalize(toNormalize(matching)) .arg1(getArg1()) .fieldtype(isAttribute ? VsmfieldsConfig.Fieldspec.Fieldtype.ATTRIBUTE diff --git a/config-model/src/main/java/com/yahoo/schema/document/Matching.java b/config-model/src/main/java/com/yahoo/schema/document/Matching.java index 0b542f134ad..9d68553fa80 100644 --- a/config-model/src/main/java/com/yahoo/schema/document/Matching.java +++ b/config-model/src/main/java/com/yahoo/schema/document/Matching.java @@ -46,12 +46,16 @@ public class Matching implements Cloneable, Serializable { public MatchType getType() { return type; } public Case getCase() { return casing; } - public void setType(MatchType type) { + public Matching setType(MatchType type) { this.type = type; typeUserSet = true; + return this; } - public void setCase(Case casing) { this.casing = casing; } + public Matching setCase(Case casing) { + this.casing = casing; + return this; + } public Integer maxLength() { return maxLength; } public Matching maxLength(int maxLength) { this.maxLength = maxLength; return this; } diff --git a/config-model/src/test/derived/indexschema/vsmfields.cfg b/config-model/src/test/derived/indexschema/vsmfields.cfg index 31db622183e..a2152f9787f 100644 --- a/config-model/src/test/derived/indexschema/vsmfields.cfg +++ 
b/config-model/src/test/derived/indexschema/vsmfields.cfg @@ -3,121 +3,145 @@ searchall 1 fieldspec[].name "sa" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "sb" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "sc" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "sd" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "pos" fieldspec[].searchmethod GEOPOS fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "se" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "word" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "sf" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "sg" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "sh" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "si" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "exact1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "exact" +fieldspec[].normalize LOWERCASE fieldspec[].maxlength 
1048576 fieldspec[].fieldtype INDEX fieldspec[].name "exact2" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "exact" +fieldspec[].normalize LOWERCASE fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "bm25_field" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "ia" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "ib" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "ic" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "nostemstring1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "nostemstring2" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "nostemstring3" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "nostemstring4" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "fs9" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "f10.text" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 
1048576 fieldspec[].fieldtype INDEX fieldspec[].name "sd_literal" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "pos_zcurve" fieldspec[].searchmethod INT64 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE documenttype[].name "indexschema" diff --git a/config-model/src/test/derived/nearestneighbor_streaming/vsmfields.cfg b/config-model/src/test/derived/nearestneighbor_streaming/vsmfields.cfg index f8b1cf62048..ec06d01f05a 100644 --- a/config-model/src/test/derived/nearestneighbor_streaming/vsmfields.cfg +++ b/config-model/src/test/derived/nearestneighbor_streaming/vsmfields.cfg @@ -3,21 +3,25 @@ searchall 1 fieldspec[].name "vec_a" fieldspec[].searchmethod NEAREST_NEIGHBOR fieldspec[].arg1 "EUCLIDEAN" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "vec_b" fieldspec[].searchmethod NEAREST_NEIGHBOR fieldspec[].arg1 "ANGULAR" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "vec_c" fieldspec[].searchmethod NEAREST_NEIGHBOR fieldspec[].arg1 "INNERPRODUCT" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "vec_d" fieldspec[].searchmethod NONE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE documenttype[].name "test" diff --git a/config-model/src/test/derived/prefixexactattribute/vsmfields.cfg b/config-model/src/test/derived/prefixexactattribute/vsmfields.cfg index 29bcde9faad..75192ef3121 100644 --- a/config-model/src/test/derived/prefixexactattribute/vsmfields.cfg +++ b/config-model/src/test/derived/prefixexactattribute/vsmfields.cfg @@ -3,26 +3,31 @@ searchall 1 fieldspec[].name 
"indexfield0" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "prefix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 79 fieldspec[].fieldtype INDEX fieldspec[].name "attributefield1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "exact" +fieldspec[].normalize LOWERCASE fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "attributefield2" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "exact" +fieldspec[].normalize LOWERCASE fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "indexfield1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "exact" +fieldspec[].normalize LOWERCASE fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "indexfield2" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "exact" +fieldspec[].normalize LOWERCASE fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX documenttype[].name "prefixexactattribute" diff --git a/config-model/src/test/derived/streamingstruct/vsmfields.cfg b/config-model/src/test/derived/streamingstruct/vsmfields.cfg index 7178f9d41ea..b5a234e8095 100644 --- a/config-model/src/test/derived/streamingstruct/vsmfields.cfg +++ b/config-model/src/test/derived/streamingstruct/vsmfields.cfg @@ -3,281 +3,337 @@ searchall 1 fieldspec[].name "coupleof" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "normalfields" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "a.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "a.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "substring" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 
fieldspec[].fieldtype INDEX fieldspec[].name "a.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "a.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "m.key" fieldspec[].searchmethod INT64 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "m.value" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "b.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "b.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "substring" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "b.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "b.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "c.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "word" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "c.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "prefix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "c.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX 
fieldspec[].name "c2.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "word" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "c2.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "suffix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "c2.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "c2.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "c3.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "word" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "c3.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "prefix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "c3.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "c3.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "n.nf1.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "n.nf1.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "prefix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "n.nf1.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype 
INDEX fieldspec[].name "n.nf1s.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "substring" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "n.nf1s.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "substring" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "n.nf1s.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "n.nf1s.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "n.nf2" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array1.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array1.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "substring" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array1.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array1.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array2.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array2.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "substring" +fieldspec[].normalize LOWERCASE_AND_FOLD 
fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array2.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array2.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array3.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "word" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype ATTRIBUTE fieldspec[].name "array3.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "prefix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "array3.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "subject.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "d.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "prefix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "d.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "prefix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "d.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "d.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "e.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "substring" +fieldspec[].normalize 
LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "e.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "substring" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "e.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "e.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "f.f1" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "suffix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "f.f1s" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "suffix" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "f.f2" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "f.f3" fieldspec[].searchmethod DOUBLE fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX fieldspec[].name "g" fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "" +fieldspec[].normalize LOWERCASE_AND_FOLD fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX documenttype[].name "streamingstruct" diff --git a/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java b/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java index 6423b621ab9..bc74173d7b8 100644 --- a/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java @@ -4,8 +4,12 @@ package com.yahoo.schema.derived; import 
com.yahoo.config.model.application.provider.MockFileRegistry; import com.yahoo.config.model.deploy.TestProperties; import com.yahoo.config.model.test.MockApplicationPackage; +import com.yahoo.document.DataType; import com.yahoo.documentmodel.NewDocumentReferenceDataType; import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Case; +import com.yahoo.schema.document.MatchType; +import com.yahoo.schema.document.Matching; import com.yahoo.schema.document.SDDocumentType; import com.yahoo.schema.document.SDField; import com.yahoo.schema.document.TemporarySDField; @@ -19,24 +23,76 @@ import static org.junit.jupiter.api.Assertions.assertEquals; */ public class VsmFieldsTestCase { - @SuppressWarnings("deprecation") - @Test - void reference_type_field_is_unsearchable() { + private static Schema createSchema() { Schema schema = new Schema("test", MockApplicationPackage.createEmpty(), new MockFileRegistry(), new TestableDeployLogger(), new TestProperties()); var sdoc = new SDDocumentType("test"); schema.addDocument(sdoc); - SDField refField = new TemporarySDField(sdoc, "ref_field", NewDocumentReferenceDataType.forDocumentName("parent_type")); - refField.parseIndexingScript("{ summary }"); - schema.getDocument().addField(refField); + return schema; + } + private static VsmfieldsConfig vsmfieldsConfig(Schema schema) { VsmFields vsmFields = new VsmFields(schema); VsmfieldsConfig.Builder cfgBuilder = new VsmfieldsConfig.Builder(); vsmFields.getConfig(cfgBuilder); - VsmfieldsConfig cfg = cfgBuilder.build(); + return cfgBuilder.build(); + } + + @Test + void reference_type_field_is_unsearchable() { + Schema schema = createSchema(); + SDField field = new TemporarySDField(schema.getDocument(), "ref_field", NewDocumentReferenceDataType.forDocumentName("parent_type")); + field.parseIndexingScript("{ summary }"); + schema.getDocument().addField(field); + VsmfieldsConfig cfg = vsmfieldsConfig(schema); assertEquals(1, cfg.fieldspec().size()); VsmfieldsConfig.Fieldspec fieldSpec = 
cfg.fieldspec().get(0); assertEquals("ref_field", fieldSpec.name()); assertEquals(VsmfieldsConfig.Fieldspec.Searchmethod.NONE, fieldSpec.searchmethod()); } + + @Test + void test_exact_string() { + Schema schema = createSchema(); + SDField field = new TemporarySDField(schema.getDocument(), "f", DataType.STRING); + field.parseIndexingScript("{ index }"); + field.setMatching(new Matching(MatchType.EXACT).setCase(Case.CASED)); + schema.getDocument().addField(field); + VsmfieldsConfig cfg = vsmfieldsConfig(schema); + VsmfieldsConfig.Fieldspec fieldSpec = cfg.fieldspec().get(0); + assertEquals("f", fieldSpec.name()); + assertEquals(VsmfieldsConfig.Fieldspec.Searchmethod.AUTOUTF8, fieldSpec.searchmethod()); + assertEquals(VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE, fieldSpec.normalize()); + assertEquals("exact", fieldSpec.arg1()); + } + + @Test + void test_string() { + Schema schema = createSchema(); + SDField field = new TemporarySDField(schema.getDocument(), "f", DataType.STRING); + field.parseIndexingScript("{ index }"); + field.setMatching(new Matching(MatchType.TEXT)); + schema.getDocument().addField(field); + VsmfieldsConfig cfg = vsmfieldsConfig(schema); + VsmfieldsConfig.Fieldspec fieldSpec = cfg.fieldspec().get(0); + assertEquals("f", fieldSpec.name()); + assertEquals(VsmfieldsConfig.Fieldspec.Searchmethod.AUTOUTF8, fieldSpec.searchmethod()); + assertEquals(VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE_AND_FOLD, fieldSpec.normalize()); + assertEquals("", fieldSpec.arg1()); + } + + @Test + void test_cased_string() { + Schema schema = createSchema(); + SDField field = new TemporarySDField(schema.getDocument(), "f", DataType.STRING); + field.parseIndexingScript("{ index }"); + field.setMatching(new Matching(MatchType.TEXT).setCase(Case.CASED)); + schema.getDocument().addField(field); + VsmfieldsConfig cfg = vsmfieldsConfig(schema); + VsmfieldsConfig.Fieldspec fieldSpec = cfg.fieldspec().get(0); + assertEquals("f", fieldSpec.name()); + 
assertEquals(VsmfieldsConfig.Fieldspec.Searchmethod.AUTOUTF8, fieldSpec.searchmethod()); + assertEquals(VsmfieldsConfig.Fieldspec.Normalize.NONE, fieldSpec.normalize()); + assertEquals("", fieldSpec.arg1()); + } } |