summary refs log tree commit diff stats
path: root/config-model
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2024-01-11 11:57:09 +0100
committer GitHub <noreply@github.com> 2024-01-11 11:57:09 +0100
commit c35da2bfe42797997cff3c6d42c491c5566698e7 (patch)
tree 082dcddbd291840c0ec07b7a4e9065e0d91f98f8 /config-model
parent 04d491286aa2a6f8b3a04048936419c6cde4e3ec (diff)
parent 1a7e8a2eb5135d3cc04820770ced9aaa51374f89 (diff)
Merge pull request #29844 from vespa-engine/balder/handle-cased-streaming-search
Balder/handle cased streaming search
Diffstat (limited to 'config-model')
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java 127
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/derived/VsmFields.java 10
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/document/Matching.java 8
-rw-r--r-- config-model/src/test/derived/indexschema/vsmfields.cfg 24
-rw-r--r-- config-model/src/test/derived/nearestneighbor_streaming/vsmfields.cfg 4
-rw-r--r-- config-model/src/test/derived/prefixexactattribute/vsmfields.cfg 5
-rw-r--r-- config-model/src/test/derived/streamingstruct/vsmfields.cfg 56
-rw-r--r-- config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java 49
8 files changed, 173 insertions, 110 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java b/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java
index 7532dec5187..34f485b7f02 100644
--- a/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java
+++ b/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java
@@ -25,6 +25,7 @@ import com.yahoo.vespa.documentmodel.SummaryField;
import com.yahoo.search.config.IndexInfoConfig;
import java.util.Map;
+import java.util.Objects;
import java.util.Optional;
import java.util.Set;
@@ -238,12 +239,8 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer {
if (DataType.URI.equals(fieldType)) {
return true;
}
- if (fieldType instanceof CollectionDataType &&
- DataType.URI.equals(((CollectionDataType)fieldType).getNestedType()))
- {
- return true;
- }
- return false;
+ return (fieldType instanceof CollectionDataType collectionFieldType) &&
+ DataType.URI.equals(collectionFieldType.getNestedType());
}
private void addUriIndexCommands(ImmutableSDField field) {
@@ -310,7 +307,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer {
private boolean notInCommands(String index) {
for (IndexCommand command : commands) {
- if (command.getIndex().equals(index)) {
+ if (command.index().equals(index)) {
return false;
}
}
@@ -322,10 +319,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer {
IndexInfoConfig.Indexinfo.Builder iiB = new IndexInfoConfig.Indexinfo.Builder();
iiB.name(getName());
for (IndexCommand command : commands) {
- iiB.command(
- new IndexInfoConfig.Indexinfo.Command.Builder()
- .indexname(command.getIndex())
- .command(command.getCommand()));
+ addIndexCommand(iiB, command.index(), command.command());
}
// Make user defined field sets searchable
for (FieldSet fieldSet : fieldSets.values()) {
@@ -335,18 +329,16 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer {
}
for (Map.Entry<String, String> e : aliases.entrySet()) {
- iiB.alias(
- new IndexInfoConfig.Indexinfo.Alias.Builder()
- .alias(e.getKey())
- .indexname(e.getValue()));
+ iiB.alias(new IndexInfoConfig.Indexinfo.Alias.Builder().alias(e.getKey()).indexname(e.getValue()));
}
builder.indexinfo(iiB);
}
// TODO: Move this to the FieldSetSettings processor (and rename it) as that already has to look at this.
private void addFieldSetCommands(IndexInfoConfig.Indexinfo.Builder iiB, FieldSet fieldSet) {
- for (String qc : fieldSet.queryCommands())
- iiB.command(new IndexInfoConfig.Indexinfo.Command.Builder().indexname(fieldSet.getName()).command(qc));
+ for (String qc : fieldSet.queryCommands()) {
+ addIndexCommand(iiB, fieldSet.getName(), qc);
+ }
boolean anyIndexing = false;
boolean anyAttributing = false;
boolean anyLowerCasing = false;
@@ -397,57 +389,29 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer {
fieldSetMatching = new Matching();
}
if (anyLowerCasing) {
- iiB.command(
- new IndexInfoConfig.Indexinfo.Command.Builder()
- .indexname(fieldSet.getName())
- .command(CMD_LOWERCASE));
+ addIndexCommand(iiB, fieldSet.getName(), CMD_LOWERCASE);
}
if (hasMultiValueField(fieldSet)) {
- iiB.command(
- new IndexInfoConfig.Indexinfo.Command.Builder()
- .indexname(fieldSet.getName())
- .command(CMD_MULTIVALUE));
+ addIndexCommand(iiB, fieldSet.getName(), CMD_MULTIVALUE);
}
if (anyIndexing) {
- iiB.command(
- new IndexInfoConfig.Indexinfo.Command.Builder()
- .indexname(fieldSet.getName())
- .command(CMD_INDEX));
+ addIndexCommand(iiB, fieldSet.getName(), CMD_INDEX);
if ( ! isExactMatch(fieldSetMatching)) {
if (fieldSetMatching == null || fieldSetMatching.getType().equals(MatchType.TEXT)) {
- iiB.command(
- new IndexInfoConfig.Indexinfo.Command.Builder()
- .indexname(fieldSet.getName())
- .command(CMD_PLAIN_TOKENS));
+ addIndexCommand(iiB, fieldSet.getName(), CMD_PLAIN_TOKENS);
}
if (anyStemming) {
- iiB.command(
- new IndexInfoConfig.Indexinfo.Command.Builder()
- .indexname(fieldSet.getName())
- .command(stemmingCommand));
+ addIndexCommand(iiB, fieldSet.getName(), stemmingCommand);
}
if (anyNormalizing)
- iiB.command(
- new IndexInfoConfig.Indexinfo.Command.Builder()
- .indexname(fieldSet.getName())
- .command(CMD_NORMALIZE));
+ addIndexCommand(iiB, fieldSet.getName(), CMD_NORMALIZE);
if (phraseSegmentingCommand != null)
- iiB.command(
- new IndexInfoConfig.Indexinfo.Command.Builder()
- .indexname(fieldSet.getName())
- .command(phraseSegmentingCommand));
+ addIndexCommand(iiB, fieldSet.getName(), phraseSegmentingCommand);
}
} else {
// Assume only attribute fields
- iiB
- .command(
- new IndexInfoConfig.Indexinfo.Command.Builder()
- .indexname(fieldSet.getName())
- .command(CMD_ATTRIBUTE))
- .command(
- new IndexInfoConfig.Indexinfo.Command.Builder()
- .indexname(fieldSet.getName())
- .command(CMD_INDEX));
+ addIndexCommand(iiB, fieldSet.getName(), CMD_ATTRIBUTE);
+ addIndexCommand(iiB, fieldSet.getName(), CMD_INDEX);
}
if (anyString) {
addIndexCommand(iiB, fieldSet.getName(), CMD_STRING);
@@ -460,20 +424,11 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer {
if (fieldSetMatching.getType().equals(MatchType.EXACT)) {
String term = fieldSetMatching.getExactMatchTerminator();
if (term==null) term=ExactMatch.DEFAULT_EXACT_TERMINATOR;
- iiB.command(
- new IndexInfoConfig.Indexinfo.Command.Builder()
- .indexname(fieldSet.getName())
- .command("exact "+term));
+ addIndexCommand(iiB, fieldSet.getName(), "exact "+term);
} else if (fieldSetMatching.getType().equals(MatchType.WORD)) {
- iiB.command(
- new IndexInfoConfig.Indexinfo.Command.Builder()
- .indexname(fieldSet.getName())
- .command(CMD_WORD));
+ addIndexCommand(iiB, fieldSet.getName(), CMD_WORD);
} else if (fieldSetMatching.getType().equals(MatchType.GRAM)) {
- iiB.command(
- new IndexInfoConfig.Indexinfo.Command.Builder()
- .indexname(fieldSet.getName())
- .command("ngram " + fieldSetMatching.getGramSize().orElse(NGramMatch.DEFAULT_GRAM_SIZE)));
+ addIndexCommand(iiB, fieldSet.getName(), "ngram " + fieldSetMatching.getGramSize().orElse(NGramMatch.DEFAULT_GRAM_SIZE));
} else if (fieldSetMatching.getType().equals(MatchType.TEXT)) {
}
@@ -495,10 +450,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer {
active = field.getIndex(field.getName()).getStemming();
}
}
- if (active != null) {
- return active;
- }
- return Stemming.BEST; // assume default
+ return Objects.requireNonNullElse(active, Stemming.BEST);
}
private boolean stemming(ImmutableSDField field) {
@@ -514,9 +466,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer {
private boolean isExactMatch(Matching m) {
if (m == null) return false;
- if (m.getType().equals(MatchType.EXACT)) return true;
- if (m.getType().equals(MatchType.WORD)) return true;
- return false;
+ return m.getType().equals(MatchType.EXACT) || m.getType().equals(MatchType.WORD);
}
@Override
@@ -528,34 +478,13 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer {
* An index command. Null commands are also represented, to detect consistency issues. This is an (immutable) value
* object.
*/
- public static class IndexCommand {
-
- private final String index;
-
- private final String command;
-
- public IndexCommand(String index, String command) {
- this.index = index;
- this.command = command;
- }
-
- public String getIndex() {
- return index;
- }
-
- public String getCommand() {
- return command;
- }
+ public record IndexCommand(String index, String command) {
/**
* Returns true if this is the null command (do nothing)
*/
public boolean isNull() {
- return command.equals("");
- }
-
- public int hashCode() {
- return index.hashCode() + 17 * command.hashCode();
+ return command.isEmpty();
}
public boolean equals(Object object) {
@@ -564,7 +493,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer {
}
return other.index.equals(this.index) &&
- other.command.equals(this.command);
+ other.command.equals(this.command);
}
public String toString() {
@@ -616,9 +545,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer {
return false;
}
- if (Stemming.NONE.equals(indexStemming)) {
- // Add nothing
- } else {
+ if ( ! Stemming.NONE.equals(indexStemming)) {
owner.addIndexCommand(indexName, CMD_STEM + ":" + indexStemming.toStemMode());
}
return true;
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/VsmFields.java b/config-model/src/main/java/com/yahoo/schema/derived/VsmFields.java
index cb806d8596e..564161b725d 100644
--- a/config-model/src/main/java/com/yahoo/schema/derived/VsmFields.java
+++ b/config-model/src/main/java/com/yahoo/schema/derived/VsmFields.java
@@ -14,6 +14,7 @@ import com.yahoo.document.datatypes.TensorFieldValue;
import com.yahoo.schema.FieldSets;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.Attribute;
+import com.yahoo.schema.document.Case;
import com.yahoo.schema.document.FieldSet;
import com.yahoo.schema.document.GeoPos;
import com.yahoo.schema.document.Matching;
@@ -144,7 +145,7 @@ public class VsmFields extends Derived implements VsmfieldsConfig.Producer {
public static Type GEO_POSITION = new Type("GEOPOS");
public static Type NEAREST_NEIGHBOR = new Type("NEAREST_NEIGHBOR");
- private String searchMethod;
+ private final String searchMethod;
private Type(String searchMethod) {
this.searchMethod = searchMethod;
@@ -261,10 +262,17 @@ public class VsmFields extends Derived implements VsmfieldsConfig.Producer {
return getMatchingName();
}
+ private static VsmfieldsConfig.Fieldspec.Normalize.Enum toNormalize(Matching matching) {
+ if (matching.getType() == MatchType.EXACT) return VsmfieldsConfig.Fieldspec.Normalize.Enum.LOWERCASE;
+ if (matching.getCase() == Case.CASED) return VsmfieldsConfig.Fieldspec.Normalize.Enum.NONE;
+ return VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE_AND_FOLD;
+ }
+
public VsmfieldsConfig.Fieldspec.Builder getFieldSpecConfig() {
var fB = new VsmfieldsConfig.Fieldspec.Builder();
fB.name(getName())
.searchmethod(VsmfieldsConfig.Fieldspec.Searchmethod.Enum.valueOf(type.getSearchMethod()))
+ .normalize(toNormalize(matching))
.arg1(getArg1())
.fieldtype(isAttribute
? VsmfieldsConfig.Fieldspec.Fieldtype.ATTRIBUTE
diff --git a/config-model/src/main/java/com/yahoo/schema/document/Matching.java b/config-model/src/main/java/com/yahoo/schema/document/Matching.java
index 0b542f134ad..9d68553fa80 100644
--- a/config-model/src/main/java/com/yahoo/schema/document/Matching.java
+++ b/config-model/src/main/java/com/yahoo/schema/document/Matching.java
@@ -46,12 +46,16 @@ public class Matching implements Cloneable, Serializable {
public MatchType getType() { return type; }
public Case getCase() { return casing; }
- public void setType(MatchType type) {
+ public Matching setType(MatchType type) {
this.type = type;
typeUserSet = true;
+ return this;
}
- public void setCase(Case casing) { this.casing = casing; }
+ public Matching setCase(Case casing) {
+ this.casing = casing;
+ return this;
+ }
public Integer maxLength() { return maxLength; }
public Matching maxLength(int maxLength) { this.maxLength = maxLength; return this; }
diff --git a/config-model/src/test/derived/indexschema/vsmfields.cfg b/config-model/src/test/derived/indexschema/vsmfields.cfg
index 31db622183e..a2152f9787f 100644
--- a/config-model/src/test/derived/indexschema/vsmfields.cfg
+++ b/config-model/src/test/derived/indexschema/vsmfields.cfg
@@ -3,121 +3,145 @@ searchall 1
fieldspec[].name "sa"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "sb"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "sc"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "sd"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "pos"
fieldspec[].searchmethod GEOPOS
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "se"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "word"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype ATTRIBUTE
fieldspec[].name "sf"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "sg"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "sh"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "si"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "exact1"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "exact"
+fieldspec[].normalize LOWERCASE
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "exact2"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "exact"
+fieldspec[].normalize LOWERCASE
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "bm25_field"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "ia"
fieldspec[].searchmethod INT32
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype ATTRIBUTE
fieldspec[].name "ib"
fieldspec[].searchmethod INT32
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype ATTRIBUTE
fieldspec[].name "ic"
fieldspec[].searchmethod INT32
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype ATTRIBUTE
fieldspec[].name "nostemstring1"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "nostemstring2"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "nostemstring3"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "nostemstring4"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "fs9"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "f10.text"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "sd_literal"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "pos_zcurve"
fieldspec[].searchmethod INT64
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype ATTRIBUTE
documenttype[].name "indexschema"
diff --git a/config-model/src/test/derived/nearestneighbor_streaming/vsmfields.cfg b/config-model/src/test/derived/nearestneighbor_streaming/vsmfields.cfg
index f8b1cf62048..ec06d01f05a 100644
--- a/config-model/src/test/derived/nearestneighbor_streaming/vsmfields.cfg
+++ b/config-model/src/test/derived/nearestneighbor_streaming/vsmfields.cfg
@@ -3,21 +3,25 @@ searchall 1
fieldspec[].name "vec_a"
fieldspec[].searchmethod NEAREST_NEIGHBOR
fieldspec[].arg1 "EUCLIDEAN"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype ATTRIBUTE
fieldspec[].name "vec_b"
fieldspec[].searchmethod NEAREST_NEIGHBOR
fieldspec[].arg1 "ANGULAR"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype ATTRIBUTE
fieldspec[].name "vec_c"
fieldspec[].searchmethod NEAREST_NEIGHBOR
fieldspec[].arg1 "INNERPRODUCT"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype ATTRIBUTE
fieldspec[].name "vec_d"
fieldspec[].searchmethod NONE
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype ATTRIBUTE
documenttype[].name "test"
diff --git a/config-model/src/test/derived/prefixexactattribute/vsmfields.cfg b/config-model/src/test/derived/prefixexactattribute/vsmfields.cfg
index 29bcde9faad..75192ef3121 100644
--- a/config-model/src/test/derived/prefixexactattribute/vsmfields.cfg
+++ b/config-model/src/test/derived/prefixexactattribute/vsmfields.cfg
@@ -3,26 +3,31 @@ searchall 1
fieldspec[].name "indexfield0"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "prefix"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 79
fieldspec[].fieldtype INDEX
fieldspec[].name "attributefield1"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "exact"
+fieldspec[].normalize LOWERCASE
fieldspec[].maxlength 1048576
fieldspec[].fieldtype ATTRIBUTE
fieldspec[].name "attributefield2"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "exact"
+fieldspec[].normalize LOWERCASE
fieldspec[].maxlength 1048576
fieldspec[].fieldtype ATTRIBUTE
fieldspec[].name "indexfield1"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "exact"
+fieldspec[].normalize LOWERCASE
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "indexfield2"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "exact"
+fieldspec[].normalize LOWERCASE
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
documenttype[].name "prefixexactattribute"
diff --git a/config-model/src/test/derived/streamingstruct/vsmfields.cfg b/config-model/src/test/derived/streamingstruct/vsmfields.cfg
index 7178f9d41ea..b5a234e8095 100644
--- a/config-model/src/test/derived/streamingstruct/vsmfields.cfg
+++ b/config-model/src/test/derived/streamingstruct/vsmfields.cfg
@@ -3,281 +3,337 @@ searchall 1
fieldspec[].name "coupleof"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "normalfields"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "a.f1"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "a.f1s"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "substring"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "a.f2"
fieldspec[].searchmethod INT32
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "a.f3"
fieldspec[].searchmethod DOUBLE
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "m.key"
fieldspec[].searchmethod INT64
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "m.value"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "b.f1"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "b.f1s"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "substring"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "b.f2"
fieldspec[].searchmethod INT32
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "b.f3"
fieldspec[].searchmethod DOUBLE
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "c.f1"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "word"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype ATTRIBUTE
fieldspec[].name "c.f1s"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "prefix"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "c.f3"
fieldspec[].searchmethod DOUBLE
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "c2.f1"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "word"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype ATTRIBUTE
fieldspec[].name "c2.f1s"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "suffix"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "c2.f2"
fieldspec[].searchmethod INT32
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "c2.f3"
fieldspec[].searchmethod DOUBLE
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "c3.f1"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "word"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype ATTRIBUTE
fieldspec[].name "c3.f1s"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "prefix"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "c3.f2"
fieldspec[].searchmethod INT32
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "c3.f3"
fieldspec[].searchmethod DOUBLE
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "n.nf1.f1"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "n.nf1.f1s"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "prefix"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "n.nf1.f3"
fieldspec[].searchmethod DOUBLE
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "n.nf1s.f1"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "substring"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "n.nf1s.f1s"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "substring"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "n.nf1s.f2"
fieldspec[].searchmethod INT32
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "n.nf1s.f3"
fieldspec[].searchmethod DOUBLE
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "n.nf2"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "array1.f1"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "array1.f1s"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "substring"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "array1.f2"
fieldspec[].searchmethod INT32
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "array1.f3"
fieldspec[].searchmethod DOUBLE
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "array2.f1"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "array2.f1s"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "substring"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "array2.f2"
fieldspec[].searchmethod INT32
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "array2.f3"
fieldspec[].searchmethod DOUBLE
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "array3.f1"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "word"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype ATTRIBUTE
fieldspec[].name "array3.f1s"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "prefix"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "array3.f3"
fieldspec[].searchmethod DOUBLE
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "subject.f1"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "d.f1"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "prefix"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "d.f1s"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "prefix"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "d.f2"
fieldspec[].searchmethod INT32
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "d.f3"
fieldspec[].searchmethod DOUBLE
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "e.f1"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "substring"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "e.f1s"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "substring"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "e.f2"
fieldspec[].searchmethod INT32
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "e.f3"
fieldspec[].searchmethod DOUBLE
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "f.f1"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "suffix"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "f.f1s"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 "suffix"
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "f.f2"
fieldspec[].searchmethod INT32
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "f.f3"
fieldspec[].searchmethod DOUBLE
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
fieldspec[].name "g"
fieldspec[].searchmethod AUTOUTF8
fieldspec[].arg1 ""
+fieldspec[].normalize LOWERCASE_AND_FOLD
fieldspec[].maxlength 1048576
fieldspec[].fieldtype INDEX
documenttype[].name "streamingstruct"
diff --git a/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java b/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java
index 6423b621ab9..601c014bdc1 100644
--- a/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java
+++ b/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java
@@ -4,8 +4,12 @@ package com.yahoo.schema.derived;
import com.yahoo.config.model.application.provider.MockFileRegistry;
import com.yahoo.config.model.deploy.TestProperties;
import com.yahoo.config.model.test.MockApplicationPackage;
+import com.yahoo.document.DataType;
import com.yahoo.documentmodel.NewDocumentReferenceDataType;
import com.yahoo.schema.Schema;
+import com.yahoo.schema.document.Case;
+import com.yahoo.schema.document.MatchType;
+import com.yahoo.schema.document.Matching;
import com.yahoo.schema.document.SDDocumentType;
import com.yahoo.schema.document.SDField;
import com.yahoo.schema.document.TemporarySDField;
@@ -19,24 +23,55 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
*/
public class VsmFieldsTestCase {
- @SuppressWarnings("deprecation")
- @Test
- void reference_type_field_is_unsearchable() {
+ private static Schema createSchema() {
Schema schema = new Schema("test", MockApplicationPackage.createEmpty(), new MockFileRegistry(), new TestableDeployLogger(), new TestProperties());
var sdoc = new SDDocumentType("test");
schema.addDocument(sdoc);
- SDField refField = new TemporarySDField(sdoc, "ref_field", NewDocumentReferenceDataType.forDocumentName("parent_type"));
- refField.parseIndexingScript("{ summary }");
- schema.getDocument().addField(refField);
+ return schema;
+ }
+ private static VsmfieldsConfig vsmfieldsConfig(Schema schema) {
VsmFields vsmFields = new VsmFields(schema);
VsmfieldsConfig.Builder cfgBuilder = new VsmfieldsConfig.Builder();
vsmFields.getConfig(cfgBuilder);
- VsmfieldsConfig cfg = cfgBuilder.build();
+ return cfgBuilder.build();
+ }
+
+ @Test
+ void reference_type_field_is_unsearchable() {
+ Schema schema = createSchema();
+ SDField field = new TemporarySDField(schema.getDocument(), "ref_field", NewDocumentReferenceDataType.forDocumentName("parent_type"));
+ field.parseIndexingScript("{ summary }");
+ schema.getDocument().addField(field);
+ VsmfieldsConfig cfg = vsmfieldsConfig(schema);
assertEquals(1, cfg.fieldspec().size());
VsmfieldsConfig.Fieldspec fieldSpec = cfg.fieldspec().get(0);
assertEquals("ref_field", fieldSpec.name());
assertEquals(VsmfieldsConfig.Fieldspec.Searchmethod.NONE, fieldSpec.searchmethod());
}
+
+ private void testIndexMatching(Matching matching, VsmfieldsConfig.Fieldspec.Normalize.Enum normalize, String arg1) {
+ Schema schema = createSchema();
+ SDField field = new TemporarySDField(schema.getDocument(), "f", DataType.STRING);
+ field.parseIndexingScript("{ index }");
+ field.setMatching(matching);
+ schema.getDocument().addField(field);
+ VsmfieldsConfig cfg = vsmfieldsConfig(schema);
+ VsmfieldsConfig.Fieldspec fieldSpec = cfg.fieldspec().get(0);
+ assertEquals("f", fieldSpec.name());
+ assertEquals(VsmfieldsConfig.Fieldspec.Searchmethod.AUTOUTF8, fieldSpec.searchmethod());
+ assertEquals(normalize, fieldSpec.normalize());
+ assertEquals(arg1, fieldSpec.arg1());
+ }
+
+ @Test
+ void test_exact_string() {
+ testIndexMatching(new Matching(MatchType.TEXT),
+ VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE_AND_FOLD, "");
+ testIndexMatching(new Matching(MatchType.TEXT).setCase(Case.CASED),
+ VsmfieldsConfig.Fieldspec.Normalize.NONE, "");
+ testIndexMatching(new Matching(MatchType.EXACT).setCase(Case.CASED),
+ VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE, "exact");
+ }
}