aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/search
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@online.no> 2023-12-01 11:21:16 +0100
committer Tor Egge <Tor.Egge@online.no> 2023-12-01 11:21:16 +0100
commit 50120bae7119a8a8554cd0cfdad3bade1a9be7ce (patch)
tree 1cb067698b1171405a6f515a0a98e1f30da66f9c /container-search/src/main/java/com/yahoo/search
parent 3d35a610b0da4127593d5e98f4c52f397c311d61 (diff)
Lowercase in tokens when needed.
Diffstat (limited to 'container-search/src/main/java/com/yahoo/search')
-rw-r--r-- container-search/src/main/java/com/yahoo/search/querytransform/LowercasingSearcher.java 19
1 files changed, 19 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/querytransform/LowercasingSearcher.java b/container-search/src/main/java/com/yahoo/search/querytransform/LowercasingSearcher.java
index d023c1f9143..1889c755833 100644
--- a/container-search/src/main/java/com/yahoo/search/querytransform/LowercasingSearcher.java
+++ b/container-search/src/main/java/com/yahoo/search/querytransform/LowercasingSearcher.java
@@ -64,6 +64,8 @@ public abstract class LowercasingSearcher extends Searcher {
}
} else if (next instanceof WordAlternativesItem) {
lowerCase((WordAlternativesItem) next, indexFacts);
+ } else if (next instanceof StringInItem) {
+ lowerCase((StringInItem) next, indexFacts);
}
}
}
@@ -144,6 +146,23 @@ public abstract class LowercasingSearcher extends Searcher {
}
+ private void lowerCase(StringInItem set, IndexFacts.Session indexFacts) {
+ if ( ! syntheticLowerCaseCheck(set.getIndexName(), indexFacts, true)) {
+ return;
+ }
+
+ // set.getTokens() returns a snapshot (it uses Set.copyOf()), so removing and adding tokens
+ // on the item inside the loop does not affect the set being iterated over.
+ var originalTokens = set.getTokens();
+ for (String originalToken : originalTokens) {
+ String token = toLowerCase(originalToken);
+ if (!originalToken.equals(token)) {
+ set.removeToken(originalToken);
+ set.addToken(token);
+ }
+ }
+ }
+
/**
* Override this to control whether a given term should be lowercased.
*