summaryrefslogtreecommitdiffstats
path: root/container-search
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@online.no> 2023-12-01 11:21:16 +0100
committer Tor Egge <Tor.Egge@online.no> 2023-12-01 11:21:16 +0100
commit 50120bae7119a8a8554cd0cfdad3bade1a9be7ce (patch)
tree 1cb067698b1171405a6f515a0a98e1f30da66f9c /container-search
parent 3d35a610b0da4127593d5e98f4c52f397c311d61 (diff)
Lowercase in tokens when needed.
Diffstat (limited to 'container-search')
-rw-r--r-- container-search/abi-spec.json | 1
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/StringInItem.java | 4
-rw-r--r-- container-search/src/main/java/com/yahoo/search/querytransform/LowercasingSearcher.java | 19
-rw-r--r-- container-search/src/test/java/com/yahoo/search/querytransform/LowercasingTestCase.java | 23
4 files changed, 47 insertions, 0 deletions
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json
index f2348c58d4d..80833e3e27c 100644
--- a/container-search/abi-spec.json
+++ b/container-search/abi-spec.json
@@ -1582,6 +1582,7 @@
"public int getTermCount()",
"protected void appendBodyString(java.lang.StringBuilder)",
"public void addToken(java.lang.String)",
+ "public void removeToken(java.lang.String)",
"public java.util.Collection getTokens()",
"public boolean equals(java.lang.Object)",
"public int hashCode()"
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/StringInItem.java b/container-search/src/main/java/com/yahoo/prelude/query/StringInItem.java
index ebcf0de1a21..4473010082e 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/StringInItem.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/StringInItem.java
@@ -68,6 +68,10 @@ public class StringInItem extends InItem {
tokens.add(token);
}
+    /** Removes the given token from this item's tokens; the counterpart of {@link #addToken(String)}. */
+    public void removeToken(String token) { tokens.remove(token); }
+
public Collection<String> getTokens() { return Set.copyOf(tokens); }
@Override
diff --git a/container-search/src/main/java/com/yahoo/search/querytransform/LowercasingSearcher.java b/container-search/src/main/java/com/yahoo/search/querytransform/LowercasingSearcher.java
index d023c1f9143..1889c755833 100644
--- a/container-search/src/main/java/com/yahoo/search/querytransform/LowercasingSearcher.java
+++ b/container-search/src/main/java/com/yahoo/search/querytransform/LowercasingSearcher.java
@@ -64,6 +64,8 @@ public abstract class LowercasingSearcher extends Searcher {
}
} else if (next instanceof WordAlternativesItem) {
lowerCase((WordAlternativesItem) next, indexFacts);
+ } else if (next instanceof StringInItem) {
+ lowerCase((StringInItem) next, indexFacts);
}
}
}
@@ -144,6 +146,23 @@ public abstract class LowercasingSearcher extends Searcher {
}
+    /**
+     * Replaces every token of the given IN item that is not already lowercase with its
+     * lowercased form. Does nothing unless {@code syntheticLowerCaseCheck} returns true
+     * for the item's index name.
+     *
+     * @param set the IN item whose tokens are rewritten in place
+     * @param indexFacts the index facts session handed to the lowercase check
+     */
+    private void lowerCase(StringInItem set, IndexFacts.Session indexFacts) {
+        boolean shouldLowercase = syntheticLowerCaseCheck(set.getIndexName(), indexFacts, true);
+        if (shouldLowercase) {
+            // getTokens() hands back a Set.copyOf() snapshot, so removing from and adding to
+            // the item below does not disturb the collection being traversed.
+            for (String candidate : set.getTokens()) {
+                String lowered = toLowerCase(candidate);
+                if (candidate.equals(lowered)) {
+                    continue; // already lowercase, keep as is
+                }
+                set.removeToken(candidate);
+                set.addToken(lowered);
+            }
+        }
+    }
+
/**
* Override this to control whether a given term should be lowercased.
*
diff --git a/container-search/src/test/java/com/yahoo/search/querytransform/LowercasingTestCase.java b/container-search/src/test/java/com/yahoo/search/querytransform/LowercasingTestCase.java
index c02739e857b..bacc5483a16 100644
--- a/container-search/src/test/java/com/yahoo/search/querytransform/LowercasingTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/querytransform/LowercasingTestCase.java
@@ -11,6 +11,7 @@ import com.yahoo.prelude.IndexModel;
import static org.junit.jupiter.api.Assertions.assertEquals;
import com.yahoo.prelude.SearchDefinition;
import com.yahoo.prelude.query.SameElementItem;
+import com.yahoo.prelude.query.StringInItem;
import org.junit.jupiter.api.Test;
import com.yahoo.component.chain.Chain;
import com.yahoo.prelude.Index;
@@ -232,4 +233,26 @@ public class LowercasingTestCase {
assertEquals("def", w1.getWord());
}
+    /**
+     * Runs two single-token IN items through the searcher and expects the BAMSE item's token
+     * to come back lowercased while the TEDDY item's token keeps its original casing
+     * (presumably because of how those indexes are configured elsewhere in this test class).
+     */
+    @Test
+    void testIn() {
+        StringInItem bamseIn = new StringInItem(BAMSE);
+        bamseIn.addToken("AbC");
+        StringInItem teddyIn = new StringInItem(TEDDY);
+        teddyIn.addToken("dEf");
+
+        AndItem conjunction = new AndItem();
+        conjunction.addItem(bamseIn);
+        conjunction.addItem(teddyIn);
+
+        Query query = new Query();
+        query.getModel().getQueryTree().setRoot(conjunction);
+        Result result = createExecution().search(query);
+
+        AndItem transformed = (AndItem) result.getQuery().getModel().getQueryTree().getRoot();
+        var bamseTokens = ((StringInItem) transformed.getItem(0)).getTokens();
+        var teddyTokens = ((StringInItem) transformed.getItem(1)).getTokens();
+        assertEquals(1, bamseTokens.size());
+        assertEquals(1, teddyTokens.size());
+        assertEquals("abc", bamseTokens.iterator().next());
+        assertEquals("dEf", teddyTokens.iterator().next());
+    }
+
}