From 50120bae7119a8a8554cd0cfdad3bade1a9be7ce Mon Sep 17 00:00:00 2001
From: Tor Egge
Date: Fri, 1 Dec 2023 11:21:16 +0100
Subject: Lowercase in tokens when needed.

---
 container-search/abi-spec.json | 1 +
 .../java/com/yahoo/prelude/query/StringInItem.java | 4 ++++
 .../search/querytransform/LowercasingSearcher.java | 19 ++++++++++++++++++
 .../search/querytransform/LowercasingTestCase.java | 23 ++++++++++++++++++++++
 4 files changed, 47 insertions(+)

(limited to 'container-search')

diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json
index f2348c58d4d..80833e3e27c 100644
--- a/container-search/abi-spec.json
+++ b/container-search/abi-spec.json
@@ -1582,6 +1582,7 @@
       "public int getTermCount()",
       "protected void appendBodyString(java.lang.StringBuilder)",
       "public void addToken(java.lang.String)",
+      "public void removeToken(java.lang.String)",
       "public java.util.Collection getTokens()",
       "public boolean equals(java.lang.Object)",
       "public int hashCode()"
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/StringInItem.java b/container-search/src/main/java/com/yahoo/prelude/query/StringInItem.java
index ebcf0de1a21..4473010082e 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/StringInItem.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/StringInItem.java
@@ -68,6 +68,10 @@ public class StringInItem extends InItem {
         tokens.add(token);
     }
 
+    public void removeToken(String token) {
+        tokens.remove(token);
+    }
+
     public Collection<String> getTokens() { return Set.copyOf(tokens); }
 
     @Override
diff --git a/container-search/src/main/java/com/yahoo/search/querytransform/LowercasingSearcher.java b/container-search/src/main/java/com/yahoo/search/querytransform/LowercasingSearcher.java
index d023c1f9143..1889c755833 100644
--- a/container-search/src/main/java/com/yahoo/search/querytransform/LowercasingSearcher.java
+++ b/container-search/src/main/java/com/yahoo/search/querytransform/LowercasingSearcher.java
@@ -64,6 +64,8 @@ public abstract class LowercasingSearcher extends Searcher {
                 }
             } else if (next instanceof WordAlternativesItem) {
                 lowerCase((WordAlternativesItem) next, indexFacts);
+            } else if (next instanceof StringInItem) {
+                lowerCase((StringInItem) next, indexFacts);
             }
         }
     }
@@ -144,6 +146,23 @@ public abstract class LowercasingSearcher extends Searcher {
 
     }
 
+    private void lowerCase(StringInItem set, IndexFacts.Session indexFacts) {
+        if ( ! syntheticLowerCaseCheck(set.getIndexName(), indexFacts, true)) {
+            return;
+        }
+
+        // set.getTokens() uses Set.copyOf(), thus modification to original set is not reflected in
+        // set being iterated over.
+        var originalTokens = set.getTokens();
+        for (String originalToken : originalTokens) {
+            String token = toLowerCase(originalToken);
+            if (!originalToken.equals(token)) {
+                set.removeToken(originalToken);
+                set.addToken(token);
+            }
+        }
+    }
+
     /**
      * Override this to control whether a given term should be lowercased.
      *
diff --git a/container-search/src/test/java/com/yahoo/search/querytransform/LowercasingTestCase.java b/container-search/src/test/java/com/yahoo/search/querytransform/LowercasingTestCase.java
index c02739e857b..bacc5483a16 100644
--- a/container-search/src/test/java/com/yahoo/search/querytransform/LowercasingTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/querytransform/LowercasingTestCase.java
@@ -11,6 +11,7 @@ import com.yahoo.prelude.IndexModel;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import com.yahoo.prelude.SearchDefinition;
 import com.yahoo.prelude.query.SameElementItem;
+import com.yahoo.prelude.query.StringInItem;
 import org.junit.jupiter.api.Test;
 import com.yahoo.component.chain.Chain;
 import com.yahoo.prelude.Index;
@@ -232,4 +233,26 @@ public class LowercasingTestCase {
         assertEquals("def", w1.getWord());
     }
 
+    @Test
+    void testIn() {
+        Query q = new Query();
+        AndItem root = new AndItem();
+        StringInItem tmp;
+        tmp = new StringInItem(BAMSE);
+        tmp.addToken("AbC");
+        root.addItem(tmp);
+        tmp = new StringInItem(TEDDY);
+        tmp.addToken("dEf");
+        root.addItem(tmp);
+        q.getModel().getQueryTree().setRoot(root);
+        Result r = createExecution().search(q);
+        root = (AndItem) r.getQuery().getModel().getQueryTree().getRoot();
+        StringInItem w0 = (StringInItem) root.getItem(0);
+        StringInItem w1 = (StringInItem) root.getItem(1);
+        assertEquals(1, w0.getTokens().size());
+        assertEquals(1, w1.getTokens().size());
+        assertEquals("abc", w0.getTokens().iterator().next());
+        assertEquals("dEf", w1.getTokens().iterator().next());
+    }
+
 }
-- 
cgit v1.2.3