summary | refs | log | tree | commit | diff | stats
path: root/container-search
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@vespa.ai> 2023-06-02 07:26:40 +0200
committer: Jon Bratseth <bratseth@vespa.ai> 2023-06-02 07:27:53 +0200
commit: d19f11334f2f0fa3edd6ffc50db24fdcad49444a (patch)
tree: 09c4fb88299e37cc210d48ab98a0685c01e89082 /container-search
parent: b1b74921d4d632f9ffe20ba56ddf6754ffca57a2 (diff)
Protect against no stems
Diffstat (limited to 'container-search')
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java 3
-rw-r--r-- container-search/src/test/java/com/yahoo/prelude/querytransform/test/StemmingSearcherTestCase.java 33
2 files changed, 16 insertions, 20 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java
index fd37329eebc..9050b82fd69 100644
--- a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java
@@ -193,9 +193,10 @@ public class StemmingSearcher extends Searcher {
Item blockAsItem = (Item)current;
CompositeItem composite;
List<StemList> segments = linguistics.getStemmer().stem(current.stringValue(), index.getStemMode(), context.language);
+ if (segments.isEmpty()) return blockAsItem;
+
String indexName = current.getIndexName();
Substring substring = getOffsets(current);
-
if (segments.size() == 1) {
TaggableItem w = singleWordSegment(current, segments.get(0), index, substring, context.insidePhrase);
setMetaData(current, context.reverseConnectivity, w);
diff --git a/container-search/src/test/java/com/yahoo/prelude/querytransform/test/StemmingSearcherTestCase.java b/container-search/src/test/java/com/yahoo/prelude/querytransform/test/StemmingSearcherTestCase.java
index bcb243b4563..118adae40ca 100644
--- a/container-search/src/test/java/com/yahoo/prelude/querytransform/test/StemmingSearcherTestCase.java
+++ b/container-search/src/test/java/com/yahoo/prelude/querytransform/test/StemmingSearcherTestCase.java
@@ -23,7 +23,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
- * @author <a href="mailto:mathiasm@yahoo-inc.com">Mathias M. Lidal</a>
+ * @author Mathias M. Lidal
*/
public class StemmingSearcherTestCase {
@@ -33,8 +33,8 @@ public class StemmingSearcherTestCase {
@Test
void testStemOnlySomeTerms() {
- assertStem("/search?query=Holes in CVS and Subversion nostem:Found",
- "WEAKAND(100) hole in cvs and subversion nostem:Found");
+ assertStemmed("WEAKAND(100) hole in cvs and subversion nostem:Found", "/search?query=Holes in CVS and Subversion nostem:Found"
+ );
}
@Test
@@ -78,7 +78,7 @@ public class StemmingSearcherTestCase {
@Test
void testDontStemPrefixes() {
- assertStem("/search?query=ist*&language=de", "WEAKAND(100) ist*");
+ assertStemmed("WEAKAND(100) ist*", "/search?query=ist*&language=de");
}
@Test
@@ -90,10 +90,10 @@ public class StemmingSearcherTestCase {
@Test
void testNounStemming() {
- assertStem("/search?query=noun:towers noun:tower noun:tow",
- "WEAKAND(100) noun:tower noun:tower noun:tow");
- assertStem("/search?query=notnoun:towers notnoun:tower notnoun:tow",
- "WEAKAND(100) notnoun:tower notnoun:tower notnoun:tow");
+ assertStemmed("WEAKAND(100) noun:tower noun:tower noun:tow", "/search?query=noun:towers noun:tower noun:tow"
+ );
+ assertStemmed("WEAKAND(100) notnoun:tower notnoun:tower notnoun:tow", "/search?query=notnoun:towers notnoun:tower notnoun:tow"
+ );
}
@SuppressWarnings("deprecation")
@@ -133,11 +133,10 @@ public class StemmingSearcherTestCase {
@Test
void testMultipleStemming() {
- Query q = new Query(QueryTestCase.httpEncode("/search?language=en&search=four&query=trees \"nouns girls\" flowers \"a verbs a\" girls&default-index=foobar"));
- executeStemming(q);
- assertEquals("WEAKAND(100) WORD_ALTERNATIVES foobar:[ tree(0.7) trees(1.0) ] " +
- "foobar:\"noun girl\" WORD_ALTERNATIVES foobar:[ flower(0.7) flowers(1.0) ] " +
- "foobar:\"a verb a\" WORD_ALTERNATIVES foobar:[ girl(0.7) girls(1.0) ]", q.getModel().getQueryTree().getRoot().toString());
+ assertStemmed("WEAKAND(100) WORD_ALTERNATIVES foobar:[ tree(0.7) trees(1.0) ] " +
+ "foobar:\"noun girl\" WORD_ALTERNATIVES foobar:[ flower(0.7) flowers(1.0) ] " +
+ "foobar:\"a verb a\" WORD_ALTERNATIVES foobar:[ girl(0.7) girls(1.0) ]",
+ "/search?language=en&search=four&query=trees \"nouns girls\" flowers \"a verbs a\" girls&default-index=foobar");
}
private Execution.Context newExecutionContext() {
@@ -153,12 +152,8 @@ public class StemmingSearcherTestCase {
newExecutionContext()).search(query);
}
- private void assertStem(String queryString, String expectedQueryTree) {
- assertStemEncoded(QueryTestCase.httpEncode(queryString), expectedQueryTree);
- }
-
- private void assertStemEncoded(String encodedQueryString, String expectedQueryTree) {
- Query query = new Query(encodedQueryString);
+ private void assertStemmed(String expectedQueryTree, String queryString) {
+ Query query = new Query(QueryTestCase.httpEncode(queryString));
executeStemming(query);
assertEquals(expectedQueryTree, query.getModel().getQueryTree().getRoot().toString());
}