From d0addf353b85061e360456f2dba8874a48c00609 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Thu, 21 Mar 2024 13:27:11 +0000 Subject: fold AND and SAND items into top-level WEAKAND --- .../com/yahoo/prelude/query/parser/AllParser.java | 22 +++++++- .../test/ExactMatchAndDefaultIndexTestCase.java | 2 +- .../test/StemmingSearcherTestCase.java | 4 +- .../com/yahoo/prelude/test/IndexFactsTestCase.java | 6 +- .../java/com/yahoo/search/test/QueryTestCase.java | 65 ++++++++++++++++------ 5 files changed, 74 insertions(+), 25 deletions(-) (limited to 'container-search') diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AllParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AllParser.java index 43bd175b348..692269a1412 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/AllParser.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AllParser.java @@ -11,6 +11,7 @@ import com.yahoo.prelude.query.OrItem; import com.yahoo.prelude.query.PhraseItem; import com.yahoo.prelude.query.QueryCanonicalizer; import com.yahoo.prelude.query.RankItem; +import com.yahoo.prelude.query.SegmentItem; import com.yahoo.prelude.query.TrueItem; import com.yahoo.prelude.query.WeakAndItem; import com.yahoo.search.query.QueryTree; @@ -98,10 +99,29 @@ public class AllParser extends SimpleParser { return root.getRoot() instanceof NullItem ? null : root.getRoot(); } + private boolean foldIntoAnd(CompositeItem other) { + if (other instanceof AndItem) { + return true; + } + if (weakAnd && other instanceof SegmentItem) { + return true; + } + if (weakAnd && other instanceof PhraseItem phrase) { + return ! phrase.isExplicit(); + } + return false; + } + protected CompositeItem addAnd(Item item, CompositeItem and) { if (and == null) and = createAnd(); - and.addItem(item); + if (item instanceof CompositeItem composite && foldIntoAnd(composite)) { + for (var subItem : composite.items()) { + addAnd(subItem, and); + } + } else { + and.addItem(item); + } return and; } diff --git a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ExactMatchAndDefaultIndexTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ExactMatchAndDefaultIndexTestCase.java index adcfc721fc9..b998847efa3 100644 --- a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ExactMatchAndDefaultIndexTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ExactMatchAndDefaultIndexTestCase.java @@ -35,7 +35,7 @@ public class ExactMatchAndDefaultIndexTestCase { q.getModel().setExecution(new Execution(Execution.Context.createContextStub(facts))); assertEquals("WEAKAND(100) testexact:a/b testexact:foo.com", q.getModel().getQueryTree().getRoot().toString()); q = new Query("?query=" + enc("a/b foo.com")); - assertEquals("WEAKAND(100) (AND a b) (AND foo com)", q.getModel().getQueryTree().getRoot().toString()); + assertEquals("WEAKAND(100) a b foo com", q.getModel().getQueryTree().getRoot().toString()); } @Test diff --git a/container-search/src/test/java/com/yahoo/prelude/querytransform/test/StemmingSearcherTestCase.java b/container-search/src/test/java/com/yahoo/prelude/querytransform/test/StemmingSearcherTestCase.java index 3db51ad4b8a..70af02102a3 100644 --- a/container-search/src/test/java/com/yahoo/prelude/querytransform/test/StemmingSearcherTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/querytransform/test/StemmingSearcherTestCase.java @@ -144,8 +144,8 @@ public class StemmingSearcherTestCase { String emoji1 = "\uD83C\uDF49"; // 🍉 String emoji2 = "\uD83D\uDE00"; // 😀 assertStemmed("WEAKAND(100) " + emoji1, "/search?query=" + emoji1); - assertStemmed("WEAKAND(100) (AND " + emoji1 + " " + emoji2 + ")", "/search?query=" + emoji1 + emoji2); - assertStemmed("WEAKAND(100) (AND " + emoji1 + " foo " + emoji2 + ")", "/search?query=" + emoji1 + "foo" + emoji2); + assertStemmed("WEAKAND(100) " + emoji1 + " " + emoji2, "/search?query=" + emoji1 + emoji2); + assertStemmed("WEAKAND(100) " + emoji1 + " foo " + emoji2, "/search?query=" + emoji1 + "foo" + emoji2); } private Execution.Context newExecutionContext() { diff --git a/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java b/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java index c636550c02f..f53b238c5cd 100644 --- a/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java @@ -50,7 +50,7 @@ public class IndexFactsTestCase { Query q = newQuery("?query=a:b", indexFacts); assertEquals("WEAKAND(100) a:b", q.getModel().getQueryTree().getRoot().toString()); q = newQuery("?query=notarealindex:b", indexFacts); - assertEquals("WEAKAND(100) (AND notarealindex b)", q.getModel().getQueryTree().getRoot().toString()); + assertEquals("WEAKAND(100) notarealindex b", q.getModel().getQueryTree().getRoot().toString()); } @Test @@ -277,8 +277,8 @@ public class IndexFactsTestCase { IndexFacts.Session session2 = indexFacts.newSession(query2.getModel().getSources(), query2.getModel().getRestrict()); assertTrue(session1.getIndex("url").isUriIndex()); assertTrue(session2.getIndex("url").isUriIndex()); - assertEquals("WEAKAND(100) (AND url:https url:foo url:bar)", query1.getModel().getQueryTree().toString()); - assertEquals("WEAKAND(100) (AND url:https url:foo url:bar)", query2.getModel().getQueryTree().toString()); + assertEquals("WEAKAND(100) url:https url:foo url:bar", query1.getModel().getQueryTree().toString()); + assertEquals("WEAKAND(100) url:https url:foo url:bar", query2.getModel().getQueryTree().toString()); } @Test diff --git a/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java b/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java index 6a310180eab..08cd653c1fc 100644 --- a/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java @@ -1015,7 +1015,9 @@ public class QueryTestCase { @Test void testImplicitPhraseIsDefault() { Query query = new Query(httpEncode("?query=it's fine")); - assertEquals("WEAKAND(100) (SAND it s) fine", query.getModel().getQueryTree().toString()); + assertEquals("WEAKAND(100) it s fine", query.getModel().getQueryTree().toString()); + query = new Query(httpEncode("?query=\"it s\" fine")); + assertEquals("WEAKAND(100) \"it s\" fine", query.getModel().getQueryTree().toString()); } @Test @@ -1034,7 +1036,7 @@ public class QueryTestCase { } @Test - void testImplicitAnd() { + void testImplicitConnectivityInsideWeakAnd() { Query query = new Query(httpEncode("?query=myfield:it's myfield:a.b myfield:c")); SearchDefinition test = new SearchDefinition("test"); @@ -1045,9 +1047,38 @@ public class QueryTestCase { IndexModel indexModel = new IndexModel(test); query.getModel().setExecution(new Execution(Execution.Context.createContextStub(new IndexFacts(indexModel)))); - assertEquals("WEAKAND(100) (SAND myfield:it myfield:s) (AND myfield:a myfield:b) myfield:c", query.getModel().getQueryTree().toString()); + assertEquals("WEAKAND(100) myfield:it myfield:s myfield:a myfield:b myfield:c", query.getModel().getQueryTree().toString()); // 'it' and 's' should have connectivity 1 WeakAndItem root = (WeakAndItem) query.getModel().getQueryTree().getRoot(); + WordItem it = (WordItem) root.getItem(0); + assertEquals("it", it.getWord()); + WordItem s = (WordItem) root.getItem(1); + assertEquals("s", s.getWord()); + assertEquals(s, it.getConnectedItem()); + assertEquals(1.0, it.getConnectivity(), 0.00000001); + assertEquals(0.0, s.getConnectivity(), 0.00000001); + WordItem a = (WordItem) root.getItem(2); + assertEquals(1.0, a.getConnectivity(), 0.00000001); + WordItem b = (WordItem) root.getItem(3); + assertEquals(0.0, b.getConnectivity(), 0.00000001); + WordItem c = (WordItem) root.getItem(4); + } + + @Test + void testImplicitAnd() { + Query query = new Query(httpEncode("?query=myfield:it's myfield:a.b myfield:c&type=all")); + + SearchDefinition test = new SearchDefinition("test"); + Index myField = new Index("myfield"); + myField.addCommand("phrase-segmenting false"); + assertFalse(myField.getPhraseSegmenting()); + test.addIndex(myField); + IndexModel indexModel = new IndexModel(test); + query.getModel().setExecution(new Execution(Execution.Context.createContextStub(new IndexFacts(indexModel)))); + + assertEquals("AND (SAND myfield:it myfield:s) myfield:a myfield:b myfield:c", query.getModel().getQueryTree().toString()); + // 'it' and 's' should have connectivity 1 + AndItem root = (AndItem) query.getModel().getQueryTree().getRoot(); AndSegmentItem sand = (AndSegmentItem) root.getItem(0); WordItem it = (WordItem) sand.getItem(0); assertEquals("it", it.getWord()); @@ -1066,14 +1097,13 @@ public class QueryTestCase { IndexModel indexModel = new IndexModel(test); { - Query query = new Query(httpEncode("?query=myfield:b.c.d")); + Query query = new Query(httpEncode("?query=myfield:b.c.d&type=all")); query.getModel().setExecution(new Execution(Execution.Context.createContextStub(new IndexFacts(indexModel)))); - assertEquals("WEAKAND(100) (AND myfield:b myfield:c myfield:d)", query.getModel().getQueryTree().toString()); - WeakAndItem root = (WeakAndItem) query.getModel().getQueryTree().getRoot(); - AndItem and = (AndItem) root.getItem(0); - WordItem b = (WordItem) and.getItem(0); - WordItem c = (WordItem) and.getItem(1); - WordItem d = (WordItem) and.getItem(2); + assertEquals("AND myfield:b myfield:c myfield:d", query.getModel().getQueryTree().toString()); + AndItem root = (AndItem) query.getModel().getQueryTree().getRoot(); + WordItem b = (WordItem) root.getItem(0); + WordItem c = (WordItem) root.getItem(1); + WordItem d = (WordItem) root.getItem(2); assertEquals(c, b.getConnectedItem()); assertEquals(1.0, b.getConnectivity(), 0.00000001); assertEquals(d, c.getConnectedItem()); @@ -1081,16 +1111,15 @@ public class QueryTestCase { } { - Query query = new Query(httpEncode("?query=myfield:a myfield:b.c.d myfield:e")); + Query query = new Query(httpEncode("?query=myfield:a myfield:b.c.d myfield:e&type=all")); query.getModel().setExecution(new Execution(Execution.Context.createContextStub(new IndexFacts(indexModel)))); - assertEquals("WEAKAND(100) myfield:a (AND myfield:b myfield:c myfield:d) myfield:e", query.getModel().getQueryTree().toString()); - WeakAndItem root = (WeakAndItem) query.getModel().getQueryTree().getRoot(); + assertEquals("AND myfield:a myfield:b myfield:c myfield:d myfield:e", query.getModel().getQueryTree().toString()); + AndItem root = (AndItem) query.getModel().getQueryTree().getRoot(); WordItem a = (WordItem) root.getItem(0); - AndItem and = (AndItem) root.getItem(1); - WordItem b = (WordItem) and.getItem(0); - WordItem c = (WordItem) and.getItem(1); - WordItem d = (WordItem) and.getItem(2); - WordItem e = (WordItem) root.getItem(2); + WordItem b = (WordItem) root.getItem(1); + WordItem c = (WordItem) root.getItem(2); + WordItem d = (WordItem) root.getItem(3); + WordItem e = (WordItem) root.getItem(4); assertNull(a.getConnectedItem()); assertEquals(c, b.getConnectedItem()); assertEquals(1.0, b.getConnectivity(), 0.00000001); -- cgit v1.2.3