From 5614a846b5c4d6980a9880577ffa9253a2112ce0 Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Thu, 21 Dec 2023 17:32:29 +0100 Subject: Add support for collapsing on multiple fields Changes in FieldCollapsingSearcher: - Allow passing a comma-separated list of fields in collapsefield - Use collapsesize to override the default for all fields - Use collapsesize.fieldname to override the size for a specific field --- .../prelude/searcher/FieldCollapsingSearcher.java | 58 ++++++++-- .../test/FieldCollapsingSearcherTestCase.java | 125 +++++++++++++++++++++ 2 files changed, 173 insertions(+), 10 deletions(-) (limited to 'container-search') diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java index 708c6de1212..5d848bb2ad5 100644 --- a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java @@ -10,14 +10,16 @@ import com.yahoo.search.Query; import com.yahoo.search.Result; import com.yahoo.search.Searcher; import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.query.Properties; import com.yahoo.search.result.Hit; import com.yahoo.search.searchchain.Execution; import com.yahoo.search.searchchain.PhaseNames; +import java.util.Arrays; import java.util.Map; /** - * A searcher which does parametrized collapsing. + * A searcher which does parameterized collapsing. 
* * @author Steinar Knutsen */ @@ -85,11 +87,14 @@ public class FieldCollapsingSearcher extends Searcher { */ @Override public Result search(com.yahoo.search.Query query, Execution execution) { - String collapseField = query.properties().getString(collapsefield); + String collapseFieldParam = query.properties().getString(collapsefield); - if (collapseField == null) return execution.search(query); + if (collapseFieldParam == null) return execution.search(query); + + String[] collapseFields = collapseFieldParam.split(","); + + int globalCollapseSize = query.properties().getInteger(collapsesize, defaultCollapseSize); - int collapseSize = query.properties().getInteger(collapsesize, defaultCollapseSize); query.properties().set(collapse, "0"); int hitsToRequest = query.getHits() != 0 ? (int) Math.ceil((query.getOffset() + query.getHits() + 1) * extraFactor) : 0; @@ -103,12 +108,29 @@ public class FieldCollapsingSearcher extends Searcher { String collapseSummary = query.properties().getString(collapseSummaryName); String summaryClass = (collapseSummary == null) ? 
query.getPresentation().getSummary() : collapseSummary; - query.trace("Collapsing by '" + collapseField + "' using summary '" + collapseSummary + "'", 2); + query.trace("Collapsing by '" + Arrays.toString(collapseFields) + "' using summary '" + collapseSummary + "'", 2); do { resultSource = search(query.clone(), execution, nextOffset, hitsToRequest); fill(resultSource, summaryClass, execution); - collapse(result, knownCollapses, resultSource, collapseField, collapseSize); + + // collapse by the primary field, using the query-result as the source + // this either fills an empty result, or extends the existing one from a previous iteration + collapse(result, knownCollapses, resultSource, collapseFields[0], + getCollapseSize(query.properties(), collapseFields[0], globalCollapseSize) + ); + + // collapse even further, using the other fields + // using the result as source, we just (possibly) reduce the number of hits + for (int i = 1; i < collapseFields.length; i++) { + Result newResult = new Result(query); + + collapse(newResult, knownCollapses, result, collapseFields[i], + getCollapseSize(query.properties(), collapseFields[i], globalCollapseSize) + ); + + result = newResult; + } hitsAfterCollapse = result.getHitCount(); if (resultSource.getTotalHitCount() < (hitsToRequest + nextOffset)) { @@ -159,19 +181,35 @@ public class FieldCollapsingSearcher extends Searcher { continue; } - if (knownCollapses.containsKey(collapseId)) { - int numHitsThisField = knownCollapses.get(collapseId); + // prepending the fieldname is necessary to distinguish between values in the different collapsefields + String collapseKey = collapseField + "_" + collapseId; + + if (knownCollapses.containsKey(collapseKey)) { + int numHitsThisField = knownCollapses.get(collapseKey); if (numHitsThisField < collapseSize) { result.hits().add(hit); ++numHitsThisField; - knownCollapses.put(collapseId, numHitsThisField); + knownCollapses.put(collapseKey, numHitsThisField); } } else { - 
knownCollapses.put(collapseId, 1); + knownCollapses.put(collapseKey, 1); result.hits().add(hit); } } } + private Integer getCollapseSize(Properties properties, String fieldName, Integer globalCollapseSize) { + Integer fieldCollapseSize = properties.getInteger(collapsesize.append(fieldName)); + + if (fieldCollapseSize != null) { + return fieldCollapseSize; + } + + if (globalCollapseSize != null) { + return globalCollapseSize; + } + + return defaultCollapseSize; + } } diff --git a/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java b/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java index 361079f7595..70251e2cb8e 100644 --- a/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java @@ -217,6 +217,119 @@ public class FieldCollapsingSearcherTestCase { assertHit("http://acme.org/j.html", 4, 2, r.hits().get(0)); } + /** + * Tests that collapsing hits with 2 fields works, + * this test also shows that field order is important + */ + @Test + void testCollapsingWithMultipleFields() { + // Set up + Map chained = new HashMap<>(); + FieldCollapsingSearcher collapse = new FieldCollapsingSearcher(); + DocumentSourceSearcher docsource = new DocumentSourceSearcher(); + chained.put(collapse, docsource); + + Query q = new Query("?query=test_collapse"); + // The searcher turns off collapsing further on in the chain + q.properties().set("collapse", "0"); + Result r = new Result(q); + r.hits().add(createHit("http://acme.org/a.html", 10, 1, 0)); + r.hits().add(createHit("http://acme.org/b.html", 9, 1, 1)); + r.hits().add(createHit("http://acme.org/c.html", 8, 0, 1)); + r.hits().add(createHit("http://acme.org/d.html", 7, 1, 0)); + r.setTotalHitCount(4); + docsource.addResult(q, r); + + // Test collapsing, starting with amid + q = new 
Query("?query=test_collapse&collapsesize=1&collapsefield=amid,bmid"); + r = doSearch(collapse, q, 0, 4, chained); + + assertEquals(2, r.getHitCount()); + assertEquals(1, docsource.getQueryCount()); + assertHit("http://acme.org/a.html", 10, 1, 0, r.hits().get(0)); + assertHit("http://acme.org/c.html", 8, 0, 1, r.hits().get(1)); + + + // Test collapsing, starting with bmid + q = new Query("?query=test_collapse&collapsesize=1&collapsefield=bmid,amid"); + r = doSearch(collapse, q, 0, 4, chained); + + assertEquals(1, r.getHitCount()); + assertEquals(2, docsource.getQueryCount()); // 2 because + 1 from above + assertHit("http://acme.org/a.html", 10, 1, 0, r.hits().get(0)); + } + + /** + * Tests that using different collapse sizes for different fields works + */ + @Test + void testCollapsingWithMultipleFieldsAndMultipleCollapseSizes() { + // Set up + Map chained = new HashMap<>(); + FieldCollapsingSearcher collapse = new FieldCollapsingSearcher(); + DocumentSourceSearcher docsource = new DocumentSourceSearcher(); + chained.put(collapse, docsource); + + Query q = new Query("?query=test_collapse"); + // The searcher turns off collapsing further on in the chain + q.properties().set("collapse", "0"); + Result r = new Result(q); + r.hits().add(createHit("http://acme.org/a.html", 10, 1, 1)); + r.hits().add(createHit("http://acme.org/b.html", 9, 1, 0)); + r.hits().add(createHit("http://acme.org/c.html", 9, 0, 1)); + r.hits().add(createHit("http://acme.org/d.html", 8, 1, 0)); + r.setTotalHitCount(4); + docsource.addResult(q, r); + + // Test collapsing + // default collapsesize is used for amid, bmid is set to 2 + q = new Query("?query=test_collapse&collapsefield=amid,bmid&collapsesize.bmid=2"); + r = doSearch(collapse, q, 0, 4, chained); + + assertEquals(2, r.getHitCount()); + assertEquals(1, docsource.getQueryCount()); + assertHit("http://acme.org/a.html", 10, 1, 1, r.hits().get(0)); + assertHit("http://acme.org/c.html", 9, 0, 1, r.hits().get(1)); + } + + /** + * Tests that 
using different collapse sizes for different fields works, + * test that the different ways to configure collapse size have the correct precedence + */ + @Test + void testCollapsingWithMultipleFieldsAndMultipleCollapseSizeSources() { + // Set up + Map chained = new HashMap<>(); + FieldCollapsingSearcher collapse = new FieldCollapsingSearcher(); + DocumentSourceSearcher docsource = new DocumentSourceSearcher(); + chained.put(collapse, docsource); + + Query q = new Query("?query=test_collapse"); + // The searcher turns off collapsing further on in the chain + q.properties().set("collapse", "0"); + Result r = new Result(q); + r.hits().add(createHit("http://acme.org/a.html", 10, 1, 1)); + r.hits().add(createHit("http://acme.org/b.html", 9, 1, 0)); + r.hits().add(createHit("http://acme.org/c.html", 9, 0, 1)); + r.hits().add(createHit("http://acme.org/d.html", 8, 1, 0)); + r.hits().add(createHit("http://acme.org/3.html", 8, 1, 0)); + r.setTotalHitCount(5); + docsource.addResult(q, r); + + // Test collapsing + // collapsesize 10 overrides the default for amid & bmid + // collapsesize.bmid overrides the collapsesize for bmid again + q = new Query("?query=test_collapse&collapsesize=10&collapsefield=amid,bmid&collapsesize.bmid=2"); + r = doSearch(collapse, q, 0, 5, chained); + + assertEquals(4, r.getHitCount()); + assertEquals(1, docsource.getQueryCount()); + assertHit("http://acme.org/a.html", 10, 1, 1, r.hits().get(0)); + assertHit("http://acme.org/b.html", 9, 1, 0, r.hits().get(1)); + assertHit("http://acme.org/c.html", 9, 0, 1, r.hits().get(2)); + assertHit("http://acme.org/d.html", 8, 1, 0, r.hits().get(3)); + } + /** * Tests collapsing of "messy" data */ @@ -444,12 +557,24 @@ public class FieldCollapsingSearcherTestCase { return hit; } + private FastHit createHit(String uri,int relevancy,int amid,int bmid) { + FastHit hit = new FastHit(uri,relevancy); + hit.setField("amid", String.valueOf(amid)); + hit.setField("bmid", String.valueOf(bmid)); + return hit; + } + private 
void assertHit(String uri,int relevancy,int mid,Hit hit) { assertEquals(uri,hit.getId().toString()); assertEquals(relevancy, ((int) hit.getRelevance().getScore())); assertEquals(mid,Integer.parseInt((String) hit.getField("amid"))); } + private void assertHit(String uri,int relevancy,int amid,int bmid,Hit hit) { + assertHit(uri,relevancy,amid,hit); + assertEquals(bmid,Integer.parseInt((String) hit.getField("bmid"))); + } + private static class ZeroHitsControl extends com.yahoo.search.Searcher { public int queryCount = 0; -- cgit v1.2.3