diff options
author | Jon Bratseth <bratseth@gmail.com> | 2024-01-09 09:36:47 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-09 09:36:47 +0100 |
commit | 0e405a76c44ba1b61e443f455291e2c9163ab0cc (patch) | |
tree | e52da938352e770e12d84f69eee4720253971012 | |
parent | febffe8e9cf98f552c5c46e75e88f0aaa6072a2f (diff) | |
parent | a3ed28deef78118a39512c3a25b86b5191e4a7b9 (diff) |
Merge pull request #29733 from tomglk/multifield-collapsing v8.284.4
Add support for collapsing on multiple fields
2 files changed, 453 insertions, 158 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java index 708c6de1212..0559bd808bc 100644 --- a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java @@ -6,18 +6,20 @@ import com.yahoo.component.chain.dependencies.After; import com.yahoo.component.chain.dependencies.Before; import com.yahoo.container.QrSearchersConfig; import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.processing.request.CompoundName; import com.yahoo.search.Query; import com.yahoo.search.Result; import com.yahoo.search.Searcher; -import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.query.Properties; import com.yahoo.search.result.Hit; import com.yahoo.search.searchchain.Execution; import com.yahoo.search.searchchain.PhaseNames; +import java.util.Arrays; import java.util.Map; /** - * A searcher which does parametrized collapsing. + * A searcher which does parameterized collapsing. * * @author Steinar Knutsen */ @@ -30,12 +32,16 @@ public class FieldCollapsingSearcher extends Searcher { private static final CompoundName collapsesize = CompoundName.from("collapsesize"); private static final CompoundName collapseSummaryName = CompoundName.from("collapse.summary"); + /** Separator used for the fieldnames in collapsefield */ + private static final String separator = ","; + /** Maximum number of queries to send next searcher */ private static final int maxQueries = 4; /** * The max number of hits that will be preserved per unique - * value of the collapsing parameter. + * value of the collapsing parameter, + * if no field-specific value is configured. 
*/ private int defaultCollapseSize; @@ -85,11 +91,14 @@ public class FieldCollapsingSearcher extends Searcher { */ @Override public Result search(com.yahoo.search.Query query, Execution execution) { - String collapseField = query.properties().getString(collapsefield); + String collapseFieldParam = query.properties().getString(collapsefield); + + if (collapseFieldParam == null) return execution.search(query); + + String[] collapseFields = collapseFieldParam.split(separator); - if (collapseField == null) return execution.search(query); + int globalCollapseSize = query.properties().getInteger(collapsesize, defaultCollapseSize); - int collapseSize = query.properties().getInteger(collapsesize, defaultCollapseSize); query.properties().set(collapse, "0"); int hitsToRequest = query.getHits() != 0 ? (int) Math.ceil((query.getOffset() + query.getHits() + 1) * extraFactor) : 0; @@ -103,12 +112,15 @@ public class FieldCollapsingSearcher extends Searcher { String collapseSummary = query.properties().getString(collapseSummaryName); String summaryClass = (collapseSummary == null) ? query.getPresentation().getSummary() : collapseSummary; - query.trace("Collapsing by '" + collapseField + "' using summary '" + collapseSummary + "'", 2); + query.trace("Collapsing by '" + Arrays.toString(collapseFields) + "' using summary '" + collapseSummary + "'", 2); do { resultSource = search(query.clone(), execution, nextOffset, hitsToRequest); fill(resultSource, summaryClass, execution); - collapse(result, knownCollapses, resultSource, collapseField, collapseSize); + + collapse(result, knownCollapses, resultSource, + collapseFields, query.properties(), globalCollapseSize + ); hitsAfterCollapse = result.getHitCount(); if (resultSource.getTotalHitCount() < (hitsToRequest + nextOffset)) { @@ -143,35 +155,63 @@ public class FieldCollapsingSearcher extends Searcher { /** * Collapse logic. Preserves only maxHitsPerField hits - * for each unique value of the collapsing parameter. 
+ * for each unique value of the collapsing parameters. + * Uses collapsefields sequentially. */ - private void collapse(Result result, Map<String, Integer> knownCollapses, - Result resultSource, String collapseField, int collapseSize) { + private void collapse(Result result, Map<String, Integer> knownCollapses, Result resultSource, + String[] collapseFields, Properties queryProperties, int globalCollapseSize) { + for (Hit unknownHit : resultSource.hits()) { if (!(unknownHit instanceof FastHit hit)) { result.hits().add(unknownHit); continue; } - Object peek = hit.getField(collapseField); - String collapseId = peek != null ? peek.toString() : null; - if (collapseId == null) { - result.hits().add(hit); - continue; - } - if (knownCollapses.containsKey(collapseId)) { - int numHitsThisField = knownCollapses.get(collapseId); + boolean addHit = true; - if (numHitsThisField < collapseSize) { - result.hits().add(hit); - ++numHitsThisField; - knownCollapses.put(collapseId, numHitsThisField); + for (String collapseField : collapseFields) { + + Object peek = hit.getField(collapseField); + String collapseId = peek != null ? 
peek.toString() : null; + if (collapseId == null) { + continue; + } + + // prepending the fieldname is necessary to distinguish between values in the different collapsefields + // @ cannot occur in fieldnames + String collapseKey = collapseField + "@" + collapseId; + + if (knownCollapses.containsKey(collapseKey)) { + int numHitsThisField = knownCollapses.get(collapseKey); + int collapseSize = getCollapseSize(queryProperties, collapseField, globalCollapseSize); + + if (numHitsThisField < collapseSize) { + ++numHitsThisField; + knownCollapses.put(collapseKey, numHitsThisField); + } else { + addHit = false; + // immediate return, so that following collapseFields do not record the fieldvalues of this hit + // needed for sequential collapsing, otherwise later collapsefields would remove too many hits + break; + } + } else { + knownCollapses.put(collapseKey, 1); } - } else { - knownCollapses.put(collapseId, 1); + } + + if (addHit) { result.hits().add(hit); } } } + private int getCollapseSize(Properties properties, String fieldName, int globalCollapseSize) { + Integer fieldCollapseSize = properties.getInteger(collapsesize.append(fieldName)); + + if (fieldCollapseSize != null) { + return fieldCollapseSize; + } + + return globalCollapseSize; + } } diff --git a/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java b/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java index 361079f7595..d45d3866783 100644 --- a/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java @@ -68,6 +68,74 @@ public class FieldCollapsingSearcherTestCase { assertEquals(1, checker.queryCount); } + /** + * Tests that we do not fail on documents with missing collapsefield + * and that they are kept in the result. 
+ */ + @Test + void testFieldCollapsingWithCollapseFieldMissing() { + Map<Searcher, Searcher> chained = new HashMap<>(); + + // Set up + FieldCollapsingSearcher collapse = new FieldCollapsingSearcher(); + DocumentSourceSearcher docsource = new DocumentSourceSearcher(); + chained.put(collapse, docsource); + + Query q = new Query("?query=test_collapse"); + Result r = new Result(q); + r.hits().add(createHitWithoutFields("http://acme.org/a.html", 10)); + r.hits().add(createHitAmid("http://acme.org/b.html", 9, 1)); + r.hits().add(createHitWithoutFields("http://acme.org/c.html", 9)); + r.hits().add(createHitAmid("http://acme.org/d.html", 8, 2)); + r.hits().add(createHitAmid("http://acme.org/d.html", 7, 2)); + r.setTotalHitCount(5); + docsource.addResult(q, r); + + // Test basic collapsing on amid + q = new Query("?query=test_collapse&collapsefield=amid&collapsesize=1"); + r = doSearch(collapse, q, 0, 10, chained); + + assertEquals(4, r.getHitCount()); + assertEquals(1, docsource.getQueryCount()); + + assertHitWithoutFields("http://acme.org/a.html", 10, r.hits().get(0)); + assertHitAmid("http://acme.org/b.html", 9, 1, r.hits().get(1)); + assertHitWithoutFields("http://acme.org/c.html", 9, r.hits().get(2)); + assertHitAmid("http://acme.org/d.html", 8, 2, r.hits().get(3)); + } + + @Test + void testFieldCollapsingOnMultipleFieldsWithCollapseFieldsMissing() { + Map<Searcher, Searcher> chained = new HashMap<>(); + + // Set up + FieldCollapsingSearcher collapse = new FieldCollapsingSearcher(); + DocumentSourceSearcher docsource = new DocumentSourceSearcher(); + chained.put(collapse, docsource); + + Query q = new Query("?query=test_collapse"); + Result r = new Result(q); + r.hits().add(createHitWithoutFields("http://acme.org/a.html", 10)); // - - + r.hits().add(createHitBmid("http://acme.org/b.html", 9, 1)); // - 1 + r.hits().add(createHitAmid("http://acme.org/c.html", 9, 1)); // 1 - + r.hits().add(createHitBmid("http://acme.org/d.html", 8, 1)); // - 1 + 
r.hits().add(createHit("http://acme.org/e.html", 8, 2, 2)); // 2 2 + r.setTotalHitCount(5); + docsource.addResult(q, r); + + // Test basic collapsing + q = new Query("?query=test_collapse&collapsefield=amid,bmid&collapsesize=1"); + r = doSearch(collapse, q, 0, 10, chained); + + assertEquals(4, r.getHitCount()); + assertEquals(1, docsource.getQueryCount()); + + assertHitWithoutFields("http://acme.org/a.html", 10, r.hits().get(0)); + assertHitBmid("http://acme.org/b.html", 9, 1, r.hits().get(1)); + assertHitAmid("http://acme.org/c.html", 9, 1, r.hits().get(2)); + assertHit("http://acme.org/e.html", 8, 2, 2, r.hits().get(3)); + } + @Test void testFieldCollapsing() { Map<Searcher, Searcher> chained = new HashMap<>(); @@ -77,20 +145,16 @@ public class FieldCollapsingSearcherTestCase { DocumentSourceSearcher docsource = new DocumentSourceSearcher(); chained.put(collapse, docsource); - // Caveat: Collapse is set to false, because that's what the - // collapser asks for - Query q = new Query("?query=test_collapse&collapsefield=amid"); - // The searcher turns off collapsing further on in the chain - q.properties().set("collapse", "0"); + Query q = new Query("?query=test_collapse"); Result r = new Result(q); - r.hits().add(createHit("http://acme.org/a.html", 10, 0)); - r.hits().add(createHit("http://acme.org/b.html", 9, 0)); - r.hits().add(createHit("http://acme.org/c.html", 9, 1)); - r.hits().add(createHit("http://acme.org/d.html", 8, 1)); - r.hits().add(createHit("http://acme.org/e.html", 8, 2)); - r.hits().add(createHit("http://acme.org/f.html", 7, 2)); - r.hits().add(createHit("http://acme.org/g.html", 7, 3)); - r.hits().add(createHit("http://acme.org/h.html", 6, 3)); + r.hits().add(createHitAmid("http://acme.org/a.html", 10, 0)); + r.hits().add(createHitAmid("http://acme.org/b.html", 9, 0)); + r.hits().add(createHitAmid("http://acme.org/c.html", 9, 1)); + r.hits().add(createHitAmid("http://acme.org/d.html", 8, 1)); + r.hits().add(createHitAmid("http://acme.org/e.html", 
8, 2)); + r.hits().add(createHitAmid("http://acme.org/f.html", 7, 2)); + r.hits().add(createHitAmid("http://acme.org/g.html", 7, 3)); + r.hits().add(createHitAmid("http://acme.org/h.html", 6, 3)); r.setTotalHitCount(8); docsource.addResult(q, r); @@ -100,46 +164,47 @@ public class FieldCollapsingSearcherTestCase { assertEquals(4, r.getHitCount()); assertEquals(1, docsource.getQueryCount()); - assertHit("http://acme.org/a.html", 10, 0, r.hits().get(0)); - assertHit("http://acme.org/c.html", 9, 1, r.hits().get(1)); - assertHit("http://acme.org/e.html", 8, 2, r.hits().get(2)); - assertHit("http://acme.org/g.html", 7, 3, r.hits().get(3)); + assertHitAmid("http://acme.org/a.html", 10, 0, r.hits().get(0)); + assertHitAmid("http://acme.org/c.html", 9, 1, r.hits().get(1)); + assertHitAmid("http://acme.org/e.html", 8, 2, r.hits().get(2)); + assertHitAmid("http://acme.org/g.html", 7, 3, r.hits().get(3)); } + /** + * Test that collapsing works if multiple searches are necessary. + */ @Test void testFieldCollapsingTwoPhase() { - // Set up Map<Searcher, Searcher> chained = new HashMap<>(); - FieldCollapsingSearcher collapse = new FieldCollapsingSearcher(); + + // Set up + FieldCollapsingSearcher collapse = new FieldCollapsingSearcher(1, 1.0); DocumentSourceSearcher docsource = new DocumentSourceSearcher(); chained.put(collapse, docsource); - // Caveat: Collapse is set to false, because that's what the - // collapser asks for - Query q = new Query("?query=test_collapse&collapsefield=amid"); - // The searcher turns off collapsing further on in the chain - q.properties().set("collapse", "0"); + + Query q = new Query("?query=test_collapse"); Result r = new Result(q); - r.hits().add(createHit("http://acme.org/a.html", 10, 0)); - r.hits().add(createHit("http://acme.org/b.html", 9, 0)); - r.hits().add(createHit("http://acme.org/c.html", 9, 1)); - r.hits().add(createHit("http://acme.org/d.html", 8, 1)); - r.hits().add(createHit("http://acme.org/e.html", 8, 2)); - 
r.hits().add(createHit("http://acme.org/f.html", 7, 2)); - r.hits().add(createHit("http://acme.org/g.html", 7, 3)); - r.hits().add(createHit("http://acme.org/h.html", 6, 3)); + r.hits().add(createHitAmid("http://acme.org/a.html", 10, 0)); + r.hits().add(createHitAmid("http://acme.org/b.html", 9, 0)); + r.hits().add(createHitAmid("http://acme.org/c.html", 9, 1)); + r.hits().add(createHitAmid("http://acme.org/d.html", 8, 1)); + r.hits().add(createHitAmid("http://acme.org/e.html", 8, 2)); + r.hits().add(createHitAmid("http://acme.org/f.html", 7, 2)); + r.hits().add(createHitAmid("http://acme.org/g.html", 7, 3)); + r.hits().add(createHitAmid("http://acme.org/h.html", 6, 3)); r.setTotalHitCount(8); docsource.addResult(q, r); // Test basic collapsing on mid q = new Query("?query=test_collapse&collapsefield=amid"); - r = doSearch(collapse, q, 0, 10, chained); + r = doSearch(collapse, q, 0, 4, chained); assertEquals(4, r.getHitCount()); - assertEquals(1, docsource.getQueryCount()); - assertHit("http://acme.org/a.html", 10, 0, r.hits().get(0)); - assertHit("http://acme.org/c.html", 9, 1, r.hits().get(1)); - assertHit("http://acme.org/e.html", 8, 2, r.hits().get(2)); - assertHit("http://acme.org/g.html", 7, 3, r.hits().get(3)); + assertEquals(2, docsource.getQueryCount()); + assertHitAmid("http://acme.org/a.html", 10, 0, r.hits().get(0)); + assertHitAmid("http://acme.org/c.html", 9, 1, r.hits().get(1)); + assertHitAmid("http://acme.org/e.html", 8, 2, r.hits().get(2)); + assertHitAmid("http://acme.org/g.html", 7, 3, r.hits().get(3)); } @Test @@ -152,14 +217,14 @@ public class FieldCollapsingSearcherTestCase { Query q = new Query("?query=test_collapse"); Result r = new Result(q); - r.hits().add(createHit("http://acme.org/a.html", 10, 0)); - r.hits().add(createHit("http://acme.org/b.html", 9, 0)); - r.hits().add(createHit("http://acme.org/c.html", 9, 1)); - r.hits().add(createHit("http://acme.org/d.html", 8, 1)); - r.hits().add(createHit("http://acme.org/e.html", 8, 2)); - 
r.hits().add(createHit("http://acme.org/f.html", 7, 2)); - r.hits().add(createHit("http://acme.org/g.html", 7, 3)); - r.hits().add(createHit("http://acme.org/h.html", 6, 3)); + r.hits().add(createHitAmid("http://acme.org/a.html", 10, 0)); + r.hits().add(createHitAmid("http://acme.org/b.html", 9, 0)); + r.hits().add(createHitAmid("http://acme.org/c.html", 9, 1)); + r.hits().add(createHitAmid("http://acme.org/d.html", 8, 1)); + r.hits().add(createHitAmid("http://acme.org/e.html", 8, 2)); + r.hits().add(createHitAmid("http://acme.org/f.html", 7, 2)); + r.hits().add(createHitAmid("http://acme.org/g.html", 7, 3)); + r.hits().add(createHitAmid("http://acme.org/h.html", 6, 3)); r.setTotalHitCount(8); docsource.addResult(q, r); @@ -183,20 +248,18 @@ public class FieldCollapsingSearcherTestCase { DocumentSourceSearcher docsource = new DocumentSourceSearcher(); chained.put(collapse, docsource); - Query q = new Query("?query=test_collapse&collapsesize=1&collapsefield=amid"); - // The searcher turns off collapsing further on in the chain - q.properties().set("collapse", "0"); + Query q = new Query("?query=test_collapse"); Result r = new Result(q); - r.hits().add(createHit("http://acme.org/a.html", 10, 0)); - r.hits().add(createHit("http://acme.org/b.html", 9, 0)); - r.hits().add(createHit("http://acme.org/c.html", 9, 0)); - r.hits().add(createHit("http://acme.org/d.html", 8, 0)); - r.hits().add(createHit("http://acme.org/e.html", 8, 0)); - r.hits().add(createHit("http://acme.org/f.html", 7, 0)); - r.hits().add(createHit("http://acme.org/g.html", 7, 0)); - r.hits().add(createHit("http://acme.org/h.html", 6, 0)); - r.hits().add(createHit("http://acme.org/i.html", 5, 1)); - r.hits().add(createHit("http://acme.org/j.html", 4, 2)); + r.hits().add(createHitAmid("http://acme.org/a.html", 10, 0)); + r.hits().add(createHitAmid("http://acme.org/b.html", 9, 0)); + r.hits().add(createHitAmid("http://acme.org/c.html", 9, 0)); + r.hits().add(createHitAmid("http://acme.org/d.html", 8, 0)); + 
r.hits().add(createHitAmid("http://acme.org/e.html", 8, 0)); + r.hits().add(createHitAmid("http://acme.org/f.html", 7, 0)); + r.hits().add(createHitAmid("http://acme.org/g.html", 7, 0)); + r.hits().add(createHitAmid("http://acme.org/h.html", 6, 0)); + r.hits().add(createHitAmid("http://acme.org/i.html", 5, 1)); + r.hits().add(createHitAmid("http://acme.org/j.html", 4, 2)); r.setTotalHitCount(10); docsource.addResult(q, r); @@ -206,15 +269,171 @@ public class FieldCollapsingSearcherTestCase { assertEquals(2, r.getHitCount()); assertEquals(2, docsource.getQueryCount()); - assertHit("http://acme.org/a.html", 10, 0, r.hits().get(0)); - assertHit("http://acme.org/i.html", 5, 1, r.hits().get(1)); + assertHitAmid("http://acme.org/a.html", 10, 0, r.hits().get(0)); + assertHitAmid("http://acme.org/i.html", 5, 1, r.hits().get(1)); // Next results docsource.resetQueryCount(); r = doSearch(collapse, q, 2, 2, chained); assertEquals(1, r.getHitCount()); assertEquals(2, docsource.getQueryCount()); - assertHit("http://acme.org/j.html", 4, 2, r.hits().get(0)); + assertHitAmid("http://acme.org/j.html", 4, 2, r.hits().get(0)); + } + + /** + * Tests that collapsing hits with 2 fields works, + * this test also shows that field order is important + */ + @Test + void testCollapsingWithMultipleFields() { + // Set up + Map<Searcher, Searcher> chained = new HashMap<>(); + FieldCollapsingSearcher collapse = new FieldCollapsingSearcher(); + DocumentSourceSearcher docsource = new DocumentSourceSearcher(); + chained.put(collapse, docsource); + + Query q = new Query("?query=test_collapse"); + Result r = new Result(q); + r.hits().add(createHit("http://acme.org/a.html", 10, 1, 0)); + r.hits().add(createHit("http://acme.org/b.html", 9, 1, 1)); + r.hits().add(createHit("http://acme.org/c.html", 8, 0, 1)); + r.hits().add(createHit("http://acme.org/d.html", 7, 1, 0)); + r.setTotalHitCount(4); + docsource.addResult(q, r); + + // Test collapsing, starting with amid + q = new 
Query("?query=test_collapse&collapsesize=1&collapsefield=amid,bmid"); + r = doSearch(collapse, q, 0, 4, chained); + + assertEquals(2, r.getHitCount()); + assertEquals(1, docsource.getQueryCount()); + assertHit("http://acme.org/a.html", 10, 1, 0, r.hits().get(0)); + assertHit("http://acme.org/c.html", 8, 0, 1, r.hits().get(1)); + + docsource.resetQueryCount(); + + // Test collapsing, starting with bmid + q = new Query("?query=test_collapse&collapsesize=1&collapsefield=bmid,amid"); + r = doSearch(collapse, q, 0, 4, chained); + + assertEquals(1, r.getHitCount()); + assertEquals(1, docsource.getQueryCount()); + assertHit("http://acme.org/a.html", 10, 1, 0, r.hits().get(0)); + } + + /** + * Tests that using different collapse sizes for different fields works + */ + @Test + void testCollapsingWithMultipleFieldsAndMultipleCollapseSizes() { + // Set up + Map<Searcher, Searcher> chained = new HashMap<>(); + FieldCollapsingSearcher collapse = new FieldCollapsingSearcher(); + DocumentSourceSearcher docsource = new DocumentSourceSearcher(); + chained.put(collapse, docsource); + + Query q = new Query("?query=test_collapse"); + Result r = new Result(q); + r.hits().add(createHit("http://acme.org/a.html", 10, 1, 1)); + r.hits().add(createHit("http://acme.org/b.html", 9, 1, 0)); + r.hits().add(createHit("http://acme.org/c.html", 9, 0, 1)); + r.hits().add(createHit("http://acme.org/d.html", 8, 1, 0)); + r.setTotalHitCount(4); + docsource.addResult(q, r); + + // Test collapsing + // default collapsesize is used for amid, bmid is set to 2 + q = new Query("?query=test_collapse&collapsefield=amid,bmid&collapsesize.bmid=2"); + r = doSearch(collapse, q, 0, 4, chained); + + assertEquals(2, r.getHitCount()); + assertEquals(1, docsource.getQueryCount()); + assertHit("http://acme.org/a.html", 10, 1, 1, r.hits().get(0)); + assertHit("http://acme.org/c.html", 9, 0, 1, r.hits().get(1)); + } + + /** + * Tests that using different collapse sizes for different fields works, + * test that the 
different ways to configure collapse size have the correct precedence + */ + @Test + void testCollapsingWithMultipleFieldsAndMultipleCollapseSizeSources() { + // Set up + Map<Searcher, Searcher> chained = new HashMap<>(); + FieldCollapsingSearcher collapse = new FieldCollapsingSearcher(); + DocumentSourceSearcher docsource = new DocumentSourceSearcher(); + chained.put(collapse, docsource); + + Query q = new Query("?query=test_collapse"); + Result r = new Result(q); + r.hits().add(createHit("http://acme.org/a.html", 10, 1, 1)); + r.hits().add(createHit("http://acme.org/b.html", 9, 1, 0)); + r.hits().add(createHit("http://acme.org/c.html", 9, 0, 1)); + r.hits().add(createHit("http://acme.org/d.html", 8, 1, 0)); + r.hits().add(createHit("http://acme.org/3.html", 8, 1, 0)); + r.setTotalHitCount(5); + docsource.addResult(q, r); + + // Test collapsing + // collapsesize 10 overwrites the default for amid & bmid + // collapsize.bmid overwrites the collapsesize for bmid again + q = new Query("?query=test_collapse&collapsesize=10&collapsefield=amid,bmid&collapsesize.bmid=2"); + r = doSearch(collapse, q, 0, 5, chained); + + assertEquals(4, r.getHitCount()); + assertEquals(1, docsource.getQueryCount()); + assertHit("http://acme.org/a.html", 10, 1, 1, r.hits().get(0)); + assertHit("http://acme.org/b.html", 9, 1, 0, r.hits().get(1)); + assertHit("http://acme.org/c.html", 9, 0, 1, r.hits().get(2)); + assertHit("http://acme.org/d.html", 8, 1, 0, r.hits().get(3)); + } + + /** + * Tests that collapsing on multiple fields works if we have to search multiple + * time to get enough hits + */ + @Test + void testCollapsingOnMoreFieldsWithManySimilarFieldValues() { + // Set up + Map<Searcher, Searcher> chained = new HashMap<>(); + FieldCollapsingSearcher collapse = new FieldCollapsingSearcher(4, 1.0); + DocumentSourceSearcher docsource = new DocumentSourceSearcher(); + chained.put(collapse, docsource); + + Query q = new Query("?query=test_collapse"); + Result r = new Result(q); + 
r.hits().add(createHit("http://acme.org/a.html", 10, 0, 1, 1)); // first hit + r.hits().add(createHit("http://acme.org/b.html", 9, 0, 1, 2)); + r.hits().add(createHit("http://acme.org/c.html", 9, 0, 6, 2)); // - - 1. search: 1 + r.hits().add(createHit("http://acme.org/d.html", 8, 0, 6, 3)); + r.hits().add(createHit("http://acme.org/e.html", 8, 0, 6, 3)); + r.hits().add(createHit("http://acme.org/f.html", 7, 0, 6, 3)); // - - 1. search: 2 + r.hits().add(createHit("http://acme.org/g.html", 7, 0, 1, 1)); + r.hits().add(createHit("http://acme.org/h.html", 6, 1, 1, 1)); + r.hits().add(createHit("http://acme.org/i.html", 5, 2, 2, 1)); // - - 1. search: 3 + r.hits().add(createHit("http://acme.org/j.html", 4, 3, 3, 2)); // 3rd hit, cmid new + r.hits().add(createHit("http://acme.org/k.html", 4, 3, 4, 3)); + r.hits().add(createHit("http://acme.org/l.html", 4, 3, 5, 3)); // - - 1. search: 4 + r.hits().add(createHit("http://acme.org/m.html", 4, 4, 6, 3)); // 4th hit, amid new + r.hits().add(createHit("http://acme.org/n.html", 4, 4, 7, 4)); + r.setTotalHitCount(14); + docsource.addResult(q, r); + + // Test collapsing + q = new Query("?query=test_collapse&collapsesize=1&collapsefield=amid,bmid,cmid"); + r = doSearch(collapse, q, 0, 2, chained); + + assertEquals(2, r.getHitCount()); + assertEquals(4, docsource.getQueryCount()); + assertHit("http://acme.org/a.html", 10, 0, 1, 1, r.hits().get(0)); + assertHit("http://acme.org/j.html", 4, 3, 3, 2, r.hits().get(1)); + + // Next results + docsource.resetQueryCount(); + r = doSearch(collapse, q, 2, 2, chained); + assertEquals(1, r.getHitCount()); + assertEquals(3, docsource.getQueryCount()); + assertHit("http://acme.org/m.html", 4, 4, 6, 3, r.hits().get(0)); } /** @@ -228,20 +447,18 @@ public class FieldCollapsingSearcherTestCase { DocumentSourceSearcher docsource = new DocumentSourceSearcher(); chained.put(collapse, docsource); - Query q = new Query("?query=test_collapse&collapse=true&collapsefield=amid"); - // The searcher turns off 
collapsing further on in the chain - q.properties().set("collapse", "0"); + Query q = new Query("?query=test_collapse"); Result r = new Result(q); - r.hits().add(createHit("http://acme.org/a.html", 10, 1)); - r.hits().add(createHit("http://acme.org/b.html", 10, 1)); - r.hits().add(createHit("http://acme.org/c.html", 10, 0)); - r.hits().add(createHit("http://acme.org/d.html", 10, 0)); - r.hits().add(createHit("http://acme.org/e.html", 10, 0)); - r.hits().add(createHit("http://acme.org/f.html", 10, 0)); - r.hits().add(createHit("http://acme.org/g.html", 10, 0)); - r.hits().add(createHit("http://acme.org/h.html", 10, 0)); - r.hits().add(createHit("http://acme.org/i.html", 10, 0)); - r.hits().add(createHit("http://acme.org/j.html", 10, 1)); + r.hits().add(createHitAmid("http://acme.org/a.html", 10, 1)); + r.hits().add(createHitAmid("http://acme.org/b.html", 10, 1)); + r.hits().add(createHitAmid("http://acme.org/c.html", 10, 0)); + r.hits().add(createHitAmid("http://acme.org/d.html", 10, 0)); + r.hits().add(createHitAmid("http://acme.org/e.html", 10, 0)); + r.hits().add(createHitAmid("http://acme.org/f.html", 10, 0)); + r.hits().add(createHitAmid("http://acme.org/g.html", 10, 0)); + r.hits().add(createHitAmid("http://acme.org/h.html", 10, 0)); + r.hits().add(createHitAmid("http://acme.org/i.html", 10, 0)); + r.hits().add(createHitAmid("http://acme.org/j.html", 10, 1)); r.setTotalHitCount(10); docsource.addResult(q, r); @@ -250,8 +467,8 @@ public class FieldCollapsingSearcherTestCase { r = doSearch(collapse, q, 0, 3, chained); assertEquals(2, r.getHitCount()); - assertHit("http://acme.org/a.html", 10, 1, r.hits().get(0)); - assertHit("http://acme.org/c.html", 10, 0, r.hits().get(1)); + assertHitAmid("http://acme.org/a.html", 10, 1, r.hits().get(0)); + assertHitAmid("http://acme.org/c.html", 10, 0, r.hits().get(1)); } @Test @@ -265,20 +482,16 @@ public class FieldCollapsingSearcherTestCase { chained.put(collapse, messUp); chained.put(messUp, docsource); - // Caveat: 
Collapse is set to false, because that's what the collapser asks for Query q = new Query("?query=%22test%20collapse%22+b&collapsefield=amid&type=all"); - - // The searcher turns off collapsing further on in the chain - q.properties().set("collapse", "0"); Result r = new Result(q); - r.hits().add(createHit("http://acme.org/a.html", 10, 0)); - r.hits().add(createHit("http://acme.org/b.html", 9, 0)); - r.hits().add(createHit("http://acme.org/c.html", 9, 0)); - r.hits().add(createHit("http://acme.org/d.html", 8, 0)); - r.hits().add(createHit("http://acme.org/e.html", 8, 0)); - r.hits().add(createHit("http://acme.org/f.html", 7, 0)); - r.hits().add(createHit("http://acme.org/g.html", 7, 0)); - r.hits().add(createHit("http://acme.org/h.html", 6, 1)); + r.hits().add(createHitAmid("http://acme.org/a.html", 10, 0)); + r.hits().add(createHitAmid("http://acme.org/b.html", 9, 0)); + r.hits().add(createHitAmid("http://acme.org/c.html", 9, 0)); + r.hits().add(createHitAmid("http://acme.org/d.html", 8, 0)); + r.hits().add(createHitAmid("http://acme.org/e.html", 8, 0)); + r.hits().add(createHitAmid("http://acme.org/f.html", 7, 0)); + r.hits().add(createHitAmid("http://acme.org/g.html", 7, 0)); + r.hits().add(createHitAmid("http://acme.org/h.html", 6, 1)); r.setTotalHitCount(8); docsource.addResult(q, r); @@ -288,8 +501,8 @@ public class FieldCollapsingSearcherTestCase { assertEquals(2, docsource.getQueryCount()); assertEquals(2, r.getHitCount()); - assertHit("http://acme.org/a.html", 10, 0, r.hits().get(0)); - assertHit("http://acme.org/h.html", 6, 1, r.hits().get(1)); + assertHitAmid("http://acme.org/a.html", 10, 0, r.hits().get(0)); + assertHitAmid("http://acme.org/h.html", 6, 1, r.hits().get(1)); } @Test @@ -299,20 +512,17 @@ public class FieldCollapsingSearcherTestCase { FieldCollapsingSearcher collapse = new FieldCollapsingSearcher(); DocumentSourceSearcher docsource = new DocumentSourceSearcher(); chained.put(collapse, docsource); - // Caveat: Collapse is set to false, 
because that's what the - // collapser asks for - Query q = new Query("?query=test_collapse&collapsefield=amid&summary=placeholder"); - // The searcher turns off collapsing further on in the chain - q.properties().set("collapse", "0"); + + Query q = new Query("?query=test_collapse&summary=placeholder"); Result r = new Result(q); - r.hits().add(createHit("http://acme.org/a.html", 10, 0)); - r.hits().add(createHit("http://acme.org/b.html", 9, 0)); - r.hits().add(createHit("http://acme.org/c.html", 9, 1)); - r.hits().add(createHit("http://acme.org/d.html", 8, 1)); - r.hits().add(createHit("http://acme.org/e.html", 8, 2)); - r.hits().add(createHit("http://acme.org/f.html", 7, 2)); - r.hits().add(createHit("http://acme.org/g.html", 7, 3)); - r.hits().add(createHit("http://acme.org/h.html", 6, 3)); + r.hits().add(createHitAmid("http://acme.org/a.html", 10, 0)); + r.hits().add(createHitAmid("http://acme.org/b.html", 9, 0)); + r.hits().add(createHitAmid("http://acme.org/c.html", 9, 1)); + r.hits().add(createHitAmid("http://acme.org/d.html", 8, 1)); + r.hits().add(createHitAmid("http://acme.org/e.html", 8, 2)); + r.hits().add(createHitAmid("http://acme.org/f.html", 7, 2)); + r.hits().add(createHitAmid("http://acme.org/g.html", 7, 3)); + r.hits().add(createHitAmid("http://acme.org/h.html", 6, 3)); r.setTotalHitCount(8); docsource.addResult(q, r); @@ -323,10 +533,10 @@ public class FieldCollapsingSearcherTestCase { assertEquals(4, r.getHitCount()); assertEquals(1, docsource.getQueryCount()); assertTrue(r.isFilled("placeholder")); - assertHit("http://acme.org/a.html", 10, 0, r.hits().get(0)); - assertHit("http://acme.org/c.html", 9, 1, r.hits().get(1)); - assertHit("http://acme.org/e.html", 8, 2, r.hits().get(2)); - assertHit("http://acme.org/g.html", 7, 3, r.hits().get(3)); + assertHitAmid("http://acme.org/a.html", 10, 0, r.hits().get(0)); + assertHitAmid("http://acme.org/c.html", 9, 1, r.hits().get(1)); + assertHitAmid("http://acme.org/e.html", 8, 2, r.hits().get(2)); + 
assertHitAmid("http://acme.org/g.html", 7, 3, r.hits().get(3)); docsource.resetQueryCount(); // Test basic collapsing on mid @@ -337,10 +547,10 @@ public class FieldCollapsingSearcherTestCase { assertEquals(1, docsource.getQueryCount()); assertFalse(r.isFilled("placeholder")); assertTrue(r.isFilled("short")); - assertHit("http://acme.org/a.html", 10, 0, r.hits().get(0)); - assertHit("http://acme.org/c.html", 9, 1, r.hits().get(1)); - assertHit("http://acme.org/e.html", 8, 2, r.hits().get(2)); - assertHit("http://acme.org/g.html", 7, 3, r.hits().get(3)); + assertHitAmid("http://acme.org/a.html", 10, 0, r.hits().get(0)); + assertHitAmid("http://acme.org/c.html", 9, 1, r.hits().get(1)); + assertHitAmid("http://acme.org/e.html", 8, 2, r.hits().get(2)); + assertHitAmid("http://acme.org/g.html", 7, 3, r.hits().get(3)); } @Test @@ -350,20 +560,17 @@ public class FieldCollapsingSearcherTestCase { DocumentSourceSearcher docsource = new DocumentSourceSearcher(); Chain<Searcher> chain = new Chain<>(collapse, new AddAggregationStyleGroupingResultSearcher(), docsource); - // Caveat: Collapse is set to false, because that's what the - // collapser asks for - Query q = new Query("?query=test_collapse&collapsefield=amid"); - // The searcher turns off collapsing further on in the chain - q.properties().set("collapse", "0"); + Query q = new Query("?query=test_collapse"); + Result r = new Result(q); - r.hits().add(createHit("http://acme.org/a.html", 10, 0)); - r.hits().add(createHit("http://acme.org/b.html", 9, 0)); - r.hits().add(createHit("http://acme.org/c.html", 9, 1)); - r.hits().add(createHit("http://acme.org/d.html", 8, 1)); - r.hits().add(createHit("http://acme.org/e.html", 8, 2)); - r.hits().add(createHit("http://acme.org/f.html", 7, 2)); - r.hits().add(createHit("http://acme.org/g.html", 7, 3)); - r.hits().add(createHit("http://acme.org/h.html", 6, 3)); + r.hits().add(createHitAmid("http://acme.org/a.html", 10, 0)); + r.hits().add(createHitAmid("http://acme.org/b.html", 9, 
0)); + r.hits().add(createHitAmid("http://acme.org/c.html", 9, 1)); + r.hits().add(createHitAmid("http://acme.org/d.html", 8, 1)); + r.hits().add(createHitAmid("http://acme.org/e.html", 8, 2)); + r.hits().add(createHitAmid("http://acme.org/f.html", 7, 2)); + r.hits().add(createHitAmid("http://acme.org/g.html", 7, 3)); + r.hits().add(createHitAmid("http://acme.org/h.html", 6, 3)); r.setTotalHitCount(8); docsource.addResult(q, r); @@ -374,10 +581,10 @@ public class FieldCollapsingSearcherTestCase { // Assert that the regular hits are collapsed assertEquals(4 + 1, result.getHitCount()); assertEquals(1, docsource.getQueryCount()); - assertHit("http://acme.org/a.html", 10, 0, result.hits().get(0)); - assertHit("http://acme.org/c.html", 9, 1, result.hits().get(1)); - assertHit("http://acme.org/e.html", 8, 2, result.hits().get(2)); - assertHit("http://acme.org/g.html", 7, 3, result.hits().get(3)); + assertHitAmid("http://acme.org/a.html", 10, 0, result.hits().get(0)); + assertHitAmid("http://acme.org/c.html", 9, 1, result.hits().get(1)); + assertHitAmid("http://acme.org/e.html", 8, 2, result.hits().get(2)); + assertHitAmid("http://acme.org/g.html", 7, 3, result.hits().get(3)); // Assert that the aggregation group hierarchy is left intact HitGroup root = getFirstGroupIn(result.hits()); @@ -438,16 +645,64 @@ public class FieldCollapsingSearcherTestCase { } } - private FastHit createHit(String uri,int relevancy,int mid) { + private FastHit createHitWithoutFields(String uri, int relevancy) { + return new FastHit(uri,relevancy); + } + + private FastHit createHitAmid(String uri,int relevancy,int amid) { FastHit hit = new FastHit(uri,relevancy); - hit.setField("amid", String.valueOf(mid)); + hit.setField("amid", String.valueOf(amid)); return hit; } - private void assertHit(String uri,int relevancy,int mid,Hit hit) { + private FastHit createHitBmid(String uri,int relevancy,int bmid) { + FastHit hit = new FastHit(uri,relevancy); + hit.setField("bmid", String.valueOf(bmid)); + 
return hit; + } + + private FastHit createHit(String uri,int relevancy,int amid,int bmid) { + FastHit hit = new FastHit(uri,relevancy); + hit.setField("amid", String.valueOf(amid)); + hit.setField("bmid", String.valueOf(bmid)); + return hit; + } + + private FastHit createHit(String uri,int relevancy,int amid,int bmid,int cmid) { + FastHit hit = new FastHit(uri,relevancy); + hit.setField("amid", String.valueOf(amid)); + hit.setField("bmid", String.valueOf(bmid)); + hit.setField("cmid", String.valueOf(cmid)); + return hit; + } + + private void assertHitWithoutFields(String uri,int relevancy,Hit hit) { assertEquals(uri,hit.getId().toString()); assertEquals(relevancy, ((int) hit.getRelevance().getScore())); - assertEquals(mid,Integer.parseInt((String) hit.getField("amid"))); + assertTrue(hit.fields().isEmpty()); + } + + private void assertHitAmid(String uri, int relevancy, int amid, Hit hit) { + assertEquals(uri,hit.getId().toString()); + assertEquals(relevancy, ((int) hit.getRelevance().getScore())); + assertEquals(amid,Integer.parseInt((String) hit.getField("amid"))); + } + + private void assertHitBmid(String uri, int relevancy, int bmid, Hit hit) { + assertEquals(uri,hit.getId().toString()); + assertEquals(relevancy, ((int) hit.getRelevance().getScore())); + assertEquals(bmid,Integer.parseInt((String) hit.getField("bmid"))); + } + + private void assertHit(String uri,int relevancy,int amid,int bmid,Hit hit) { + assertHitAmid(uri,relevancy,amid,hit); + assertEquals(bmid,Integer.parseInt((String) hit.getField("bmid"))); + } + + private void assertHit(String uri,int relevancy,int amid,int bmid,int cmid,Hit hit) { + assertHitAmid(uri,relevancy,amid,hit); + assertHitBmid(uri,relevancy,bmid,hit); + assertEquals(cmid,Integer.parseInt((String) hit.getField("cmid"))); } private static class ZeroHitsControl extends com.yahoo.search.Searcher { |