aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@gmail.com> 2024-01-09 09:36:47 +0100
committer GitHub <noreply@github.com> 2024-01-09 09:36:47 +0100
commit 0e405a76c44ba1b61e443f455291e2c9163ab0cc (patch)
tree e52da938352e770e12d84f69eee4720253971012
parent febffe8e9cf98f552c5c46e75e88f0aaa6072a2f (diff)
parent a3ed28deef78118a39512c3a25b86b5191e4a7b9 (diff)
Merge pull request #29733 from tomglk/multifield-collapsing v8.284.4
Add support for collapsing on multiple fields
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java 90
-rw-r--r-- container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java 521
2 files changed, 453 insertions, 158 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java
index 708c6de1212..0559bd808bc 100644
--- a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java
@@ -6,18 +6,20 @@ import com.yahoo.component.chain.dependencies.After;
import com.yahoo.component.chain.dependencies.Before;
import com.yahoo.container.QrSearchersConfig;
import com.yahoo.prelude.fastsearch.FastHit;
+import com.yahoo.processing.request.CompoundName;
import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.search.Searcher;
-import com.yahoo.processing.request.CompoundName;
+import com.yahoo.search.query.Properties;
import com.yahoo.search.result.Hit;
import com.yahoo.search.searchchain.Execution;
import com.yahoo.search.searchchain.PhaseNames;
+import java.util.Arrays;
import java.util.Map;
/**
- * A searcher which does parametrized collapsing.
+ * A searcher which does parameterized collapsing.
*
* @author Steinar Knutsen
*/
@@ -30,12 +32,16 @@ public class FieldCollapsingSearcher extends Searcher {
private static final CompoundName collapsesize = CompoundName.from("collapsesize");
private static final CompoundName collapseSummaryName = CompoundName.from("collapse.summary");
+ /** Separator used for the fieldnames in collapsefield */
+ private static final String separator = ",";
+
/** Maximum number of queries to send next searcher */
private static final int maxQueries = 4;
/**
* The max number of hits that will be preserved per unique
- * value of the collapsing parameter.
+ * value of the collapsing parameter,
+ * if no field-specific value is configured.
*/
private int defaultCollapseSize;
@@ -85,11 +91,14 @@ public class FieldCollapsingSearcher extends Searcher {
*/
@Override
public Result search(com.yahoo.search.Query query, Execution execution) {
- String collapseField = query.properties().getString(collapsefield);
+ String collapseFieldParam = query.properties().getString(collapsefield);
+
+ if (collapseFieldParam == null) return execution.search(query);
+
+ String[] collapseFields = collapseFieldParam.split(separator);
- if (collapseField == null) return execution.search(query);
+ int globalCollapseSize = query.properties().getInteger(collapsesize, defaultCollapseSize);
- int collapseSize = query.properties().getInteger(collapsesize, defaultCollapseSize);
query.properties().set(collapse, "0");
int hitsToRequest = query.getHits() != 0 ? (int) Math.ceil((query.getOffset() + query.getHits() + 1) * extraFactor) : 0;
@@ -103,12 +112,15 @@ public class FieldCollapsingSearcher extends Searcher {
String collapseSummary = query.properties().getString(collapseSummaryName);
String summaryClass = (collapseSummary == null)
? query.getPresentation().getSummary() : collapseSummary;
- query.trace("Collapsing by '" + collapseField + "' using summary '" + collapseSummary + "'", 2);
+ query.trace("Collapsing by '" + Arrays.toString(collapseFields) + "' using summary '" + collapseSummary + "'", 2);
do {
resultSource = search(query.clone(), execution, nextOffset, hitsToRequest);
fill(resultSource, summaryClass, execution);
- collapse(result, knownCollapses, resultSource, collapseField, collapseSize);
+
+ collapse(result, knownCollapses, resultSource,
+ collapseFields, query.properties(), globalCollapseSize
+ );
hitsAfterCollapse = result.getHitCount();
if (resultSource.getTotalHitCount() < (hitsToRequest + nextOffset)) {
@@ -143,35 +155,63 @@ public class FieldCollapsingSearcher extends Searcher {
/**
* Collapse logic. Preserves only maxHitsPerField hits
- * for each unique value of the collapsing parameter.
+ * for each unique value of the collapsing parameters.
+ * Uses collapsefields sequentially.
*/
- private void collapse(Result result, Map<String, Integer> knownCollapses,
- Result resultSource, String collapseField, int collapseSize) {
+ private void collapse(Result result, Map<String, Integer> knownCollapses, Result resultSource,
+ String[] collapseFields, Properties queryProperties, int globalCollapseSize) {
+
for (Hit unknownHit : resultSource.hits()) {
if (!(unknownHit instanceof FastHit hit)) {
result.hits().add(unknownHit);
continue;
}
- Object peek = hit.getField(collapseField);
- String collapseId = peek != null ? peek.toString() : null;
- if (collapseId == null) {
- result.hits().add(hit);
- continue;
- }
- if (knownCollapses.containsKey(collapseId)) {
- int numHitsThisField = knownCollapses.get(collapseId);
+ boolean addHit = true;
- if (numHitsThisField < collapseSize) {
- result.hits().add(hit);
- ++numHitsThisField;
- knownCollapses.put(collapseId, numHitsThisField);
+ for (String collapseField : collapseFields) {
+
+ Object peek = hit.getField(collapseField);
+ String collapseId = peek != null ? peek.toString() : null;
+ if (collapseId == null) {
+ continue;
+ }
+
+ // prepending the fieldname is necessary to distinguish between values in the different collapsefields
+ // @ cannot occur in fieldnames
+ String collapseKey = collapseField + "@" + collapseId;
+
+ if (knownCollapses.containsKey(collapseKey)) {
+ int numHitsThisField = knownCollapses.get(collapseKey);
+ int collapseSize = getCollapseSize(queryProperties, collapseField, globalCollapseSize);
+
+ if (numHitsThisField < collapseSize) {
+ ++numHitsThisField;
+ knownCollapses.put(collapseKey, numHitsThisField);
+ } else {
+ addHit = false;
+ // break immediately, so that the following collapseFields do not record the field values of this hit;
+ // needed for sequential collapsing, otherwise later collapsefields would remove too many hits
+ break;
+ }
+ } else {
+ knownCollapses.put(collapseKey, 1);
}
- } else {
- knownCollapses.put(collapseId, 1);
+ }
+
+ if (addHit) {
result.hits().add(hit);
}
}
}
+ private int getCollapseSize(Properties properties, String fieldName, int globalCollapseSize) {
+ Integer fieldCollapseSize = properties.getInteger(collapsesize.append(fieldName));
+
+ if (fieldCollapseSize != null) {
+ return fieldCollapseSize;
+ }
+
+ return globalCollapseSize;
+ }
}
diff --git a/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java b/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java
index 361079f7595..d45d3866783 100644
--- a/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java
+++ b/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java
@@ -68,6 +68,74 @@ public class FieldCollapsingSearcherTestCase {
assertEquals(1, checker.queryCount);
}
+ /**
+ * Tests that we do not fail on documents with missing collapsefield
+ * and that they are kept in the result.
+ */
+ @Test
+ void testFieldCollapsingWithCollapseFieldMissing() {
+ Map<Searcher, Searcher> chained = new HashMap<>();
+
+ // Set up
+ FieldCollapsingSearcher collapse = new FieldCollapsingSearcher();
+ DocumentSourceSearcher docsource = new DocumentSourceSearcher();
+ chained.put(collapse, docsource);
+
+ Query q = new Query("?query=test_collapse");
+ Result r = new Result(q);
+ r.hits().add(createHitWithoutFields("http://acme.org/a.html", 10));
+ r.hits().add(createHitAmid("http://acme.org/b.html", 9, 1));
+ r.hits().add(createHitWithoutFields("http://acme.org/c.html", 9));
+ r.hits().add(createHitAmid("http://acme.org/d.html", 8, 2));
+ r.hits().add(createHitAmid("http://acme.org/d.html", 7, 2));
+ r.setTotalHitCount(5);
+ docsource.addResult(q, r);
+
+ // Test basic collapsing on amid
+ q = new Query("?query=test_collapse&collapsefield=amid&collapsesize=1");
+ r = doSearch(collapse, q, 0, 10, chained);
+
+ assertEquals(4, r.getHitCount());
+ assertEquals(1, docsource.getQueryCount());
+
+ assertHitWithoutFields("http://acme.org/a.html", 10, r.hits().get(0));
+ assertHitAmid("http://acme.org/b.html", 9, 1, r.hits().get(1));
+ assertHitWithoutFields("http://acme.org/c.html", 9, r.hits().get(2));
+ assertHitAmid("http://acme.org/d.html", 8, 2, r.hits().get(3));
+ }
+
+ @Test
+ void testFieldCollapsingOnMultipleFieldsWithCollapseFieldsMissing() {
+ Map<Searcher, Searcher> chained = new HashMap<>();
+
+ // Set up
+ FieldCollapsingSearcher collapse = new FieldCollapsingSearcher();
+ DocumentSourceSearcher docsource = new DocumentSourceSearcher();
+ chained.put(collapse, docsource);
+
+ Query q = new Query("?query=test_collapse");
+ Result r = new Result(q);
+ r.hits().add(createHitWithoutFields("http://acme.org/a.html", 10)); // - -
+ r.hits().add(createHitBmid("http://acme.org/b.html", 9, 1)); // - 1
+ r.hits().add(createHitAmid("http://acme.org/c.html", 9, 1)); // 1 -
+ r.hits().add(createHitBmid("http://acme.org/d.html", 8, 1)); // - 1
+ r.hits().add(createHit("http://acme.org/e.html", 8, 2, 2)); // 2 2
+ r.setTotalHitCount(5);
+ docsource.addResult(q, r);
+
+ // Test basic collapsing
+ q = new Query("?query=test_collapse&collapsefield=amid,bmid&collapsesize=1");
+ r = doSearch(collapse, q, 0, 10, chained);
+
+ assertEquals(4, r.getHitCount());
+ assertEquals(1, docsource.getQueryCount());
+
+ assertHitWithoutFields("http://acme.org/a.html", 10, r.hits().get(0));
+ assertHitBmid("http://acme.org/b.html", 9, 1, r.hits().get(1));
+ assertHitAmid("http://acme.org/c.html", 9, 1, r.hits().get(2));
+ assertHit("http://acme.org/e.html", 8, 2, 2, r.hits().get(3));
+ }
+
@Test
void testFieldCollapsing() {
Map<Searcher, Searcher> chained = new HashMap<>();
@@ -77,20 +145,16 @@ public class FieldCollapsingSearcherTestCase {
DocumentSourceSearcher docsource = new DocumentSourceSearcher();
chained.put(collapse, docsource);
- // Caveat: Collapse is set to false, because that's what the
- // collapser asks for
- Query q = new Query("?query=test_collapse&collapsefield=amid");
- // The searcher turns off collapsing further on in the chain
- q.properties().set("collapse", "0");
+ Query q = new Query("?query=test_collapse");
Result r = new Result(q);
- r.hits().add(createHit("http://acme.org/a.html", 10, 0));
- r.hits().add(createHit("http://acme.org/b.html", 9, 0));
- r.hits().add(createHit("http://acme.org/c.html", 9, 1));
- r.hits().add(createHit("http://acme.org/d.html", 8, 1));
- r.hits().add(createHit("http://acme.org/e.html", 8, 2));
- r.hits().add(createHit("http://acme.org/f.html", 7, 2));
- r.hits().add(createHit("http://acme.org/g.html", 7, 3));
- r.hits().add(createHit("http://acme.org/h.html", 6, 3));
+ r.hits().add(createHitAmid("http://acme.org/a.html", 10, 0));
+ r.hits().add(createHitAmid("http://acme.org/b.html", 9, 0));
+ r.hits().add(createHitAmid("http://acme.org/c.html", 9, 1));
+ r.hits().add(createHitAmid("http://acme.org/d.html", 8, 1));
+ r.hits().add(createHitAmid("http://acme.org/e.html", 8, 2));
+ r.hits().add(createHitAmid("http://acme.org/f.html", 7, 2));
+ r.hits().add(createHitAmid("http://acme.org/g.html", 7, 3));
+ r.hits().add(createHitAmid("http://acme.org/h.html", 6, 3));
r.setTotalHitCount(8);
docsource.addResult(q, r);
@@ -100,46 +164,47 @@ public class FieldCollapsingSearcherTestCase {
assertEquals(4, r.getHitCount());
assertEquals(1, docsource.getQueryCount());
- assertHit("http://acme.org/a.html", 10, 0, r.hits().get(0));
- assertHit("http://acme.org/c.html", 9, 1, r.hits().get(1));
- assertHit("http://acme.org/e.html", 8, 2, r.hits().get(2));
- assertHit("http://acme.org/g.html", 7, 3, r.hits().get(3));
+ assertHitAmid("http://acme.org/a.html", 10, 0, r.hits().get(0));
+ assertHitAmid("http://acme.org/c.html", 9, 1, r.hits().get(1));
+ assertHitAmid("http://acme.org/e.html", 8, 2, r.hits().get(2));
+ assertHitAmid("http://acme.org/g.html", 7, 3, r.hits().get(3));
}
+ /**
+ * Test that collapsing works if multiple searches are necessary.
+ */
@Test
void testFieldCollapsingTwoPhase() {
- // Set up
Map<Searcher, Searcher> chained = new HashMap<>();
- FieldCollapsingSearcher collapse = new FieldCollapsingSearcher();
+
+ // Set up
+ FieldCollapsingSearcher collapse = new FieldCollapsingSearcher(1, 1.0);
DocumentSourceSearcher docsource = new DocumentSourceSearcher();
chained.put(collapse, docsource);
- // Caveat: Collapse is set to false, because that's what the
- // collapser asks for
- Query q = new Query("?query=test_collapse&collapsefield=amid");
- // The searcher turns off collapsing further on in the chain
- q.properties().set("collapse", "0");
+
+ Query q = new Query("?query=test_collapse");
Result r = new Result(q);
- r.hits().add(createHit("http://acme.org/a.html", 10, 0));
- r.hits().add(createHit("http://acme.org/b.html", 9, 0));
- r.hits().add(createHit("http://acme.org/c.html", 9, 1));
- r.hits().add(createHit("http://acme.org/d.html", 8, 1));
- r.hits().add(createHit("http://acme.org/e.html", 8, 2));
- r.hits().add(createHit("http://acme.org/f.html", 7, 2));
- r.hits().add(createHit("http://acme.org/g.html", 7, 3));
- r.hits().add(createHit("http://acme.org/h.html", 6, 3));
+ r.hits().add(createHitAmid("http://acme.org/a.html", 10, 0));
+ r.hits().add(createHitAmid("http://acme.org/b.html", 9, 0));
+ r.hits().add(createHitAmid("http://acme.org/c.html", 9, 1));
+ r.hits().add(createHitAmid("http://acme.org/d.html", 8, 1));
+ r.hits().add(createHitAmid("http://acme.org/e.html", 8, 2));
+ r.hits().add(createHitAmid("http://acme.org/f.html", 7, 2));
+ r.hits().add(createHitAmid("http://acme.org/g.html", 7, 3));
+ r.hits().add(createHitAmid("http://acme.org/h.html", 6, 3));
r.setTotalHitCount(8);
docsource.addResult(q, r);
// Test basic collapsing on mid
q = new Query("?query=test_collapse&collapsefield=amid");
- r = doSearch(collapse, q, 0, 10, chained);
+ r = doSearch(collapse, q, 0, 4, chained);
assertEquals(4, r.getHitCount());
- assertEquals(1, docsource.getQueryCount());
- assertHit("http://acme.org/a.html", 10, 0, r.hits().get(0));
- assertHit("http://acme.org/c.html", 9, 1, r.hits().get(1));
- assertHit("http://acme.org/e.html", 8, 2, r.hits().get(2));
- assertHit("http://acme.org/g.html", 7, 3, r.hits().get(3));
+ assertEquals(2, docsource.getQueryCount());
+ assertHitAmid("http://acme.org/a.html", 10, 0, r.hits().get(0));
+ assertHitAmid("http://acme.org/c.html", 9, 1, r.hits().get(1));
+ assertHitAmid("http://acme.org/e.html", 8, 2, r.hits().get(2));
+ assertHitAmid("http://acme.org/g.html", 7, 3, r.hits().get(3));
}
@Test
@@ -152,14 +217,14 @@ public class FieldCollapsingSearcherTestCase {
Query q = new Query("?query=test_collapse");
Result r = new Result(q);
- r.hits().add(createHit("http://acme.org/a.html", 10, 0));
- r.hits().add(createHit("http://acme.org/b.html", 9, 0));
- r.hits().add(createHit("http://acme.org/c.html", 9, 1));
- r.hits().add(createHit("http://acme.org/d.html", 8, 1));
- r.hits().add(createHit("http://acme.org/e.html", 8, 2));
- r.hits().add(createHit("http://acme.org/f.html", 7, 2));
- r.hits().add(createHit("http://acme.org/g.html", 7, 3));
- r.hits().add(createHit("http://acme.org/h.html", 6, 3));
+ r.hits().add(createHitAmid("http://acme.org/a.html", 10, 0));
+ r.hits().add(createHitAmid("http://acme.org/b.html", 9, 0));
+ r.hits().add(createHitAmid("http://acme.org/c.html", 9, 1));
+ r.hits().add(createHitAmid("http://acme.org/d.html", 8, 1));
+ r.hits().add(createHitAmid("http://acme.org/e.html", 8, 2));
+ r.hits().add(createHitAmid("http://acme.org/f.html", 7, 2));
+ r.hits().add(createHitAmid("http://acme.org/g.html", 7, 3));
+ r.hits().add(createHitAmid("http://acme.org/h.html", 6, 3));
r.setTotalHitCount(8);
docsource.addResult(q, r);
@@ -183,20 +248,18 @@ public class FieldCollapsingSearcherTestCase {
DocumentSourceSearcher docsource = new DocumentSourceSearcher();
chained.put(collapse, docsource);
- Query q = new Query("?query=test_collapse&collapsesize=1&collapsefield=amid");
- // The searcher turns off collapsing further on in the chain
- q.properties().set("collapse", "0");
+ Query q = new Query("?query=test_collapse");
Result r = new Result(q);
- r.hits().add(createHit("http://acme.org/a.html", 10, 0));
- r.hits().add(createHit("http://acme.org/b.html", 9, 0));
- r.hits().add(createHit("http://acme.org/c.html", 9, 0));
- r.hits().add(createHit("http://acme.org/d.html", 8, 0));
- r.hits().add(createHit("http://acme.org/e.html", 8, 0));
- r.hits().add(createHit("http://acme.org/f.html", 7, 0));
- r.hits().add(createHit("http://acme.org/g.html", 7, 0));
- r.hits().add(createHit("http://acme.org/h.html", 6, 0));
- r.hits().add(createHit("http://acme.org/i.html", 5, 1));
- r.hits().add(createHit("http://acme.org/j.html", 4, 2));
+ r.hits().add(createHitAmid("http://acme.org/a.html", 10, 0));
+ r.hits().add(createHitAmid("http://acme.org/b.html", 9, 0));
+ r.hits().add(createHitAmid("http://acme.org/c.html", 9, 0));
+ r.hits().add(createHitAmid("http://acme.org/d.html", 8, 0));
+ r.hits().add(createHitAmid("http://acme.org/e.html", 8, 0));
+ r.hits().add(createHitAmid("http://acme.org/f.html", 7, 0));
+ r.hits().add(createHitAmid("http://acme.org/g.html", 7, 0));
+ r.hits().add(createHitAmid("http://acme.org/h.html", 6, 0));
+ r.hits().add(createHitAmid("http://acme.org/i.html", 5, 1));
+ r.hits().add(createHitAmid("http://acme.org/j.html", 4, 2));
r.setTotalHitCount(10);
docsource.addResult(q, r);
@@ -206,15 +269,171 @@ public class FieldCollapsingSearcherTestCase {
assertEquals(2, r.getHitCount());
assertEquals(2, docsource.getQueryCount());
- assertHit("http://acme.org/a.html", 10, 0, r.hits().get(0));
- assertHit("http://acme.org/i.html", 5, 1, r.hits().get(1));
+ assertHitAmid("http://acme.org/a.html", 10, 0, r.hits().get(0));
+ assertHitAmid("http://acme.org/i.html", 5, 1, r.hits().get(1));
// Next results
docsource.resetQueryCount();
r = doSearch(collapse, q, 2, 2, chained);
assertEquals(1, r.getHitCount());
assertEquals(2, docsource.getQueryCount());
- assertHit("http://acme.org/j.html", 4, 2, r.hits().get(0));
+ assertHitAmid("http://acme.org/j.html", 4, 2, r.hits().get(0));
+ }
+
+ /**
+ * Tests that collapsing hits with 2 fields works,
+ * this test also shows that field order is important
+ */
+ @Test
+ void testCollapsingWithMultipleFields() {
+ // Set up
+ Map<Searcher, Searcher> chained = new HashMap<>();
+ FieldCollapsingSearcher collapse = new FieldCollapsingSearcher();
+ DocumentSourceSearcher docsource = new DocumentSourceSearcher();
+ chained.put(collapse, docsource);
+
+ Query q = new Query("?query=test_collapse");
+ Result r = new Result(q);
+ r.hits().add(createHit("http://acme.org/a.html", 10, 1, 0));
+ r.hits().add(createHit("http://acme.org/b.html", 9, 1, 1));
+ r.hits().add(createHit("http://acme.org/c.html", 8, 0, 1));
+ r.hits().add(createHit("http://acme.org/d.html", 7, 1, 0));
+ r.setTotalHitCount(4);
+ docsource.addResult(q, r);
+
+ // Test collapsing, starting with amid
+ q = new Query("?query=test_collapse&collapsesize=1&collapsefield=amid,bmid");
+ r = doSearch(collapse, q, 0, 4, chained);
+
+ assertEquals(2, r.getHitCount());
+ assertEquals(1, docsource.getQueryCount());
+ assertHit("http://acme.org/a.html", 10, 1, 0, r.hits().get(0));
+ assertHit("http://acme.org/c.html", 8, 0, 1, r.hits().get(1));
+
+ docsource.resetQueryCount();
+
+ // Test collapsing, starting with bmid
+ q = new Query("?query=test_collapse&collapsesize=1&collapsefield=bmid,amid");
+ r = doSearch(collapse, q, 0, 4, chained);
+
+ assertEquals(1, r.getHitCount());
+ assertEquals(1, docsource.getQueryCount());
+ assertHit("http://acme.org/a.html", 10, 1, 0, r.hits().get(0));
+ }
+
+ /**
+ * Tests that using different collapse sizes for different fields works
+ */
+ @Test
+ void testCollapsingWithMultipleFieldsAndMultipleCollapseSizes() {
+ // Set up
+ Map<Searcher, Searcher> chained = new HashMap<>();
+ FieldCollapsingSearcher collapse = new FieldCollapsingSearcher();
+ DocumentSourceSearcher docsource = new DocumentSourceSearcher();
+ chained.put(collapse, docsource);
+
+ Query q = new Query("?query=test_collapse");
+ Result r = new Result(q);
+ r.hits().add(createHit("http://acme.org/a.html", 10, 1, 1));
+ r.hits().add(createHit("http://acme.org/b.html", 9, 1, 0));
+ r.hits().add(createHit("http://acme.org/c.html", 9, 0, 1));
+ r.hits().add(createHit("http://acme.org/d.html", 8, 1, 0));
+ r.setTotalHitCount(4);
+ docsource.addResult(q, r);
+
+ // Test collapsing
+ // default collapsesize is used for amid, bmid is set to 2
+ q = new Query("?query=test_collapse&collapsefield=amid,bmid&collapsesize.bmid=2");
+ r = doSearch(collapse, q, 0, 4, chained);
+
+ assertEquals(2, r.getHitCount());
+ assertEquals(1, docsource.getQueryCount());
+ assertHit("http://acme.org/a.html", 10, 1, 1, r.hits().get(0));
+ assertHit("http://acme.org/c.html", 9, 0, 1, r.hits().get(1));
+ }
+
+ /**
+ * Tests that using different collapse sizes for different fields works,
+ * test that the different ways to configure collapse size have the correct precedence
+ */
+ @Test
+ void testCollapsingWithMultipleFieldsAndMultipleCollapseSizeSources() {
+ // Set up
+ Map<Searcher, Searcher> chained = new HashMap<>();
+ FieldCollapsingSearcher collapse = new FieldCollapsingSearcher();
+ DocumentSourceSearcher docsource = new DocumentSourceSearcher();
+ chained.put(collapse, docsource);
+
+ Query q = new Query("?query=test_collapse");
+ Result r = new Result(q);
+ r.hits().add(createHit("http://acme.org/a.html", 10, 1, 1));
+ r.hits().add(createHit("http://acme.org/b.html", 9, 1, 0));
+ r.hits().add(createHit("http://acme.org/c.html", 9, 0, 1));
+ r.hits().add(createHit("http://acme.org/d.html", 8, 1, 0));
+ r.hits().add(createHit("http://acme.org/3.html", 8, 1, 0));
+ r.setTotalHitCount(5);
+ docsource.addResult(q, r);
+
+ // Test collapsing
+ // collapsesize 10 overrides the default for amid & bmid
+ // collapsesize.bmid overrides the collapsesize for bmid again
+ q = new Query("?query=test_collapse&collapsesize=10&collapsefield=amid,bmid&collapsesize.bmid=2");
+ r = doSearch(collapse, q, 0, 5, chained);
+
+ assertEquals(4, r.getHitCount());
+ assertEquals(1, docsource.getQueryCount());
+ assertHit("http://acme.org/a.html", 10, 1, 1, r.hits().get(0));
+ assertHit("http://acme.org/b.html", 9, 1, 0, r.hits().get(1));
+ assertHit("http://acme.org/c.html", 9, 0, 1, r.hits().get(2));
+ assertHit("http://acme.org/d.html", 8, 1, 0, r.hits().get(3));
+ }
+
+ /**
+ * Tests that collapsing on multiple fields works if we have to search multiple
+ * times to get enough hits
+ */
+ @Test
+ void testCollapsingOnMoreFieldsWithManySimilarFieldValues() {
+ // Set up
+ Map<Searcher, Searcher> chained = new HashMap<>();
+ FieldCollapsingSearcher collapse = new FieldCollapsingSearcher(4, 1.0);
+ DocumentSourceSearcher docsource = new DocumentSourceSearcher();
+ chained.put(collapse, docsource);
+
+ Query q = new Query("?query=test_collapse");
+ Result r = new Result(q);
+ r.hits().add(createHit("http://acme.org/a.html", 10, 0, 1, 1)); // first hit
+ r.hits().add(createHit("http://acme.org/b.html", 9, 0, 1, 2));
+ r.hits().add(createHit("http://acme.org/c.html", 9, 0, 6, 2)); // - - 1. search: 1
+ r.hits().add(createHit("http://acme.org/d.html", 8, 0, 6, 3));
+ r.hits().add(createHit("http://acme.org/e.html", 8, 0, 6, 3));
+ r.hits().add(createHit("http://acme.org/f.html", 7, 0, 6, 3)); // - - 1. search: 2
+ r.hits().add(createHit("http://acme.org/g.html", 7, 0, 1, 1));
+ r.hits().add(createHit("http://acme.org/h.html", 6, 1, 1, 1));
+ r.hits().add(createHit("http://acme.org/i.html", 5, 2, 2, 1)); // - - 1. search: 3
+ r.hits().add(createHit("http://acme.org/j.html", 4, 3, 3, 2)); // 3rd hit, cmid new
+ r.hits().add(createHit("http://acme.org/k.html", 4, 3, 4, 3));
+ r.hits().add(createHit("http://acme.org/l.html", 4, 3, 5, 3)); // - - 1. search: 4
+ r.hits().add(createHit("http://acme.org/m.html", 4, 4, 6, 3)); // 4th hit, amid new
+ r.hits().add(createHit("http://acme.org/n.html", 4, 4, 7, 4));
+ r.setTotalHitCount(14);
+ docsource.addResult(q, r);
+
+ // Test collapsing
+ q = new Query("?query=test_collapse&collapsesize=1&collapsefield=amid,bmid,cmid");
+ r = doSearch(collapse, q, 0, 2, chained);
+
+ assertEquals(2, r.getHitCount());
+ assertEquals(4, docsource.getQueryCount());
+ assertHit("http://acme.org/a.html", 10, 0, 1, 1, r.hits().get(0));
+ assertHit("http://acme.org/j.html", 4, 3, 3, 2, r.hits().get(1));
+
+ // Next results
+ docsource.resetQueryCount();
+ r = doSearch(collapse, q, 2, 2, chained);
+ assertEquals(1, r.getHitCount());
+ assertEquals(3, docsource.getQueryCount());
+ assertHit("http://acme.org/m.html", 4, 4, 6, 3, r.hits().get(0));
}
/**
@@ -228,20 +447,18 @@ public class FieldCollapsingSearcherTestCase {
DocumentSourceSearcher docsource = new DocumentSourceSearcher();
chained.put(collapse, docsource);
- Query q = new Query("?query=test_collapse&collapse=true&collapsefield=amid");
- // The searcher turns off collapsing further on in the chain
- q.properties().set("collapse", "0");
+ Query q = new Query("?query=test_collapse");
Result r = new Result(q);
- r.hits().add(createHit("http://acme.org/a.html", 10, 1));
- r.hits().add(createHit("http://acme.org/b.html", 10, 1));
- r.hits().add(createHit("http://acme.org/c.html", 10, 0));
- r.hits().add(createHit("http://acme.org/d.html", 10, 0));
- r.hits().add(createHit("http://acme.org/e.html", 10, 0));
- r.hits().add(createHit("http://acme.org/f.html", 10, 0));
- r.hits().add(createHit("http://acme.org/g.html", 10, 0));
- r.hits().add(createHit("http://acme.org/h.html", 10, 0));
- r.hits().add(createHit("http://acme.org/i.html", 10, 0));
- r.hits().add(createHit("http://acme.org/j.html", 10, 1));
+ r.hits().add(createHitAmid("http://acme.org/a.html", 10, 1));
+ r.hits().add(createHitAmid("http://acme.org/b.html", 10, 1));
+ r.hits().add(createHitAmid("http://acme.org/c.html", 10, 0));
+ r.hits().add(createHitAmid("http://acme.org/d.html", 10, 0));
+ r.hits().add(createHitAmid("http://acme.org/e.html", 10, 0));
+ r.hits().add(createHitAmid("http://acme.org/f.html", 10, 0));
+ r.hits().add(createHitAmid("http://acme.org/g.html", 10, 0));
+ r.hits().add(createHitAmid("http://acme.org/h.html", 10, 0));
+ r.hits().add(createHitAmid("http://acme.org/i.html", 10, 0));
+ r.hits().add(createHitAmid("http://acme.org/j.html", 10, 1));
r.setTotalHitCount(10);
docsource.addResult(q, r);
@@ -250,8 +467,8 @@ public class FieldCollapsingSearcherTestCase {
r = doSearch(collapse, q, 0, 3, chained);
assertEquals(2, r.getHitCount());
- assertHit("http://acme.org/a.html", 10, 1, r.hits().get(0));
- assertHit("http://acme.org/c.html", 10, 0, r.hits().get(1));
+ assertHitAmid("http://acme.org/a.html", 10, 1, r.hits().get(0));
+ assertHitAmid("http://acme.org/c.html", 10, 0, r.hits().get(1));
}
@Test
@@ -265,20 +482,16 @@ public class FieldCollapsingSearcherTestCase {
chained.put(collapse, messUp);
chained.put(messUp, docsource);
- // Caveat: Collapse is set to false, because that's what the collapser asks for
Query q = new Query("?query=%22test%20collapse%22+b&collapsefield=amid&type=all");
-
- // The searcher turns off collapsing further on in the chain
- q.properties().set("collapse", "0");
Result r = new Result(q);
- r.hits().add(createHit("http://acme.org/a.html", 10, 0));
- r.hits().add(createHit("http://acme.org/b.html", 9, 0));
- r.hits().add(createHit("http://acme.org/c.html", 9, 0));
- r.hits().add(createHit("http://acme.org/d.html", 8, 0));
- r.hits().add(createHit("http://acme.org/e.html", 8, 0));
- r.hits().add(createHit("http://acme.org/f.html", 7, 0));
- r.hits().add(createHit("http://acme.org/g.html", 7, 0));
- r.hits().add(createHit("http://acme.org/h.html", 6, 1));
+ r.hits().add(createHitAmid("http://acme.org/a.html", 10, 0));
+ r.hits().add(createHitAmid("http://acme.org/b.html", 9, 0));
+ r.hits().add(createHitAmid("http://acme.org/c.html", 9, 0));
+ r.hits().add(createHitAmid("http://acme.org/d.html", 8, 0));
+ r.hits().add(createHitAmid("http://acme.org/e.html", 8, 0));
+ r.hits().add(createHitAmid("http://acme.org/f.html", 7, 0));
+ r.hits().add(createHitAmid("http://acme.org/g.html", 7, 0));
+ r.hits().add(createHitAmid("http://acme.org/h.html", 6, 1));
r.setTotalHitCount(8);
docsource.addResult(q, r);
@@ -288,8 +501,8 @@ public class FieldCollapsingSearcherTestCase {
assertEquals(2, docsource.getQueryCount());
assertEquals(2, r.getHitCount());
- assertHit("http://acme.org/a.html", 10, 0, r.hits().get(0));
- assertHit("http://acme.org/h.html", 6, 1, r.hits().get(1));
+ assertHitAmid("http://acme.org/a.html", 10, 0, r.hits().get(0));
+ assertHitAmid("http://acme.org/h.html", 6, 1, r.hits().get(1));
}
@Test
@@ -299,20 +512,17 @@ public class FieldCollapsingSearcherTestCase {
FieldCollapsingSearcher collapse = new FieldCollapsingSearcher();
DocumentSourceSearcher docsource = new DocumentSourceSearcher();
chained.put(collapse, docsource);
- // Caveat: Collapse is set to false, because that's what the
- // collapser asks for
- Query q = new Query("?query=test_collapse&collapsefield=amid&summary=placeholder");
- // The searcher turns off collapsing further on in the chain
- q.properties().set("collapse", "0");
+
+ Query q = new Query("?query=test_collapse&summary=placeholder");
Result r = new Result(q);
- r.hits().add(createHit("http://acme.org/a.html", 10, 0));
- r.hits().add(createHit("http://acme.org/b.html", 9, 0));
- r.hits().add(createHit("http://acme.org/c.html", 9, 1));
- r.hits().add(createHit("http://acme.org/d.html", 8, 1));
- r.hits().add(createHit("http://acme.org/e.html", 8, 2));
- r.hits().add(createHit("http://acme.org/f.html", 7, 2));
- r.hits().add(createHit("http://acme.org/g.html", 7, 3));
- r.hits().add(createHit("http://acme.org/h.html", 6, 3));
+ r.hits().add(createHitAmid("http://acme.org/a.html", 10, 0));
+ r.hits().add(createHitAmid("http://acme.org/b.html", 9, 0));
+ r.hits().add(createHitAmid("http://acme.org/c.html", 9, 1));
+ r.hits().add(createHitAmid("http://acme.org/d.html", 8, 1));
+ r.hits().add(createHitAmid("http://acme.org/e.html", 8, 2));
+ r.hits().add(createHitAmid("http://acme.org/f.html", 7, 2));
+ r.hits().add(createHitAmid("http://acme.org/g.html", 7, 3));
+ r.hits().add(createHitAmid("http://acme.org/h.html", 6, 3));
r.setTotalHitCount(8);
docsource.addResult(q, r);
@@ -323,10 +533,10 @@ public class FieldCollapsingSearcherTestCase {
assertEquals(4, r.getHitCount());
assertEquals(1, docsource.getQueryCount());
assertTrue(r.isFilled("placeholder"));
- assertHit("http://acme.org/a.html", 10, 0, r.hits().get(0));
- assertHit("http://acme.org/c.html", 9, 1, r.hits().get(1));
- assertHit("http://acme.org/e.html", 8, 2, r.hits().get(2));
- assertHit("http://acme.org/g.html", 7, 3, r.hits().get(3));
+ assertHitAmid("http://acme.org/a.html", 10, 0, r.hits().get(0));
+ assertHitAmid("http://acme.org/c.html", 9, 1, r.hits().get(1));
+ assertHitAmid("http://acme.org/e.html", 8, 2, r.hits().get(2));
+ assertHitAmid("http://acme.org/g.html", 7, 3, r.hits().get(3));
docsource.resetQueryCount();
// Test basic collapsing on mid
@@ -337,10 +547,10 @@ public class FieldCollapsingSearcherTestCase {
assertEquals(1, docsource.getQueryCount());
assertFalse(r.isFilled("placeholder"));
assertTrue(r.isFilled("short"));
- assertHit("http://acme.org/a.html", 10, 0, r.hits().get(0));
- assertHit("http://acme.org/c.html", 9, 1, r.hits().get(1));
- assertHit("http://acme.org/e.html", 8, 2, r.hits().get(2));
- assertHit("http://acme.org/g.html", 7, 3, r.hits().get(3));
+ assertHitAmid("http://acme.org/a.html", 10, 0, r.hits().get(0));
+ assertHitAmid("http://acme.org/c.html", 9, 1, r.hits().get(1));
+ assertHitAmid("http://acme.org/e.html", 8, 2, r.hits().get(2));
+ assertHitAmid("http://acme.org/g.html", 7, 3, r.hits().get(3));
}
@Test
@@ -350,20 +560,17 @@ public class FieldCollapsingSearcherTestCase {
DocumentSourceSearcher docsource = new DocumentSourceSearcher();
Chain<Searcher> chain = new Chain<>(collapse, new AddAggregationStyleGroupingResultSearcher(), docsource);
- // Caveat: Collapse is set to false, because that's what the
- // collapser asks for
- Query q = new Query("?query=test_collapse&collapsefield=amid");
- // The searcher turns off collapsing further on in the chain
- q.properties().set("collapse", "0");
+ Query q = new Query("?query=test_collapse");
+
Result r = new Result(q);
- r.hits().add(createHit("http://acme.org/a.html", 10, 0));
- r.hits().add(createHit("http://acme.org/b.html", 9, 0));
- r.hits().add(createHit("http://acme.org/c.html", 9, 1));
- r.hits().add(createHit("http://acme.org/d.html", 8, 1));
- r.hits().add(createHit("http://acme.org/e.html", 8, 2));
- r.hits().add(createHit("http://acme.org/f.html", 7, 2));
- r.hits().add(createHit("http://acme.org/g.html", 7, 3));
- r.hits().add(createHit("http://acme.org/h.html", 6, 3));
+ r.hits().add(createHitAmid("http://acme.org/a.html", 10, 0));
+ r.hits().add(createHitAmid("http://acme.org/b.html", 9, 0));
+ r.hits().add(createHitAmid("http://acme.org/c.html", 9, 1));
+ r.hits().add(createHitAmid("http://acme.org/d.html", 8, 1));
+ r.hits().add(createHitAmid("http://acme.org/e.html", 8, 2));
+ r.hits().add(createHitAmid("http://acme.org/f.html", 7, 2));
+ r.hits().add(createHitAmid("http://acme.org/g.html", 7, 3));
+ r.hits().add(createHitAmid("http://acme.org/h.html", 6, 3));
r.setTotalHitCount(8);
docsource.addResult(q, r);
@@ -374,10 +581,10 @@ public class FieldCollapsingSearcherTestCase {
// Assert that the regular hits are collapsed
assertEquals(4 + 1, result.getHitCount());
assertEquals(1, docsource.getQueryCount());
- assertHit("http://acme.org/a.html", 10, 0, result.hits().get(0));
- assertHit("http://acme.org/c.html", 9, 1, result.hits().get(1));
- assertHit("http://acme.org/e.html", 8, 2, result.hits().get(2));
- assertHit("http://acme.org/g.html", 7, 3, result.hits().get(3));
+ assertHitAmid("http://acme.org/a.html", 10, 0, result.hits().get(0));
+ assertHitAmid("http://acme.org/c.html", 9, 1, result.hits().get(1));
+ assertHitAmid("http://acme.org/e.html", 8, 2, result.hits().get(2));
+ assertHitAmid("http://acme.org/g.html", 7, 3, result.hits().get(3));
// Assert that the aggregation group hierarchy is left intact
HitGroup root = getFirstGroupIn(result.hits());
@@ -438,16 +645,64 @@ public class FieldCollapsingSearcherTestCase {
}
}
- private FastHit createHit(String uri,int relevancy,int mid) {
+ private FastHit createHitWithoutFields(String uri, int relevancy) {
+ return new FastHit(uri,relevancy);
+ }
+
+ private FastHit createHitAmid(String uri,int relevancy,int amid) {
FastHit hit = new FastHit(uri,relevancy);
- hit.setField("amid", String.valueOf(mid));
+ hit.setField("amid", String.valueOf(amid));
return hit;
}
- private void assertHit(String uri,int relevancy,int mid,Hit hit) {
+ private FastHit createHitBmid(String uri,int relevancy,int bmid) {
+ FastHit hit = new FastHit(uri,relevancy);
+ hit.setField("bmid", String.valueOf(bmid));
+ return hit;
+ }
+
+ private FastHit createHit(String uri,int relevancy,int amid,int bmid) {
+ FastHit hit = new FastHit(uri,relevancy);
+ hit.setField("amid", String.valueOf(amid));
+ hit.setField("bmid", String.valueOf(bmid));
+ return hit;
+ }
+
+ private FastHit createHit(String uri,int relevancy,int amid,int bmid,int cmid) {
+ FastHit hit = new FastHit(uri,relevancy);
+ hit.setField("amid", String.valueOf(amid));
+ hit.setField("bmid", String.valueOf(bmid));
+ hit.setField("cmid", String.valueOf(cmid));
+ return hit;
+ }
+
+ private void assertHitWithoutFields(String uri,int relevancy,Hit hit) {
assertEquals(uri,hit.getId().toString());
assertEquals(relevancy, ((int) hit.getRelevance().getScore()));
- assertEquals(mid,Integer.parseInt((String) hit.getField("amid")));
+ assertTrue(hit.fields().isEmpty());
+ }
+
+ private void assertHitAmid(String uri, int relevancy, int amid, Hit hit) {
+ assertEquals(uri,hit.getId().toString());
+ assertEquals(relevancy, ((int) hit.getRelevance().getScore()));
+ assertEquals(amid,Integer.parseInt((String) hit.getField("amid")));
+ }
+
+ private void assertHitBmid(String uri, int relevancy, int bmid, Hit hit) {
+ assertEquals(uri,hit.getId().toString());
+ assertEquals(relevancy, ((int) hit.getRelevance().getScore()));
+ assertEquals(bmid,Integer.parseInt((String) hit.getField("bmid")));
+ }
+
+ private void assertHit(String uri,int relevancy,int amid,int bmid,Hit hit) {
+ assertHitAmid(uri,relevancy,amid,hit);
+ assertEquals(bmid,Integer.parseInt((String) hit.getField("bmid")));
+ }
+
+ private void assertHit(String uri,int relevancy,int amid,int bmid,int cmid,Hit hit) {
+ assertHitAmid(uri,relevancy,amid,hit);
+ assertHitBmid(uri,relevancy,bmid,hit);
+ assertEquals(cmid,Integer.parseInt((String) hit.getField("cmid")));
}
private static class ZeroHitsControl extends com.yahoo.search.Searcher {