summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java75
-rw-r--r--container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java64
2 files changed, 102 insertions, 37 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java
index 31542cb194d..efeedb9bf55 100644
--- a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java
@@ -6,10 +6,10 @@ import com.yahoo.component.chain.dependencies.After;
import com.yahoo.component.chain.dependencies.Before;
import com.yahoo.container.QrSearchersConfig;
import com.yahoo.prelude.fastsearch.FastHit;
+import com.yahoo.processing.request.CompoundName;
import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.search.Searcher;
-import com.yahoo.processing.request.CompoundName;
import com.yahoo.search.query.Properties;
import com.yahoo.search.result.Hit;
import com.yahoo.search.searchchain.Execution;
@@ -114,24 +114,10 @@ public class FieldCollapsingSearcher extends Searcher {
resultSource = search(query.clone(), execution, nextOffset, hitsToRequest);
fill(resultSource, summaryClass, execution);
- // collapse by the primary field, using the query-result as the source
- // this either fills an empty result, or extends the existing one from a previous iteration
- collapse(result, knownCollapses, resultSource, collapseFields[0],
- getCollapseSize(query.properties(), collapseFields[0], globalCollapseSize)
+ collapse(result, knownCollapses, resultSource,
+ collapseFields, query.properties(), globalCollapseSize
);
- // collapse even further, using the other fields
- // using the result as source, we just (possibly) reduce the number of hits
- for (int i = 1; i < collapseFields.length; i++) {
- Result newResult = new Result(query);
-
- collapse(newResult, knownCollapses, result, collapseFields[i],
- getCollapseSize(query.properties(), collapseFields[i], globalCollapseSize)
- );
-
- result = newResult;
- }
-
hitsAfterCollapse = result.getHitCount();
if (resultSource.getTotalHitCount() < (hitsToRequest + nextOffset)) {
// the searcher downstream has no more hits
@@ -165,36 +151,51 @@ public class FieldCollapsingSearcher extends Searcher {
/**
* Collapse logic. Preserves only maxHitsPerField hits
- * for each unique value of the collapsing parameter.
+ * for each unique value of the collapsing parameters.
+ * Uses collapsefields sequentially.
*/
- private void collapse(Result result, Map<String, Integer> knownCollapses,
- Result resultSource, String collapseField, int collapseSize) {
+ private void collapse(Result result, Map<String, Integer> knownCollapses, Result resultSource,
+ String[] collapseFields, Properties queryProperties, int globalCollapseSize) {
+
for (Hit unknownHit : resultSource.hits()) {
if (!(unknownHit instanceof FastHit hit)) {
result.hits().add(unknownHit);
continue;
}
- Object peek = hit.getField(collapseField);
- String collapseId = peek != null ? peek.toString() : null;
- if (collapseId == null) {
- result.hits().add(hit);
- continue;
- }
- // prepending the fieldname is necessary to distinguish between values in the different collapsefields
- // @ cannot occur in fieldnames
- String collapseKey = collapseField + "@" + collapseId;
+ boolean addHit = true;
- if (knownCollapses.containsKey(collapseKey)) {
- int numHitsThisField = knownCollapses.get(collapseKey);
+ for (String collapseField : collapseFields) {
- if (numHitsThisField < collapseSize) {
- result.hits().add(hit);
- ++numHitsThisField;
- knownCollapses.put(collapseKey, numHitsThisField);
+ Object peek = hit.getField(collapseField);
+ String collapseId = peek != null ? peek.toString() : null;
+ if (collapseId == null) {
+ continue;
}
- } else {
- knownCollapses.put(collapseKey, 1);
+
+ // prepending the fieldname is necessary to distinguish between values in the different collapsefields
+ // @ cannot occur in fieldnames
+ String collapseKey = collapseField + "@" + collapseId;
+
+ if (knownCollapses.containsKey(collapseKey)) {
+ int numHitsThisField = knownCollapses.get(collapseKey);
+ int collapseSize = getCollapseSize(queryProperties, collapseField, globalCollapseSize);
+
+ if (numHitsThisField < collapseSize) {
+ ++numHitsThisField;
+ knownCollapses.put(collapseKey, numHitsThisField);
+ } else {
+ addHit = false;
+ // immediate return, so that following collapseFields do not record the fieldvalues of this hit
+ // needed for sequential collapsing, otherwise later collapsefields would remove too many hits
+ break;
+ }
+ } else {
+ knownCollapses.put(collapseKey, 1);
+ }
+ }
+
+ if (addHit) {
result.hits().add(hit);
}
}
diff --git a/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java b/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java
index 4c34a2fdb4c..52a344f647e 100644
--- a/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java
+++ b/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java
@@ -407,6 +407,56 @@ public class FieldCollapsingSearcherTestCase {
}
/**
+ * Tests that collapsing on multiple fields works if we have to search multiple
+ * time to get enough hits
+ */
+ @Test
+ void testCollapsingOnMoreFieldsWithManySimilarFieldValues() {
+ // Set up
+ Map<Searcher, Searcher> chained = new HashMap<>();
+ FieldCollapsingSearcher collapse = new FieldCollapsingSearcher(4, 1.0);
+ DocumentSourceSearcher docsource = new DocumentSourceSearcher();
+ chained.put(collapse, docsource);
+
+ Query q = new Query("?query=test_collapse");
+ // The searcher turns off collapsing further on in the chain
+ q.properties().set("collapse", "0");
+ Result r = new Result(q);
+ r.hits().add(createHit("http://acme.org/a.html", 10, 0, 1, 1)); // first hit
+ r.hits().add(createHit("http://acme.org/b.html", 9, 0, 1, 2));
+ r.hits().add(createHit("http://acme.org/c.html", 9, 0, 6, 2)); // - - 1. search: 1
+ r.hits().add(createHit("http://acme.org/d.html", 8, 0, 6, 3));
+ r.hits().add(createHit("http://acme.org/e.html", 8, 0, 6, 3));
+ r.hits().add(createHit("http://acme.org/f.html", 7, 0, 6, 3)); // - - 1. search: 2
+ r.hits().add(createHit("http://acme.org/g.html", 7, 0, 1, 1));
+ r.hits().add(createHit("http://acme.org/h.html", 6, 1, 1, 1));
+ r.hits().add(createHit("http://acme.org/i.html", 5, 2, 2, 1)); // - - 1. search: 3
+ r.hits().add(createHit("http://acme.org/j.html", 4, 3, 3, 2)); // 3rd hit, cmid new
+ r.hits().add(createHit("http://acme.org/k.html", 4, 3, 4, 3));
+ r.hits().add(createHit("http://acme.org/l.html", 4, 3, 5, 3)); // - - 1. search: 4
+ r.hits().add(createHit("http://acme.org/m.html", 4, 4, 6, 3)); // 4th hit, amid new
+ r.hits().add(createHit("http://acme.org/n.html", 4, 4, 7, 4));
+ r.setTotalHitCount(14);
+ docsource.addResult(q, r);
+
+ // Test collapsing
+ q = new Query("?query=test_collapse&collapsesize=1&collapsefield=amid,bmid,cmid");
+ r = doSearch(collapse, q, 0, 2, chained);
+
+ assertEquals(2, r.getHitCount());
+ assertEquals(4, docsource.getQueryCount());
+ assertHit("http://acme.org/a.html", 10, 0, 1, 1, r.hits().get(0));
+ assertHit("http://acme.org/j.html", 4, 3, 3, 2, r.hits().get(1));
+
+ // Next results
+ docsource.resetQueryCount();
+ r = doSearch(collapse, q, 2, 2, chained);
+ assertEquals(1, r.getHitCount());
+ assertEquals(3, docsource.getQueryCount());
+ assertHit("http://acme.org/m.html", 4, 4, 6, 3, r.hits().get(0));
+ }
+
+ /**
* Tests collapsing of "messy" data
*/
@Test
@@ -650,6 +700,14 @@ public class FieldCollapsingSearcherTestCase {
return hit;
}
+ private FastHit createHit(String uri,int relevancy,int amid,int bmid,int cmid) {
+ FastHit hit = new FastHit(uri,relevancy);
+ hit.setField("amid", String.valueOf(amid));
+ hit.setField("bmid", String.valueOf(bmid));
+ hit.setField("cmid", String.valueOf(cmid));
+ return hit;
+ }
+
private void assertHitWithoutFields(String uri,int relevancy,Hit hit) {
assertEquals(uri,hit.getId().toString());
assertEquals(relevancy, ((int) hit.getRelevance().getScore()));
@@ -673,6 +731,12 @@ public class FieldCollapsingSearcherTestCase {
assertEquals(bmid,Integer.parseInt((String) hit.getField("bmid")));
}
+ private void assertHit(String uri,int relevancy,int amid,int bmid,int cmid,Hit hit) {
+ assertHitAmid(uri,relevancy,amid,hit);
+ assertHitBmid(uri,relevancy,bmid,hit);
+ assertEquals(cmid,Integer.parseInt((String) hit.getField("cmid")));
+ }
+
private static class ZeroHitsControl extends com.yahoo.search.Searcher {
public int queryCount = 0;