summaryrefslogtreecommitdiffstats
path: root/container-search
diff options
context:
space:
mode:
author tomglk <> 2024-01-08 08:44:10 +0100
committer tomglk <> 2024-01-08 08:44:10 +0100
commit 154b1da4d31969fe05c53275fb3ac1c451b7fa9a (patch)
tree 94bb7e08d42646c897db4a0bf2e8d73ae08c41e5 /container-search
parent d33f2119651360228f52baa6c174d8f3e1e8bcb4 (diff)
Keep hits from previous searches
Add hits to the result instead of overwriting the result. Nest the loop over collapsefields inside the loop over hits, so that all field values of a hit are considered before the hit is added. Also add a test for multi-field collapsing in the case where multiple searches are necessary.
Diffstat (limited to 'container-search')
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java 75
-rw-r--r-- container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java 64
2 files changed, 102 insertions, 37 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java
index 31542cb194d..efeedb9bf55 100644
--- a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java
@@ -6,10 +6,10 @@ import com.yahoo.component.chain.dependencies.After;
import com.yahoo.component.chain.dependencies.Before;
import com.yahoo.container.QrSearchersConfig;
import com.yahoo.prelude.fastsearch.FastHit;
+import com.yahoo.processing.request.CompoundName;
import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.search.Searcher;
-import com.yahoo.processing.request.CompoundName;
import com.yahoo.search.query.Properties;
import com.yahoo.search.result.Hit;
import com.yahoo.search.searchchain.Execution;
@@ -114,24 +114,10 @@ public class FieldCollapsingSearcher extends Searcher {
resultSource = search(query.clone(), execution, nextOffset, hitsToRequest);
fill(resultSource, summaryClass, execution);
- // collapse by the primary field, using the query-result as the source
- // this either fills an empty result, or extends the existing one from a previous iteration
- collapse(result, knownCollapses, resultSource, collapseFields[0],
- getCollapseSize(query.properties(), collapseFields[0], globalCollapseSize)
+ collapse(result, knownCollapses, resultSource,
+ collapseFields, query.properties(), globalCollapseSize
);
- // collapse even further, using the other fields
- // using the result as source, we just (possibly) reduce the number of hits
- for (int i = 1; i < collapseFields.length; i++) {
- Result newResult = new Result(query);
-
- collapse(newResult, knownCollapses, result, collapseFields[i],
- getCollapseSize(query.properties(), collapseFields[i], globalCollapseSize)
- );
-
- result = newResult;
- }
-
hitsAfterCollapse = result.getHitCount();
if (resultSource.getTotalHitCount() < (hitsToRequest + nextOffset)) {
// the searcher downstream has no more hits
@@ -165,36 +151,51 @@ public class FieldCollapsingSearcher extends Searcher {
/**
* Collapse logic. Preserves only maxHitsPerField hits
- * for each unique value of the collapsing parameter.
+ * for each unique value of the collapsing parameters.
+ * Uses collapsefields sequentially.
*/
- private void collapse(Result result, Map<String, Integer> knownCollapses,
- Result resultSource, String collapseField, int collapseSize) {
+ private void collapse(Result result, Map<String, Integer> knownCollapses, Result resultSource,
+ String[] collapseFields, Properties queryProperties, int globalCollapseSize) {
+
for (Hit unknownHit : resultSource.hits()) {
if (!(unknownHit instanceof FastHit hit)) {
result.hits().add(unknownHit);
continue;
}
- Object peek = hit.getField(collapseField);
- String collapseId = peek != null ? peek.toString() : null;
- if (collapseId == null) {
- result.hits().add(hit);
- continue;
- }
- // prepending the fieldname is necessary to distinguish between values in the different collapsefields
- // @ cannot occur in fieldnames
- String collapseKey = collapseField + "@" + collapseId;
+ boolean addHit = true;
- if (knownCollapses.containsKey(collapseKey)) {
- int numHitsThisField = knownCollapses.get(collapseKey);
+ for (String collapseField : collapseFields) {
- if (numHitsThisField < collapseSize) {
- result.hits().add(hit);
- ++numHitsThisField;
- knownCollapses.put(collapseKey, numHitsThisField);
+ Object peek = hit.getField(collapseField);
+ String collapseId = peek != null ? peek.toString() : null;
+ if (collapseId == null) {
+ continue;
}
- } else {
- knownCollapses.put(collapseKey, 1);
+
+ // prepending the fieldname is necessary to distinguish between values in the different collapsefields
+ // @ cannot occur in fieldnames
+ String collapseKey = collapseField + "@" + collapseId;
+
+ if (knownCollapses.containsKey(collapseKey)) {
+ int numHitsThisField = knownCollapses.get(collapseKey);
+ int collapseSize = getCollapseSize(queryProperties, collapseField, globalCollapseSize);
+
+ if (numHitsThisField < collapseSize) {
+ ++numHitsThisField;
+ knownCollapses.put(collapseKey, numHitsThisField);
+ } else {
+ addHit = false;
+ // break out of the field loop immediately, so that following collapseFields do not record the fieldvalues of this hit
+ // needed for sequential collapsing, otherwise later collapsefields would remove too many hits
+ break;
+ }
+ } else {
+ knownCollapses.put(collapseKey, 1);
+ }
+ }
+
+ if (addHit) {
result.hits().add(hit);
}
}
diff --git a/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java b/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java
index 4c34a2fdb4c..52a344f647e 100644
--- a/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java
+++ b/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java
@@ -407,6 +407,56 @@ public class FieldCollapsingSearcherTestCase {
}
/**
+ * Tests that collapsing on multiple fields works if we have to search multiple
+ * times to get enough hits
+ */
+ @Test
+ void testCollapsingOnMoreFieldsWithManySimilarFieldValues() {
+ // Set up
+ Map<Searcher, Searcher> chained = new HashMap<>();
+ FieldCollapsingSearcher collapse = new FieldCollapsingSearcher(4, 1.0);
+ DocumentSourceSearcher docsource = new DocumentSourceSearcher();
+ chained.put(collapse, docsource);
+
+ Query q = new Query("?query=test_collapse");
+ // The searcher turns off collapsing further on in the chain
+ q.properties().set("collapse", "0");
+ Result r = new Result(q);
+ r.hits().add(createHit("http://acme.org/a.html", 10, 0, 1, 1)); // first hit
+ r.hits().add(createHit("http://acme.org/b.html", 9, 0, 1, 2));
+ r.hits().add(createHit("http://acme.org/c.html", 9, 0, 6, 2)); // - - 1. search: 1
+ r.hits().add(createHit("http://acme.org/d.html", 8, 0, 6, 3));
+ r.hits().add(createHit("http://acme.org/e.html", 8, 0, 6, 3));
+ r.hits().add(createHit("http://acme.org/f.html", 7, 0, 6, 3)); // - - 1. search: 2
+ r.hits().add(createHit("http://acme.org/g.html", 7, 0, 1, 1));
+ r.hits().add(createHit("http://acme.org/h.html", 6, 1, 1, 1));
+ r.hits().add(createHit("http://acme.org/i.html", 5, 2, 2, 1)); // - - 1. search: 3
+ r.hits().add(createHit("http://acme.org/j.html", 4, 3, 3, 2)); // 3rd hit, cmid new
+ r.hits().add(createHit("http://acme.org/k.html", 4, 3, 4, 3));
+ r.hits().add(createHit("http://acme.org/l.html", 4, 3, 5, 3)); // - - 1. search: 4
+ r.hits().add(createHit("http://acme.org/m.html", 4, 4, 6, 3)); // 4th hit, amid new
+ r.hits().add(createHit("http://acme.org/n.html", 4, 4, 7, 4));
+ r.setTotalHitCount(14);
+ docsource.addResult(q, r);
+
+ // Test collapsing
+ q = new Query("?query=test_collapse&collapsesize=1&collapsefield=amid,bmid,cmid");
+ r = doSearch(collapse, q, 0, 2, chained);
+
+ assertEquals(2, r.getHitCount());
+ assertEquals(4, docsource.getQueryCount());
+ assertHit("http://acme.org/a.html", 10, 0, 1, 1, r.hits().get(0));
+ assertHit("http://acme.org/j.html", 4, 3, 3, 2, r.hits().get(1));
+
+ // Next results
+ docsource.resetQueryCount();
+ r = doSearch(collapse, q, 2, 2, chained);
+ assertEquals(1, r.getHitCount());
+ assertEquals(3, docsource.getQueryCount());
+ assertHit("http://acme.org/m.html", 4, 4, 6, 3, r.hits().get(0));
+ }
+
+ /**
* Tests collapsing of "messy" data
*/
@Test
@@ -650,6 +700,14 @@ public class FieldCollapsingSearcherTestCase {
return hit;
}
+ private FastHit createHit(String uri,int relevancy,int amid,int bmid,int cmid) {
+ FastHit hit = new FastHit(uri,relevancy);
+ hit.setField("amid", String.valueOf(amid));
+ hit.setField("bmid", String.valueOf(bmid));
+ hit.setField("cmid", String.valueOf(cmid));
+ return hit;
+ }
+
private void assertHitWithoutFields(String uri,int relevancy,Hit hit) {
assertEquals(uri,hit.getId().toString());
assertEquals(relevancy, ((int) hit.getRelevance().getScore()));
@@ -673,6 +731,12 @@ public class FieldCollapsingSearcherTestCase {
assertEquals(bmid,Integer.parseInt((String) hit.getField("bmid")));
}
+ private void assertHit(String uri,int relevancy,int amid,int bmid,int cmid,Hit hit) {
+ assertHitAmid(uri,relevancy,amid,hit);
+ assertHitBmid(uri,relevancy,bmid,hit);
+ assertEquals(cmid,Integer.parseInt((String) hit.getField("cmid")));
+ }
+
private static class ZeroHitsControl extends com.yahoo.search.Searcher {
public int queryCount = 0;