summaryrefslogtreecommitdiffstats
path: root/container-search
diff options
context:
space:
mode:
authortomglk <>2023-12-21 17:32:29 +0100
committertomglk <>2023-12-21 17:32:29 +0100
commit5614a846b5c4d6980a9880577ffa9253a2112ce0 (patch)
treef0f2f2de758ee29fbf54772fcf2163260f33c830 /container-search
parent96db3d9c40402bbd7dd89f4ecd668c2b69b1721f (diff)
Add support for collapsing on multiple fields
Changes in FieldCollapsingSearcher:
- Allow passing a comma-separated list of fields in collapsefield
- Use collapsesize to override the default collapse size for all fields
- Use collapsesize.fieldname to override the collapse size for a specific field
Diffstat (limited to 'container-search')
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java58
-rw-r--r--container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java125
2 files changed, 173 insertions, 10 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java
index 708c6de1212..5d848bb2ad5 100644
--- a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java
+++ b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java
@@ -10,14 +10,16 @@ import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.search.Searcher;
import com.yahoo.processing.request.CompoundName;
+import com.yahoo.search.query.Properties;
import com.yahoo.search.result.Hit;
import com.yahoo.search.searchchain.Execution;
import com.yahoo.search.searchchain.PhaseNames;
+import java.util.Arrays;
import java.util.Map;
/**
- * A searcher which does parametrized collapsing.
+ * A searcher which does parameterized collapsing.
*
* @author Steinar Knutsen
*/
@@ -85,11 +87,14 @@ public class FieldCollapsingSearcher extends Searcher {
*/
@Override
public Result search(com.yahoo.search.Query query, Execution execution) {
- String collapseField = query.properties().getString(collapsefield);
+ String collapseFieldParam = query.properties().getString(collapsefield);
- if (collapseField == null) return execution.search(query);
+ if (collapseFieldParam == null) return execution.search(query);
+
+ String[] collapseFields = collapseFieldParam.split(",");
+
+ int globalCollapseSize = query.properties().getInteger(collapsesize, defaultCollapseSize);
- int collapseSize = query.properties().getInteger(collapsesize, defaultCollapseSize);
query.properties().set(collapse, "0");
int hitsToRequest = query.getHits() != 0 ? (int) Math.ceil((query.getOffset() + query.getHits() + 1) * extraFactor) : 0;
@@ -103,12 +108,29 @@ public class FieldCollapsingSearcher extends Searcher {
String collapseSummary = query.properties().getString(collapseSummaryName);
String summaryClass = (collapseSummary == null)
? query.getPresentation().getSummary() : collapseSummary;
- query.trace("Collapsing by '" + collapseField + "' using summary '" + collapseSummary + "'", 2);
+ query.trace("Collapsing by '" + Arrays.toString(collapseFields) + "' using summary '" + collapseSummary + "'", 2);
do {
resultSource = search(query.clone(), execution, nextOffset, hitsToRequest);
fill(resultSource, summaryClass, execution);
- collapse(result, knownCollapses, resultSource, collapseField, collapseSize);
+
+ // collapse by the primary field, using the query-result as the source
+ // this either fills an empty result, or extends the existing one from a previous iteration
+ collapse(result, knownCollapses, resultSource, collapseFields[0],
+ getCollapseSize(query.properties(), collapseFields[0], globalCollapseSize)
+ );
+
+ // collapse even further, using the other fields
+ // using the result as source, we just (possibly) reduce the number of hits
+ for (int i = 1; i < collapseFields.length; i++) {
+ Result newResult = new Result(query);
+
+ collapse(newResult, knownCollapses, result, collapseFields[i],
+ getCollapseSize(query.properties(), collapseFields[i], globalCollapseSize)
+ );
+
+ result = newResult;
+ }
hitsAfterCollapse = result.getHitCount();
if (resultSource.getTotalHitCount() < (hitsToRequest + nextOffset)) {
@@ -159,19 +181,35 @@ public class FieldCollapsingSearcher extends Searcher {
continue;
}
- if (knownCollapses.containsKey(collapseId)) {
- int numHitsThisField = knownCollapses.get(collapseId);
+ // prepending the fieldname is necessary to distinguish between values in the different collapsefields
+ String collapseKey = collapseField + "_" + collapseId;
+
+ if (knownCollapses.containsKey(collapseKey)) {
+ int numHitsThisField = knownCollapses.get(collapseKey);
if (numHitsThisField < collapseSize) {
result.hits().add(hit);
++numHitsThisField;
- knownCollapses.put(collapseId, numHitsThisField);
+ knownCollapses.put(collapseKey, numHitsThisField);
}
} else {
- knownCollapses.put(collapseId, 1);
+ knownCollapses.put(collapseKey, 1);
result.hits().add(hit);
}
}
}
+ private Integer getCollapseSize(Properties properties, String fieldName, Integer globalCollapseSize) {
+ Integer fieldCollapseSize = properties.getInteger(collapsesize.append(fieldName));
+
+ if (fieldCollapseSize != null) {
+ return fieldCollapseSize;
+ }
+
+ if (globalCollapseSize != null) {
+ return globalCollapseSize;
+ }
+
+ return defaultCollapseSize;
+ }
}
diff --git a/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java b/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java
index 361079f7595..70251e2cb8e 100644
--- a/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java
+++ b/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java
@@ -218,6 +218,119 @@ public class FieldCollapsingSearcherTestCase {
}
/**
+ * Tests that collapsing hits on 2 fields works.
+ * This test also shows that the order of the collapse fields affects the result.
+ */
+ @Test
+ void testCollapsingWithMultipleFields() {
+ // Set up
+ Map<Searcher, Searcher> chained = new HashMap<>();
+ FieldCollapsingSearcher collapse = new FieldCollapsingSearcher();
+ DocumentSourceSearcher docsource = new DocumentSourceSearcher();
+ chained.put(collapse, docsource);
+
+ Query q = new Query("?query=test_collapse");
+ // The searcher turns off collapsing further on in the chain
+ q.properties().set("collapse", "0");
+ Result r = new Result(q);
+ r.hits().add(createHit("http://acme.org/a.html", 10, 1, 0));
+ r.hits().add(createHit("http://acme.org/b.html", 9, 1, 1));
+ r.hits().add(createHit("http://acme.org/c.html", 8, 0, 1));
+ r.hits().add(createHit("http://acme.org/d.html", 7, 1, 0));
+ r.setTotalHitCount(4);
+ docsource.addResult(q, r);
+
+ // Test collapsing, starting with amid
+ q = new Query("?query=test_collapse&collapsesize=1&collapsefield=amid,bmid");
+ r = doSearch(collapse, q, 0, 4, chained);
+
+ assertEquals(2, r.getHitCount());
+ assertEquals(1, docsource.getQueryCount());
+ assertHit("http://acme.org/a.html", 10, 1, 0, r.hits().get(0));
+ assertHit("http://acme.org/c.html", 8, 0, 1, r.hits().get(1));
+
+
+ // Test collapsing, starting with bmid
+ q = new Query("?query=test_collapse&collapsesize=1&collapsefield=bmid,amid");
+ r = doSearch(collapse, q, 0, 4, chained);
+
+ assertEquals(1, r.getHitCount());
+ assertEquals(2, docsource.getQueryCount()); // 2 because + 1 from above
+ assertHit("http://acme.org/a.html", 10, 1, 0, r.hits().get(0));
+ }
+
+ /**
+ * Tests that using different collapse sizes for different fields works
+ */
+ @Test
+ void testCollapsingWithMultipleFieldsAndMultipleCollapseSizes() {
+ // Set up
+ Map<Searcher, Searcher> chained = new HashMap<>();
+ FieldCollapsingSearcher collapse = new FieldCollapsingSearcher();
+ DocumentSourceSearcher docsource = new DocumentSourceSearcher();
+ chained.put(collapse, docsource);
+
+ Query q = new Query("?query=test_collapse");
+ // The searcher turns off collapsing further on in the chain
+ q.properties().set("collapse", "0");
+ Result r = new Result(q);
+ r.hits().add(createHit("http://acme.org/a.html", 10, 1, 1));
+ r.hits().add(createHit("http://acme.org/b.html", 9, 1, 0));
+ r.hits().add(createHit("http://acme.org/c.html", 9, 0, 1));
+ r.hits().add(createHit("http://acme.org/d.html", 8, 1, 0));
+ r.setTotalHitCount(4);
+ docsource.addResult(q, r);
+
+ // Test collapsing
+ // default collapsesize is used for amid, bmid is set to 2
+ q = new Query("?query=test_collapse&collapsefield=amid,bmid&collapsesize.bmid=2");
+ r = doSearch(collapse, q, 0, 4, chained);
+
+ assertEquals(2, r.getHitCount());
+ assertEquals(1, docsource.getQueryCount());
+ assertHit("http://acme.org/a.html", 10, 1, 1, r.hits().get(0));
+ assertHit("http://acme.org/c.html", 9, 0, 1, r.hits().get(1));
+ }
+
+ /**
+ * Tests that using different collapse sizes for different fields works,
+ * and that the different ways to configure the collapse size have the correct precedence.
+ */
+ @Test
+ void testCollapsingWithMultipleFieldsAndMultipleCollapseSizeSources() {
+ // Set up
+ Map<Searcher, Searcher> chained = new HashMap<>();
+ FieldCollapsingSearcher collapse = new FieldCollapsingSearcher();
+ DocumentSourceSearcher docsource = new DocumentSourceSearcher();
+ chained.put(collapse, docsource);
+
+ Query q = new Query("?query=test_collapse");
+ // The searcher turns off collapsing further on in the chain
+ q.properties().set("collapse", "0");
+ Result r = new Result(q);
+ r.hits().add(createHit("http://acme.org/a.html", 10, 1, 1));
+ r.hits().add(createHit("http://acme.org/b.html", 9, 1, 0));
+ r.hits().add(createHit("http://acme.org/c.html", 9, 0, 1));
+ r.hits().add(createHit("http://acme.org/d.html", 8, 1, 0));
+ r.hits().add(createHit("http://acme.org/3.html", 8, 1, 0));
+ r.setTotalHitCount(5);
+ docsource.addResult(q, r);
+
+ // Test collapsing
+ // collapsesize=10 overrides the default for amid & bmid
+ // collapsesize.bmid=2 in turn overrides collapsesize for bmid
+ q = new Query("?query=test_collapse&collapsesize=10&collapsefield=amid,bmid&collapsesize.bmid=2");
+ r = doSearch(collapse, q, 0, 5, chained);
+
+ assertEquals(4, r.getHitCount());
+ assertEquals(1, docsource.getQueryCount());
+ assertHit("http://acme.org/a.html", 10, 1, 1, r.hits().get(0));
+ assertHit("http://acme.org/b.html", 9, 1, 0, r.hits().get(1));
+ assertHit("http://acme.org/c.html", 9, 0, 1, r.hits().get(2));
+ assertHit("http://acme.org/d.html", 8, 1, 0, r.hits().get(3));
+ }
+
+ /**
* Tests collapsing of "messy" data
*/
@Test
@@ -444,12 +557,24 @@ public class FieldCollapsingSearcherTestCase {
return hit;
}
+ private FastHit createHit(String uri,int relevancy,int amid,int bmid) {
+ FastHit hit = new FastHit(uri,relevancy);
+ hit.setField("amid", String.valueOf(amid));
+ hit.setField("bmid", String.valueOf(bmid));
+ return hit;
+ }
+
private void assertHit(String uri,int relevancy,int mid,Hit hit) {
assertEquals(uri,hit.getId().toString());
assertEquals(relevancy, ((int) hit.getRelevance().getScore()));
assertEquals(mid,Integer.parseInt((String) hit.getField("amid")));
}
+ private void assertHit(String uri,int relevancy,int amid,int bmid,Hit hit) {
+ assertHit(uri,relevancy,amid,hit);
+ assertEquals(bmid,Integer.parseInt((String) hit.getField("bmid")));
+ }
+
private static class ZeroHitsControl extends com.yahoo.search.Searcher {
public int queryCount = 0;