diff options
2 files changed, 102 insertions, 37 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java index 31542cb194d..efeedb9bf55 100644 --- a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java @@ -6,10 +6,10 @@ import com.yahoo.component.chain.dependencies.After; import com.yahoo.component.chain.dependencies.Before; import com.yahoo.container.QrSearchersConfig; import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.processing.request.CompoundName; import com.yahoo.search.Query; import com.yahoo.search.Result; import com.yahoo.search.Searcher; -import com.yahoo.processing.request.CompoundName; import com.yahoo.search.query.Properties; import com.yahoo.search.result.Hit; import com.yahoo.search.searchchain.Execution; @@ -114,24 +114,10 @@ public class FieldCollapsingSearcher extends Searcher { resultSource = search(query.clone(), execution, nextOffset, hitsToRequest); fill(resultSource, summaryClass, execution); - // collapse by the primary field, using the query-result as the source - // this either fills an empty result, or extends the existing one from a previous iteration - collapse(result, knownCollapses, resultSource, collapseFields[0], - getCollapseSize(query.properties(), collapseFields[0], globalCollapseSize) + collapse(result, knownCollapses, resultSource, + collapseFields, query.properties(), globalCollapseSize ); - // collapse even further, using the other fields - // using the result as source, we just (possibly) reduce the number of hits - for (int i = 1; i < collapseFields.length; i++) { - Result newResult = new Result(query); - - collapse(newResult, knownCollapses, result, collapseFields[i], - getCollapseSize(query.properties(), collapseFields[i], globalCollapseSize) - ); - - result = newResult; - } - hitsAfterCollapse = result.getHitCount(); if (resultSource.getTotalHitCount() < (hitsToRequest + nextOffset)) { // the searcher downstream has no more hits @@ -165,36 +151,51 @@ public class FieldCollapsingSearcher extends Searcher { /** * Collapse logic. Preserves only maxHitsPerField hits - * for each unique value of the collapsing parameter. + * for each unique value of the collapsing parameters. + * Uses collapsefields sequentially. */ - private void collapse(Result result, Map<String, Integer> knownCollapses, - Result resultSource, String collapseField, int collapseSize) { + private void collapse(Result result, Map<String, Integer> knownCollapses, Result resultSource, + String[] collapseFields, Properties queryProperties, int globalCollapseSize) { + for (Hit unknownHit : resultSource.hits()) { if (!(unknownHit instanceof FastHit hit)) { result.hits().add(unknownHit); continue; } - Object peek = hit.getField(collapseField); - String collapseId = peek != null ? peek.toString() : null; - if (collapseId == null) { - result.hits().add(hit); - continue; - } - // prepending the fieldname is necessary to distinguish between values in the different collapsefields - // @ cannot occur in fieldnames - String collapseKey = collapseField + "@" + collapseId; + boolean addHit = true; - if (knownCollapses.containsKey(collapseKey)) { - int numHitsThisField = knownCollapses.get(collapseKey); + for (String collapseField : collapseFields) { - if (numHitsThisField < collapseSize) { - result.hits().add(hit); - ++numHitsThisField; - knownCollapses.put(collapseKey, numHitsThisField); + Object peek = hit.getField(collapseField); + String collapseId = peek != null ? peek.toString() : null; + if (collapseId == null) { + continue; } - } else { - knownCollapses.put(collapseKey, 1); + + // prepending the fieldname is necessary to distinguish between values in the different collapsefields + // @ cannot occur in fieldnames + String collapseKey = collapseField + "@" + collapseId; + + if (knownCollapses.containsKey(collapseKey)) { + int numHitsThisField = knownCollapses.get(collapseKey); + int collapseSize = getCollapseSize(queryProperties, collapseField, globalCollapseSize); + + if (numHitsThisField < collapseSize) { + ++numHitsThisField; + knownCollapses.put(collapseKey, numHitsThisField); + } else { + addHit = false; + // immediate return, so that following collapseFields do not record the fieldvalues of this hit + // needed for sequential collapsing, otherwise later collapsefields would remove too many hits + break; + } + } else { + knownCollapses.put(collapseKey, 1); + } + } + + if (addHit) { result.hits().add(hit); } } diff --git a/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java b/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java index 4c34a2fdb4c..52a344f647e 100644 --- a/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/searcher/test/FieldCollapsingSearcherTestCase.java @@ -407,6 +407,56 @@ public class FieldCollapsingSearcherTestCase { } /** + * Tests that collapsing on multiple fields works if we have to search multiple + * time to get enough hits + */ + @Test + void testCollapsingOnMoreFieldsWithManySimilarFieldValues() { + // Set up + Map<Searcher, Searcher> chained = new HashMap<>(); + FieldCollapsingSearcher collapse = new FieldCollapsingSearcher(4, 1.0); + DocumentSourceSearcher docsource = new DocumentSourceSearcher(); + chained.put(collapse, docsource); + + Query q = new Query("?query=test_collapse"); + // The searcher turns off collapsing further on in the chain + q.properties().set("collapse", "0"); + Result r = new Result(q); + r.hits().add(createHit("http://acme.org/a.html", 10, 0, 1, 1)); // first hit + r.hits().add(createHit("http://acme.org/b.html", 9, 0, 1, 2)); + r.hits().add(createHit("http://acme.org/c.html", 9, 0, 6, 2)); // - - 1. search: 1 + r.hits().add(createHit("http://acme.org/d.html", 8, 0, 6, 3)); + r.hits().add(createHit("http://acme.org/e.html", 8, 0, 6, 3)); + r.hits().add(createHit("http://acme.org/f.html", 7, 0, 6, 3)); // - - 1. search: 2 + r.hits().add(createHit("http://acme.org/g.html", 7, 0, 1, 1)); + r.hits().add(createHit("http://acme.org/h.html", 6, 1, 1, 1)); + r.hits().add(createHit("http://acme.org/i.html", 5, 2, 2, 1)); // - - 1. search: 3 + r.hits().add(createHit("http://acme.org/j.html", 4, 3, 3, 2)); // 3rd hit, cmid new + r.hits().add(createHit("http://acme.org/k.html", 4, 3, 4, 3)); + r.hits().add(createHit("http://acme.org/l.html", 4, 3, 5, 3)); // - - 1. search: 4 + r.hits().add(createHit("http://acme.org/m.html", 4, 4, 6, 3)); // 4th hit, amid new + r.hits().add(createHit("http://acme.org/n.html", 4, 4, 7, 4)); + r.setTotalHitCount(14); + docsource.addResult(q, r); + + // Test collapsing + q = new Query("?query=test_collapse&collapsesize=1&collapsefield=amid,bmid,cmid"); + r = doSearch(collapse, q, 0, 2, chained); + + assertEquals(2, r.getHitCount()); + assertEquals(4, docsource.getQueryCount()); + assertHit("http://acme.org/a.html", 10, 0, 1, 1, r.hits().get(0)); + assertHit("http://acme.org/j.html", 4, 3, 3, 2, r.hits().get(1)); + + // Next results + docsource.resetQueryCount(); + r = doSearch(collapse, q, 2, 2, chained); + assertEquals(1, r.getHitCount()); + assertEquals(3, docsource.getQueryCount()); + assertHit("http://acme.org/m.html", 4, 4, 6, 3, r.hits().get(0)); + } + + /** * Tests collapsing of "messy" data */ @Test @@ -650,6 +700,14 @@ public class FieldCollapsingSearcherTestCase { return hit; } + private FastHit createHit(String uri,int relevancy,int amid,int bmid,int cmid) { + FastHit hit = new FastHit(uri,relevancy); + hit.setField("amid", String.valueOf(amid)); + hit.setField("bmid", String.valueOf(bmid)); + hit.setField("cmid", String.valueOf(cmid)); + return hit; + } + private void assertHitWithoutFields(String uri,int relevancy,Hit hit) { assertEquals(uri,hit.getId().toString()); assertEquals(relevancy, ((int) hit.getRelevance().getScore())); @@ -673,6 +731,12 @@ public class FieldCollapsingSearcherTestCase { assertEquals(bmid,Integer.parseInt((String) hit.getField("bmid"))); } + private void assertHit(String uri,int relevancy,int amid,int bmid,int cmid,Hit hit) { + assertHitAmid(uri,relevancy,amid,hit); + assertHitBmid(uri,relevancy,bmid,hit); + assertEquals(cmid,Integer.parseInt((String) hit.getField("cmid"))); + } + private static class ZeroHitsControl extends com.yahoo.search.Searcher { public int queryCount = 0; |