// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.searcher;
import com.yahoo.component.annotation.Inject;
import com.yahoo.component.chain.dependencies.After;
import com.yahoo.component.chain.dependencies.Before;
import com.yahoo.container.QrSearchersConfig;
import com.yahoo.prelude.fastsearch.FastHit;
import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.search.Searcher;
import com.yahoo.processing.request.CompoundName;
import com.yahoo.search.result.Hit;
import com.yahoo.search.searchchain.Execution;
import com.yahoo.search.searchchain.PhaseNames;
import java.util.Iterator;
import java.util.Map;
/**
 * A searcher which does parametrized collapsing.
 *
 * <p>Collapsing is only performed when the query property {@code collapsefield} is set;
 * otherwise the query is passed straight through to the next searcher. When active, at most
 * {@code collapsesize} hits (falling back to the configured default) are kept for each
 * distinct value of the collapse field. The query property {@code collapse.summary} may name
 * the summary class to fill hits with before the collapse field is read.</p>
 *
 * <p>The query property {@code collapse} is set to {@code "0"} before the downstream
 * searches are issued and to {@code "1"} once collapsing has been carried out.</p>
 *
 * @author Steinar Knutsen
 */
@After(PhaseNames.RAW_QUERY)
@Before(PhaseNames.TRANSFORMED_QUERY)
public class FieldCollapsingSearcher extends Searcher {

    private static final CompoundName collapse = new CompoundName("collapse");
    private static final CompoundName collapsefield = new CompoundName("collapsefield");
    private static final CompoundName collapsesize = new CompoundName("collapsesize");
    private static final CompoundName collapseSummaryName = new CompoundName("collapse.summary");

    /**
     * Bound on the number of queries sent to the next searcher.
     * NOTE: the loop in {@link #search(com.yahoo.search.Query, Execution)} tests
     * {@code performedQueries <= maxQueries} after incrementing the counter, so up to
     * {@code maxQueries + 1} downstream queries may actually be issued.
     */
    private final int maxQueries = 4;

    /**
     * The max number of hits that will be preserved per unique
     * value of the collapsing parameter, unless overridden by the
     * {@code collapsesize} query property.
     */
    private int defaultCollapseSize;

    /**
     * The factor by which to scale up the requested number of hits
     * from the next searcher in the chain, because collapsing will
     * likely delete many hits.
     */
    private double extraFactor;

    /** Create this searcher using default values for all settings */
    public FieldCollapsingSearcher() {
        this((String) null);
    }

    /**
     * Creates a collapser with a collapse size of 1 and an extra factor of 2.0.
     *
     * @param collapseField the default field to collapse on, or null to not collapse as default.
     *                      This value is passed on to a constructor which ignores it.
     */
    public FieldCollapsingSearcher(String collapseField) {
        this(1, 2.0, collapseField);
    }

    /**
     * Creates a collapser from configuration.
     *
     * @param config the config from which the default collapse size and extra factor are read
     */
    @Inject
    @SuppressWarnings("unused")
    public FieldCollapsingSearcher(QrSearchersConfig config) {
        QrSearchersConfig.Com.Yahoo.Prelude.Searcher.FieldCollapsingSearcher
                s = config.com().yahoo().prelude().searcher().FieldCollapsingSearcher();
        init(s.collapsesize(), s.extrafactor());
    }

    /**
     * Creates a collapser
     *
     * @param collapseSize the default maximum number of hits to keep in each collapsed group,
     *                     i.e. per distinct value of the collapse field
     * @param extraFactor the multiplier by which to scale up the requested number of hits,
     *                    to allow some hits to be removed without refetching
     * @param collapseField the field to collapse on. This is currently ignored.
     */
    public FieldCollapsingSearcher(int collapseSize, double extraFactor, String collapseField) {
        init(collapseSize, extraFactor);
    }

    /** Stores the settings shared by all constructors. */
    private void init(int collapseSize, double extraFactor) {
        this.defaultCollapseSize = collapseSize;
        this.extraFactor = extraFactor;
    }

    /**
     * First fetch result from the next searcher in the chain.
     * If collapse is active, do collapsing.
     * Otherwise, act as a simple pass through
     *
     * <p>When collapsing, the downstream chain is queried repeatedly with a growing window
     * until enough hits survive collapsing to cover {@code offset + hits}, the downstream
     * reports no more hits, or the query bound is reached.</p>
     *
     * @param query the incoming query; its {@code collapse} property is modified
     * @param execution the execution used to search and fill downstream
     * @return the downstream result if {@code collapsefield} is not set, otherwise a new result
     *         holding the collapsed hits trimmed to the requested offset and hit count
     */
    @Override
    public Result search(com.yahoo.search.Query query, Execution execution) {
        String collapseField = query.properties().getString(collapsefield);
        if (collapseField == null) return execution.search(query);

        int collapseSize = query.properties().getInteger(collapsesize, defaultCollapseSize);
        query.properties().set(collapse, "0");

        // Only clones of the query are given a new offset/hits window below,
        // so the requested end position stays constant throughout the loop.
        int wantedHits = query.getOffset() + query.getHits();

        // Over-fetch by extraFactor since collapsing is expected to discard hits.
        int hitsToRequest = query.getHits() != 0 ? (int) Math.ceil((wantedHits + 1) * extraFactor) : 0;
        int nextOffset = 0;
        int hitsAfterCollapse;
        boolean moreHitsAvailable = true;
        // Number of hits kept so far for each collapse field value
        Map<String, Integer> knownCollapses = new java.util.HashMap<>();
        Result result = new Result(query);
        int performedQueries = 0;
        Result resultSource;

        String collapseSummary = query.properties().getString(collapseSummaryName);
        String summaryClass = (collapseSummary == null)
                ? query.getPresentation().getSummary() : collapseSummary;
        // The trace shows the raw collapse.summary property, which is null when it is not set.
        query.trace("Collapsing by '" + collapseField + "' using summary '" + collapseSummary + "'", 2);

        do {
            resultSource = search(query.clone(), execution, nextOffset, hitsToRequest);
            // Fill before collapsing so the collapse field can be read from the hits
            fill(resultSource, summaryClass, execution);
            collapse(result, knownCollapses, resultSource, collapseField, collapseSize);
            hitsAfterCollapse = result.getHitCount();

            if (resultSource.getTotalHitCount() < (hitsToRequest + nextOffset)) {
                // the searcher downstream has no more hits
                moreHitsAvailable = false;
            }
            nextOffset += hitsToRequest;
            if (hitsAfterCollapse < wantedHits) {
                // Still short: widen the next window
                hitsToRequest = (int) Math.ceil(hitsToRequest * extraFactor);
            }
            ++performedQueries;
        } while (hitsToRequest != 0
                 && (hitsAfterCollapse < wantedHits)
                 && moreHitsAvailable
                 && (performedQueries <= maxQueries));

        // Set correct meta information
        result.mergeWith(resultSource);
        // Keep only (offset,.. offset+hits) hits
        result.hits().trim(query.getOffset(), query.getHits());
        // Mark query as query with collapsing
        query.properties().set(collapse, "1");
        return result;
    }

    /** Searches downstream with the given query after setting its offset and hits window. */
    private Result search(Query query, Execution execution, int offset, int hits) {
        query.setOffset(offset);
        query.setHits(hits);
        return execution.search(query);
    }

    /**
     * Collapse logic. Copies hits from resultSource to result, preserving only
     * collapseSize hits for each unique value of the collapsing field.
     * Hits which are not FastHit instances, and hits which have no value for the
     * collapse field, are always copied. The first hit seen for a value is always
     * copied, regardless of collapseSize.
     *
     * @param result the result accumulating the hits which survive collapsing
     * @param knownCollapses the number of hits kept so far per collapse field value,
     *                       updated by this method
     * @param resultSource the result whose hits should be collapsed
     * @param collapseField the name of the hit field to collapse on
     * @param collapseSize the maximum number of hits to keep per collapse field value
     */
    private void collapse(Result result, Map<String, Integer> knownCollapses,
                          Result resultSource, String collapseField, int collapseSize) {
        for (Iterator<Hit> it = resultSource.hits().iterator(); it.hasNext();) {
            Hit unknownHit = it.next();
            if (!(unknownHit instanceof FastHit)) {
                result.hits().add(unknownHit);
                continue;
            }
            FastHit hit = (FastHit) unknownHit;
            Object peek = hit.getField(collapseField);
            String collapseId = peek != null ? peek.toString() : null;
            if (collapseId == null) {
                result.hits().add(hit);
                continue;
            }

            Integer numHitsThisField = knownCollapses.get(collapseId);
            if (numHitsThisField == null) {
                // First hit with this value
                knownCollapses.put(collapseId, 1);
                result.hits().add(hit);
            } else if (numHitsThisField < collapseSize) {
                result.hits().add(hit);
                knownCollapses.put(collapseId, numHitsThisField + 1);
            }
        }
    }

}