diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /container-search/src/main/java/com/yahoo/prelude/searcher |
Publish
Diffstat (limited to 'container-search/src/main/java/com/yahoo/prelude/searcher')
16 files changed, 2335 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/BlendingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/BlendingSearcher.java new file mode 100644 index 00000000000..268fe5f4ea5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/BlendingSearcher.java @@ -0,0 +1,276 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + + +import com.google.inject.Inject; +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.*; + + +/** + * Flattens a result consisting of multiple hit groups containing hits + * into a single flat list of hits. + * + * @author Bob Travis + * @author Steinar Knutsen + * @author Arne Fossaa + */ +@After(PhaseNames.BLENDED_RESULT) +@Before(PhaseNames.UNBLENDED_RESULT) +@Provides(BlendingSearcher.BLENDING) +public class BlendingSearcher extends Searcher { + + public static final String BLENDING = "Blending"; + + private final String documentId; + + @Inject + public BlendingSearcher(ComponentId id, QrSearchersConfig cfg) { + super(id); + QrSearchersConfig.Com.Yahoo.Prelude.Searcher.BlendingSearcher s = cfg.com().yahoo().prelude().searcher().BlendingSearcher(); + documentId = s.docid().length() > 0 ? s.docid() : null; + + } + + /** + * Only for legacy tests. 
+ */ + public BlendingSearcher(String blendingDocumentId) { + this.documentId = blendingDocumentId; + } + + @Override + public com.yahoo.search.Result search(com.yahoo.search.Query query, Execution execution) { + Result result = execution.search(query); + + Result blended = blendResults(result, query, query.getOffset(), query.getHits(), execution); + blended.trace("Blended result"); + return blended; + } + + /** + * Fills this result by forwarding to the right chained searchers + */ + @Override + public void fill(com.yahoo.search.Result result, String summaryClass, Execution execution) { + execution.fill(result, summaryClass); + result.analyzeHits(); + } + + /** + * Produce a single blended result list from a group of hitgroups. + * + * It is assumed that the results are ordered in hitgroups. If not, the blend will not be performed + */ + protected Result blendResults(Result result, Query q, int offset, int hits, Execution execution) { + + //Assert that there are more than one hitgroup and that there are only hitgroups on the lowest level + + boolean foundNonGroup = false; + Iterator<Hit> hitIterator = result.hits().iterator(); + List<HitGroup> groups = new ArrayList<>(); + while (hitIterator.hasNext()) { + Hit hit = hitIterator.next(); + if (hit instanceof HitGroup) { + groups.add((HitGroup)hit); + hitIterator.remove(); + } else if(!hit.isMeta()) { + foundNonGroup = true; + } + } + + if(foundNonGroup) { + result.hits().addError(ErrorMessage.createUnspecifiedError("Blendingsearcher could not blend - there are toplevel hits" + + " that are not hitgroups")); + return result; + } + if (groups.size() == 0) { + return result; + } else if (groups.size() == 1) { + result.hits().addAll(groups.get(0).asUnorderedHits()); + result.hits().setOrderer(groups.get(0).getOrderer()); + return result; + } else { + if (documentId != null) { + return blendResultsUniquely(result, q, offset, hits, groups, execution); + } else { + return blendResultsDirectly(result, q, offset, hits, 
groups, execution); + } + } + } + + private Result sortAndTrimResults(Result result, Query q, int offset, int hits, Execution execution) { + if (q.getRanking().getSorting() != null) { + execution.fillAttributes(result); // Always correct as we can only sort on attributes + result.hits().sort(); + } + result.hits().trim(offset, hits); + return result; + } + + private abstract class DocumentMerger { + protected Set<String> documentsToStrip; + protected Result result; + protected HitGroup group; + + abstract void put(HitGroup source, Hit hit, Execution execution); + + abstract void scan(Hit hit, int i, Execution execution); + + Result getResult() { + return result; + } + + //Since we cannot use prelude.hit#getProperty, we'll have to improvise + private String getProperty(Hit hit, String field) { + Object o = hit.getField(field); + return o == null ? null : o.toString(); + } + + + protected void storeID(Hit hit, Execution execution) { + String id = getProperty(hit, documentId); + + if (id != null) { + documentsToStrip.add(id); + } else { + if (!result.isFilled(result.getQuery().getPresentation().getSummary())) { + fill(result, result.getQuery().getPresentation().getSummary(), execution); + id = getProperty(hit, documentId); + if (id != null) { + documentsToStrip.add(id); + } + } + } + } + + protected boolean known(HitGroup source, Hit hit, Execution execution) { + String stripID = getProperty(hit, documentId); + + if (stripID == null) { + if (!source.isFilled(result.getQuery().getPresentation().getSummary())) { + Result nResult = new Result(result.getQuery()); + nResult.hits().add(source); + fill(nResult, nResult.getQuery().getPresentation().getSummary(), execution); + stripID = getProperty(hit, documentId); + if (stripID == null) { + return false; + } + } else { + return false; + } + } + + if (documentsToStrip.contains(stripID)) { + return true; + } + + documentsToStrip.add(stripID); + return false; + } + + void scanResult(Execution execution) { + List<Hit> hits = 
group.asUnorderedHits(); + for (int i = hits.size()-1; i >= 0; i--) { + Hit sniffHit = hits.get(i); + if (!sniffHit.isMeta()) { + scan(sniffHit, i, execution); + } else { + result.hits().add(sniffHit); + } + } + } + + void mergeResults(List<HitGroup> groups, Execution execution) { + // note, different loop direction from scanResult() + for(HitGroup group : groups.subList(1, groups.size())) { + for(Hit hit : group.asList()) { + if(hit.isMeta()) { + result.hits().add(hit); + } else { + put(group, hit, execution); + } + } + } + } + } + + + private class BasicMerger extends DocumentMerger { + BasicMerger(Result result, HitGroup group) { + this.result = result; + this.group = group; + } + + void put(HitGroup source, Hit hit, Execution execution) { + result.hits().add(hit); + } + + void scan(Hit hit, int i, Execution execution) { + result.hits().add(hit); + } + } + + + private class UniqueMerger extends DocumentMerger { + UniqueMerger(Result result, HitGroup group, Set<String> documentsToStrip) { + this.documentsToStrip = documentsToStrip; + this.result = result; + this.group = group; + } + + void scan(Hit hit, int i, Execution execution) { + result.hits().add(hit); + if (!hit.isMeta()) { + storeID(hit, execution); + } + } + + void put(HitGroup source, Hit hit, Execution execution) { + if (!hit.isMeta()) { + if (!known(source, hit, execution)) { + addHit(hit); + } + } else { + result.hits().add(hit); + } + } + + protected void addHit(Hit hit) { + result.hits().add(hit); + } + + } + + private Result blendResultsDirectly(Result result, Query q, int offset, + int hits, List<HitGroup> groups, Execution execution) { + DocumentMerger m = new BasicMerger(result, groups.get(0)); + + m.scanResult(execution); + m.mergeResults(groups, execution); + return sortAndTrimResults(m.getResult(), q, offset, hits, execution); + } + + private Result blendResultsUniquely(Result result, Query q, int offset, + int hits, List<HitGroup> groups, Execution execution) { + DocumentMerger m = new 
UniqueMerger(result, groups.get(0), new HashSet<>(20)); + + m.scanResult(execution); + m.mergeResults(groups, execution); + return sortAndTrimResults(m.getResult(), q, offset, hits, execution); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/CachingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/CachingSearcher.java new file mode 100644 index 00000000000..1152246a32e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/CachingSearcher.java @@ -0,0 +1,77 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.prelude.cache.Cache; +import com.yahoo.prelude.cache.QueryCacheKey; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.statistics.Statistics; +import com.yahoo.statistics.Value; + +/** + * A generic caching searcher which caches all passing results. 
+ * + * @author vegardh + */ +@After("rawQuery") +@Before("transformedQuery") +public class CachingSearcher extends Searcher { + + private static final CompoundName nocachewrite=new CompoundName("nocachewrite"); + + private Cache<QueryCacheKey, Result> cache; + private Value cacheHitRatio = null; + + public CachingSearcher(QrSearchersConfig config, Statistics manager) { + long maxSizeBytes = config.com().yahoo().prelude().searcher().CachingSearcher().cachesizemegabytes()*1024*1024; + long timeToLiveMillis = config.com().yahoo().prelude().searcher().CachingSearcher().timetoliveseconds()*1000; + long maxEntrySizeBytes = config.com().yahoo().prelude().searcher().CachingSearcher().maxentrysizebytes(); + cache=new Cache<>(maxSizeBytes, timeToLiveMillis, maxEntrySizeBytes, manager); + initRatio(manager); + } + + private void initRatio(Statistics manager) { + cacheHitRatio = new Value("querycache_hit_ratio", manager, + new Value.Parameters().setNameExtension(false).setLogRaw(false).setLogMean(true)); + } + + private synchronized void cacheHit() { + cacheHitRatio.put(1.0d); + } + + private synchronized void cacheMiss() { + cacheHitRatio.put(0.0d); + } + + private boolean noCacheWrite(Query query) { + return query.properties().getBoolean(nocachewrite); + } + + public Result search(com.yahoo.search.Query query, Execution execution) { + if (query.getNoCache()) { + return execution.search(query); + } + QueryCacheKey queryKey = new QueryCacheKey(query); + Result cachedResult=cache.get(queryKey); + if (cachedResult!=null) { + cacheHit(); + return cachedResult; + } + cacheMiss(); + Query originalQuery = query.clone(); // Need a copy, as cache hash key later on, maybe. 
+ Result result = execution.search(query); + execution.fill(result); + if (!noCacheWrite(query)) { + queryKey.setQuery(originalQuery); // Because the query member has changed state + cache.put(queryKey,result); + } + return result; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/DocumentSourceSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/DocumentSourceSearcher.java new file mode 100644 index 00000000000..f4b3ab3406a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/DocumentSourceSearcher.java @@ -0,0 +1,222 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + + +/** + * <p>Implements a document source. You pass in a query and a Result + * set. When this Searcher is called with that query it will return + * that result set.</p> + * + * <p>This supports multi-phase search.</p> + * + * <p>To avoid having to add type information for the fields, a quick hack is used to + * support testing of attribute prefetching. + * Any field in the configured hits which has a name starting with "attribute" + * will be returned when attribute prefetch filling is requested.</p> + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +@SuppressWarnings({"rawtypes"}) +public class DocumentSourceSearcher extends Searcher { + // as for the SuppressWarnings annotation above, we are inside + // com.yahoo.prelude, this is old stuff, really no point firing off those + // warnings here... 
+ + private Result defaultFilledResult; + private Map<Query, Result> completelyFilledResults = new HashMap<>(); + private Map<Query, Result> attributeFilledResults = new HashMap<>(); + private Map<Query, Result> unFilledResults = new HashMap<>(); + //private Result defaultUnfilledResult; + + /** Time (in ms) at which the index of this searcher was last modified */ + long editionTimeStamp=0; + + private int queryCount; + + public DocumentSourceSearcher() { + addDefaultResults(); + } + + /** + * Adds a result which can be returned either as empty, + * filled or attribute only filled later. + * Summary fields starting by "a" are attributes, others are not. + * + * @return true when replacing an existing <query, result> pair. + */ + public boolean addResultSet(Query query, Result fullResult) { + Result emptyResult = new Result(query.clone()); + Result attributeResult = new Result(query.clone()); + emptyResult.setTotalHitCount(fullResult.getTotalHitCount()); + attributeResult.setTotalHitCount(fullResult.getTotalHitCount()); + int counter=0; + for (Iterator i = fullResult.hits().deepIterator();i.hasNext();) { + Hit fullHit = (Hit)i.next(); + + Hit emptyHit = (Hit)fullHit.clone(); + emptyHit.clearFields(); + emptyHit.setFillable(); + emptyHit.setRelevance(fullHit.getRelevance()); + + Hit attributeHit = (Hit)fullHit.clone(); + removePropertiesNotStartingByA(attributeHit); + attributeHit.setFillable(); + attributeHit.setRelevance(fullHit.getRelevance()); + for (Object propertyKeyObject : (Set) fullHit.fields().keySet()) { + String propertyKey=propertyKeyObject.toString(); + if (propertyKey.startsWith("attribute")) + attributeHit.setField(propertyKey, fullHit.getField(propertyKey)); + } + if (fullHit.getField(Hit.SDDOCNAME_FIELD)!=null) + attributeHit.setField(Hit.SDDOCNAME_FIELD, fullHit.getField(Hit.SDDOCNAME_FIELD)); + + // A simple summary lookup mechanism, similar to FastSearch's + emptyHit.setField("summaryid", String.valueOf(counter)); + 
attributeHit.setField("summaryid", String.valueOf(counter)); + fullHit.setField("summaryid", String.valueOf(counter)); + + counter++; + emptyResult.hits().add(emptyHit); + attributeResult.hits().add(attributeHit); + } + unFilledResults.put(getQueryKeyClone(query), emptyResult); + attributeFilledResults.put(getQueryKeyClone(query), attributeResult); + if (completelyFilledResults.put(getQueryKeyClone(query), fullResult.clone()) != null) { + setEditionTimeStamp(System.currentTimeMillis()); + return true; + } + return false; + } + + /** + * Returns a query clone which has offset and hits set to null. This is used by access to + * the maps using the query as key to achieve lookup independent of offset/hits value + */ + private com.yahoo.search.Query getQueryKeyClone(com.yahoo.search.Query query) { + com.yahoo.search.Query key=query.clone(); + key.setWindow(0,0); + key.getModel().setSources(""); + return key; + } + + private void removePropertiesNotStartingByA(Hit hit) { + List<String> toRemove=new java.util.ArrayList<>(); + for (Iterator i= ((Set) hit.fields().keySet()).iterator(); i.hasNext(); ) { + String key=(String)i.next(); + if (!key.startsWith("a")) + toRemove.add(key); + } + for (Iterator<String> i=toRemove.iterator(); i.hasNext(); ) { + String propertyName=i.next(); + hit.removeField(propertyName); + } + } + + private void addDefaultResults() { + Query q = new Query("?query=default"); + Result r = new Result(q); + r.hits().add(new Hit("http://default-1.html")); + r.hits().add(new Hit("http://default-2.html")); + r.hits().add(new Hit("http://default-3.html")); + r.hits().add(new Hit("http://default-4.html")); + defaultFilledResult = r; + addResultSet(q, r); + } + + public long getEditionTimeStamp(){ + long myEditionTime; + synchronized(this){ + myEditionTime=this.editionTimeStamp; + } + return myEditionTime; + } + + public void setEditionTimeStamp(long editionTime) { + synchronized(this){ + this.editionTimeStamp=editionTime; + } + } + + public Result 
search(com.yahoo.search.Query query, Execution execution) { + queryCount++; + Result r; + r = unFilledResults.get(getQueryKeyClone(query)); + if (r == null) { + r = defaultFilledResult.clone(); + } else { + r = r.clone(); + } + r.setQuery(query); + r.hits().trim(query.getOffset(), query.getHits()); + return r; + } + + @Override + public void fill(com.yahoo.search.Result result, String summaryClass, Execution execution) { + Result filledResult; + if ("attributeprefetch".equals(summaryClass)) + filledResult=attributeFilledResults.get(getQueryKeyClone(result.getQuery())); + else + filledResult = completelyFilledResults.get(getQueryKeyClone(result.getQuery())); + + if (filledResult == null) { + filledResult = defaultFilledResult; + } + fillHits(filledResult,result,summaryClass); + } + + private void fillHits(Result source,Result target,String summaryClass) { + for (Iterator hitsToFill= target.hits().deepIterator() ; hitsToFill.hasNext();) { + Hit hitToFill = (Hit) hitsToFill.next(); + String summaryId= (String) hitToFill.getField("summaryid"); + if (summaryId==null) continue; // Can not fill this + Hit filledHit = lookupBySummaryId(source,summaryId); + if (filledHit==null) + throw new RuntimeException("Can't fill hit with summaryid '" + summaryId + "', not present"); + + for (Iterator props= filledHit.fieldIterator();props.hasNext();) { + Map.Entry propertyEntry = (Map.Entry)props.next(); + hitToFill.setField(propertyEntry.getKey().toString(), + propertyEntry.getValue()); + } + hitToFill.setFilled(summaryClass); + } + target.analyzeHits(); + } + + private Hit lookupBySummaryId(Result result,String summaryId) { + for (Iterator i= result.hits().deepIterator(); i.hasNext(); ) { + Hit hit=(Hit)i.next(); + if (summaryId.equals(hit.getField("summaryid"))) { + return hit; + } + } + return null; + } + + /** + * Returns the number of queries made to this searcher since the last + * reset. 
For testing - not reliable if multiple threads make + * queries simultaneously + */ + public int getQueryCount() { + return queryCount; + } + + public void resetQueryCount() { + queryCount=0; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java new file mode 100644 index 00000000000..10a436b3ae8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java @@ -0,0 +1,190 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.google.inject.Inject; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.Iterator; +import java.util.Map; + + +/** + * A searcher which does parametrized collapsing. Based on + * SiteCollapsingSearcher. Deprecated - use grouping. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@SuppressWarnings("deprecation") +@After(PhaseNames.RAW_QUERY) +@Before(PhaseNames.TRANSFORMED_QUERY) +public class FieldCollapsingSearcher extends Searcher { + + private static final CompoundName collapse = new CompoundName("collapse"); + private static final CompoundName collapsefield=new CompoundName("collapsefield"); + private static final CompoundName collapsesize=new CompoundName("collapsesize"); + private static final CompoundName collapseSummaryName=new CompoundName("collapse.summary"); + + /** Maximum number of queries to send next searcher */ + private int maxQueries = 4; + + /** + * The max number of hits that will be preserved per unique + * value of the collapsing parameter. + */ + private int defaultCollapseSize; + + /** + * The factor by which to scale up the requested number of hits + * from the next searcher in the chain, because collapsing will + * likely delete many hits. + */ + private double extraFactor; + + /** Create this searcher using default values for all settings */ + public FieldCollapsingSearcher() { + this((String) null); + } + + /** + * Creates a collapser + * + * @param collapseField the default field to collapse on, or null to not collapse as default + */ + public FieldCollapsingSearcher(String collapseField) { + this(1, 2.0, collapseField); + } + + @Inject + public FieldCollapsingSearcher(QrSearchersConfig config) { + QrSearchersConfig.Com.Yahoo.Prelude.Searcher.FieldCollapsingSearcher + s = config.com().yahoo().prelude().searcher().FieldCollapsingSearcher(); + + init(s.collapsesize(), s.extrafactor()); + } + + /** + * Creates a collapser + * + * @param collapseSize the maximum number of hits to keep per + * field the default max number of hits in each collapsed group + * @param extraFactor the percentage by which to scale up the + * requested number of hits, to allow some hits to be removed + * without refetching + * @param collapseField the field to 
collapse on. This is currently <b>ignored</b>. + */ + public FieldCollapsingSearcher(int collapseSize, double extraFactor, String collapseField) { + init(collapseSize, extraFactor); + } + + private void init(int collapseSize, double extraFactor) { + this.defaultCollapseSize = collapseSize; + this.extraFactor = extraFactor; + } + + /** + * First fetch result from the next searcher in the chain. + * If collapse is active, do collapsing. + * Otherwise, act as a simple pass through + */ + public Result search(com.yahoo.search.Query query, Execution execution) { + String collapseField = query.properties().getString(collapsefield); + + if (collapseField==null) return execution.search(query); + + int collapseSize = query.properties().getInteger(collapsesize,defaultCollapseSize); + query.properties().set(collapse, "0"); + + int hitsToRequest = query.getHits() != 0 ? (int) Math.ceil((query.getOffset() + query.getHits() + 1) * extraFactor) : 0; + int nextOffset = 0; + int hitsAfterCollapse; + boolean moreHitsAvailable = true; + Map<String, Integer> knownCollapses = new java.util.HashMap<>(); + Result result = new Result(query); + int performedQueries = 0; + Result resultSource; + String collapseSummary = query.properties().getString(collapseSummaryName); + + do { + resultSource = search(query.clone(), execution, nextOffset, hitsToRequest); + String summaryClass = (collapseSummary == null) + ? 
query.getPresentation().getSummary() : collapseSummary; + fill(resultSource, summaryClass, execution); + collapse(result, knownCollapses, resultSource, collapseField, collapseSize); + + hitsAfterCollapse = result.getHitCount(); + if (resultSource.getTotalHitCount() < (hitsToRequest + nextOffset)) { + // the searcher downstream has no more hits + moreHitsAvailable = false; + } + nextOffset += hitsToRequest; + if (hitsAfterCollapse < query.getOffset() + query.getHits()) { + hitsToRequest = (int) Math.ceil(hitsToRequest * extraFactor); + } + ++performedQueries; + + } while (hitsToRequest != 0 + && (hitsAfterCollapse < query.getOffset() + query.getHits()) + && moreHitsAvailable + && (performedQueries <= maxQueries)); + + // Set correct meta information + result.mergeWith(resultSource); + // Keep only (offset,.. offset+hits) hits + result.hits().trim(query.getOffset(), query.getHits()); + // Mark query as query with collapsing + query.properties().set(collapse, "1"); + return result; + } + + private Result search(Query query, Execution execution, int offset , int hits) { + query.setOffset(offset); + query.setHits(hits); + return execution.search(query); + } + + /** + * Collapse logic. Preserves only maxHitsPerField hits + * for each unique value of the collapsing parameter. + */ + private void collapse(Result result, Map<String, Integer> knownCollapses, + Result resultSource, String collapseField, int collapseSize) { + for (Iterator<Hit> it = resultSource.hits().iterator(); it.hasNext();) { + Hit unknownHit = it.next(); + + if (!(unknownHit instanceof FastHit)) { + result.hits().add(unknownHit); + continue; + } + FastHit hit = (FastHit) unknownHit; + Object peek = hit.getField(collapseField); + String collapseId = peek != null ? 
peek.toString() : null; + if (collapseId == null) { + result.hits().add(hit); + continue; + } + + if (knownCollapses.containsKey(collapseId)) { + int numHitsThisField = knownCollapses.get(collapseId).intValue(); + + if (numHitsThisField < collapseSize) { + result.hits().add(hit); + ++numHitsThisField; + knownCollapses.put(collapseId, new Integer(numHitsThisField)); + } + } else { + knownCollapses.put(collapseId, new Integer(1)); + result.hits().add(hit); + } + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/FillSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/FillSearcher.java new file mode 100644 index 00000000000..f7bff5b481c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/FillSearcher.java @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.component.ComponentId; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; + +/** + * This searcher fills the results in the first phase. May be put into + * a search chain to ensure full results are present at an earlier + * time than they would normally be. 
+ * + * @author <a href="mailto:havardpe@yahoo-inc.com">havardpe</a> + **/ +public class FillSearcher extends Searcher { + private final Searcher next; + + public FillSearcher() { + next = null; + } + + public FillSearcher(Searcher next) { + this.next = next; + } + + @Override + public Result search(Query query, Execution execution) { + Result result; + if (next == null) { + result = execution.search(query); + execution.fill(result); + } else { + Execution e = new Execution(next, execution.context()); + result = e.search(query); + e.fill(result); + } + return result; + } + + // TODO: Remove this method as it does nothing new + @Override + public void fill(Result result, String summaryClass, Execution execution) { + if (next == null) { + execution.fill(result, summaryClass); + } else { + Execution e = new Execution(next, execution.context()); + e.fill(result, summaryClass); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java new file mode 100644 index 00000000000..dbfde502b75 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.prelude.hitfield.JSONString; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.FeatureData; +import com.yahoo.search.result.StructuredData; +import com.yahoo.search.searchchain.Execution; + +import java.util.Iterator; + +/** + * Traces, as JSON, the content of the hit field named by the "dumpjson" query property. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ + +public class JSONDebugSearcher extends Searcher { + public static final String JSON_FIELD = "JSON field: "; + public static final String STRUCT_FIELD = "Structured data field (as json): "; + public static final String FEATURE_FIELD = "Feature data field (as json): "; + + private static CompoundName PROPERTYNAME = new CompoundName("dumpjson"); + + public Result search(com.yahoo.search.Query query, Execution execution) { + Result r = execution.search(query); + String propertyName = query.properties().getString(PROPERTYNAME); + if (propertyName != null) { + execution.fill(r); + for (Iterator<Hit> i = r.hits().deepIterator(); i.hasNext();) { + Hit h = i.next(); + if (h instanceof FastHit) { + FastHit hit = (FastHit) h; + Object o = hit.getField(propertyName); + if (o instanceof JSONString) { + JSONString j = (JSONString) o; + r.getQuery().trace(JSON_FIELD + j.getContent(), false, 5); + } + if (o instanceof StructuredData) { + StructuredData d = (StructuredData) o; + r.getQuery().trace(STRUCT_FIELD + d.toJson(), false, 5); + } + if (o instanceof FeatureData) { + FeatureData d = (FeatureData) o; + r.getQuery().trace(FEATURE_FIELD + d.toJson(), false, 5); + } + } + } + } + return r; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/JuniperSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/JuniperSearcher.java new file mode 100644 index 00000000000..75ae960cac0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/JuniperSearcher.java @@ -0,0 +1,212 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.searcher; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; + +import com.google.inject.Inject; +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.search.Searcher; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.prelude.hitfield.BoldCloseFieldPart; +import com.yahoo.prelude.hitfield.BoldOpenFieldPart; +import com.yahoo.prelude.hitfield.FieldPart; +import com.yahoo.prelude.hitfield.HitField; +import com.yahoo.prelude.hitfield.SeparatorFieldPart; +import com.yahoo.prelude.hitfield.StringFieldPart; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +/** + * Converts juniper highlighting to XML style + * <p> + * Note: This searcher only converts backend binary highlighting and separators + * to the configured highlighting and separator tags. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@After(PhaseNames.RAW_QUERY) +@Before(PhaseNames.TRANSFORMED_QUERY) +@Provides(JuniperSearcher.JUNIPER_TAG_REPLACING) +public class JuniperSearcher extends Searcher { + + public final static char RAW_HIGHLIGHT_CHAR = '\u001F'; + public final static char RAW_SEPARATOR_CHAR = '\u001E'; + + private static final String ELLIPSIS = "..."; + + // The name of the field containing document type + private static final String MAGIC_FIELD = Hit.SDDOCNAME_FIELD; + + public static final String JUNIPER_TAG_REPLACING = "JuniperTagReplacing"; + + private String boldOpenTag; + private String boldCloseTag; + private String separatorTag; + + @Inject + public JuniperSearcher(ComponentId id, QrSearchersConfig config) { + super(id); + + boldOpenTag = config.tag().bold().open(); + boldCloseTag = config.tag().bold().close(); + separatorTag = config.tag().separator(); + } + + /** + * Convert Juniper style property highlighting to XML style. + */ + @Override + public Result search(Query query, Execution execution) { + Result result = execution.search(query); + highlight(query.getPresentation().getBolding(), result.hits().deepIterator(), null, + execution.context().getIndexFacts().newSession(query)); + return result; + } + + @Override + public void fill(Result result, String summaryClass, Execution execution) { + Result workResult = result; + final int worstCase = workResult.getHitCount(); + final List<Hit> hits = new ArrayList<>(worstCase); + for (final Iterator<Hit> i = workResult.hits().deepIterator(); i.hasNext();) { + final Hit sniffHit = i.next(); + if ( ! 
(sniffHit instanceof FastHit)) continue; + + final FastHit hit = (FastHit) sniffHit; + if (hit.isFilled(summaryClass)) continue; + + hits.add(hit); + } + execution.fill(workResult, summaryClass); + highlight(workResult.getQuery().getPresentation().getBolding(), hits.iterator(), summaryClass, + execution.context().getIndexFacts().newSession(result.getQuery())); + } + + private void highlight(boolean bolding, Iterator<Hit> hitsToHighlight, + String summaryClass, IndexFacts.Session indexFacts) { + while (hitsToHighlight.hasNext()) { + Hit sniffHit = hitsToHighlight.next(); + if ( ! (sniffHit instanceof FastHit)) continue; + + FastHit hit = (FastHit) sniffHit; + if (summaryClass != null && ! hit.isFilled(summaryClass)) continue; + + Object searchDefinitionField = hit.getField(MAGIC_FIELD); + if (searchDefinitionField == null) continue; + String searchDefinitionName = searchDefinitionField.toString(); + + for (String fieldName : hit.fields().keySet()) { + Index index = indexFacts.getIndex(fieldName, searchDefinitionName); + if (index.getDynamicSummary() || index.getHighlightSummary()) + insertTags(hit.buildHitField(fieldName, true, true), bolding, index.getDynamicSummary()); + } + } + } + + private void insertTags(final HitField oldProperty, final boolean bolding, final boolean dynteaser) { + boolean insideHighlight = false; + for (final ListIterator<FieldPart> i = oldProperty.listIterator(); i.hasNext();) { + final FieldPart f = i.next(); + if (f instanceof SeparatorFieldPart) { + setSeparatorString(bolding, (SeparatorFieldPart) f); + } + if (f.isFinal()) { + continue; + } + + final String toQuote = f.getContent(); + List<FieldPart> newFieldParts = null; + int previous = 0; + for (int j = 0; j < toQuote.length(); j++) { + final char key = toQuote.charAt(j); + switch (key) { + case RAW_HIGHLIGHT_CHAR: + newFieldParts = initFieldParts(newFieldParts); + addBolding(bolding, insideHighlight, f, toQuote, newFieldParts, previous, j); + previous = j + 1; + insideHighlight = 
!insideHighlight; + break; + case RAW_SEPARATOR_CHAR: + newFieldParts = initFieldParts(newFieldParts); + addSeparator(bolding, dynteaser, f, toQuote, newFieldParts, + previous, j); + previous = j + 1; + break; + default: + // no action + break; + } + } + if (previous > 0 && previous < toQuote.length()) { + newFieldParts.add(new StringFieldPart(toQuote.substring(previous), f.isToken())); + } + if (newFieldParts != null) { + i.remove(); + for (final Iterator<FieldPart> j = newFieldParts.iterator(); j.hasNext();) { + i.add(j.next()); + } + } + } + } + + private void setSeparatorString(final boolean bolding,final SeparatorFieldPart f) { + if (bolding) { + f.setContent(separatorTag); + } else { + f.setContent(ELLIPSIS); + } + } + + private void addSeparator(final boolean bolding, final boolean dynteaser, + final FieldPart f, final String toQuote, + final List<FieldPart> newFieldParts, final int previous, final int j) { + if (previous != j) { + newFieldParts.add(new StringFieldPart(toQuote.substring(previous, j), f.isToken())); + } + if (dynteaser) { + final FieldPart s = (bolding ? 
new SeparatorFieldPart(separatorTag) : new SeparatorFieldPart(ELLIPSIS)); + newFieldParts.add(s); + } + } + + private void addBolding(final boolean bolding, + final boolean insideHighlight, final FieldPart f, + final String toQuote, final List<FieldPart> newFieldParts, + final int previous, final int j) { + if (previous != j) { + newFieldParts.add(new StringFieldPart(toQuote.substring(previous, j), f.isToken())); + } + if (bolding) { + if (insideHighlight) { + newFieldParts.add(new BoldCloseFieldPart(boldCloseTag)); + } else { + if (newFieldParts.size() > 0 + && newFieldParts.get(newFieldParts.size() - 1) instanceof BoldCloseFieldPart) { + newFieldParts.remove(newFieldParts.size() - 1); + } else { + newFieldParts.add(new BoldOpenFieldPart(boldOpenTag)); + } + } + } + } + + private List<FieldPart> initFieldParts(List<FieldPart> newFieldParts) { + if (newFieldParts == null) { + newFieldParts = new ArrayList<>(); + } + return newFieldParts; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/KeyValueSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/KeyValueSearcher.java new file mode 100644 index 00000000000..a282dc22b53 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/KeyValueSearcher.java @@ -0,0 +1,166 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.searcher; + +import com.yahoo.document.BucketId; +import com.yahoo.document.BucketIdFactory; +import com.yahoo.document.DocumentId; +import com.yahoo.document.GlobalId; +import com.yahoo.document.idstring.IdString; +import com.yahoo.documentapi.messagebus.protocol.SearchColumnPolicy; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.prelude.query.IntItem; +import com.yahoo.prelude.query.QueryCanonicalizer; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.grouping.vespa.GroupingExecutor; +import com.yahoo.search.query.Model; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.result.DefaultErrorHit; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.vdslib.BucketDistribution; +import com.yahoo.component.chain.dependencies.Before; + +import java.util.Iterator; +import java.util.logging.Logger; + + +/** + * Searcher that does efficient key/value lookup using Vespa search as a + * backend. It does so by bypassing the first phase ranking, and only performs + * the second phase summary fetching. + * + * The keys to find are input as a comma-separated list using the <i>keys</i> + * query parameter. Each key should match a part of a document id. Given the key + * 'foo', and document id namespace 'mynamespace', the document id matched will + * be 'id:mynamespace:keyvalue::foo'. + * + * To scale the throughput with the number of partitions, the searcher uses the + * same hashing mechanisms as the document API to find out which node each key + * belongs to. The searcher then dispatches a summary request to retrieve keys + * and returns the result.
+ * + * @author <a href="lulf@yahoo-inc.com">Ulf Lilleengen</a> + */
@Before(GroupingExecutor.COMPONENT_NAME)
public class KeyValueSearcher extends Searcher {

    private static final Logger log = Logger.getLogger(KeyValueSearcher.class.getName());
    private final BucketIdFactory factory = new BucketIdFactory();
    private final BucketDistribution distribution;
    private final String summaryClass;
    private final String idSchemePrefix;
    private final int numRowBits;
    private final int traceLevel = 5;

    /** Sets up summary class, document id prefix, bucket distribution and row bits from config. */
    public KeyValueSearcher(KeyvalueConfig config) {
        this.summaryClass = config.summaryName();
        this.idSchemePrefix = createIdSchemePrefix(config);
        this.distribution = new BucketDistribution(config.numparts(), SearchColumnPolicy.DEFAULT_NUM_BUCKET_BITS);
        this.numRowBits = calcNumRowBits(config.numrows());
        log.config("Configuring " + KeyValueSearcher.class.getName() + " with " + config.numparts() + " partitions and doc id scheme '" + idSchemePrefix + "'");
    }

    /** Returns the part of the document id which precedes the key, for the configured id scheme. */
    private String createIdSchemePrefix(KeyvalueConfig config) {
        boolean docScheme = config.docIdScheme().equals(KeyvalueConfig.DocIdScheme.Enum.DOC_SCHEME);
        if ( ! docScheme) {
            return "id:" + config.docIdNameSpace() + ":" + config.docIdType() + "::";
        }
        return "doc:" + config.docIdNameSpace() + ":";
    }

    /**
     * Creates an unfilled, fillable hit for the document whose id is the
     * configured prefix followed by the trimmed key.
     */
    public Hit createHit(Query query, String key) {
        String documentId = createDocId(key.trim());
        BucketId bucket = factory.getBucketId(new DocumentId(documentId));
        int partition = getPartition(bucket);

        FastHit keyHit = new FastHit();
        keyHit.setGlobalId(new GlobalId(IdString.createIdString(documentId)));
        keyHit.setQuery(query);
        keyHit.setFillable();
        keyHit.setCached(false);
        keyHit.setPartId(partition << numRowBits, numRowBits);
        keyHit.setRelevance(1.0);
        keyHit.setIgnoreRowBits(true);
        keyHit.setDistributionKey(42);
        return keyHit;
    }

    /** Returns the full document id string for a key. */
    private String createDocId(String key) {
        return idSchemePrefix + key;
    }


    /**
     * Creates one hit per key in the comma separated "keys" property, fills
     * them, and returns only the hits that ended up filled. An error is set
     * on the result if any key could not be fetched.
     */
    @Override
    public Result search(Query query, Execution execution) {
        String keysParameter = query.properties().getString("keys");
        query.getPresentation().setSummary(summaryClass);
        if (keysParameter == null || keysParameter.isEmpty()) {
            return new Result(query, new ErrorMessage(ErrorMessage.NULL_QUERY, "'keys' parameter not set or empty."));
        }

        String[] keys = keysParameter.split(",");
        QueryTree queryTree = query.getModel().getQueryTree();
        QueryCanonicalizer.canonicalize(queryTree);
        if (queryTree.isEmpty()) {
            queryTree.setRoot(new IntItem(String.valueOf(keysParameter.hashCode())));
        }

        Result result = new Result(query);
        for (String key : keys) {
            result.hits().add(createHit(query, key));
        }
        execution.fill(result, summaryClass);
        if (query.isTraceable(traceLevel)) {
            traceResult(query, result);
        }

        // Keep only the hits which were actually filled
        int filledHits = 0;
        for (Iterator<Hit> remaining = result.hits().iterator(); remaining.hasNext(); ) {
            Hit candidate = remaining.next();
            boolean filled = candidate.isFillable() && candidate.isFilled(summaryClass);
            if (filled) {
                filledHits++;
            } else {
                remaining.remove();
            }
        }
        if (filledHits != keys.length) {
            result.hits().setError(new ErrorMessage(1, "Some keys could not be fetched"));
        }
        result.setTotalHitCount(filledHits);
        return result;
    }

    /** Traces each hit as filled or not, followed by the error hit of the result. */
    private void traceResult(Query query, Result result) {
        for (Iterator<Hit> hits = result.hits().iterator(); hits.hasNext(); ) {
            Hit hit = hits.next();
            boolean filled = hit.isFillable() && hit.isFilled(summaryClass);
            if (filled) {
                query.trace("Found filled hit: " + hit, traceLevel);
            } else {
                query.trace("Found hit that was not filled/fillable: " + hit, traceLevel);
            }
        }
        query.trace("Error hit: " + result.hits().getErrorHit(), traceLevel);
    }

    /** Returns the column the bucket distribution assigns to the bucket. */
    private int getPartition(BucketId bucketId) {
        return distribution.getColumn(bucketId);
    }

    /**
     * Returns the smallest i in [0, 30) such that numRows - 1 is less than 2^i,
     * or 31 if there is no such i.
     *
     * @throws IllegalArgumentException if numRows is less than 1
     */
    private static int calcNumRowBits(int numRows) {
        if (numRows < 1) {
            throw new IllegalArgumentException();
        }
        int bits = 0;
        while (bits < 30) {
            if (numRows - 1 < (1 << bits)) {
                return bits;
            }
            ++bits;
        }
        return 31;
    }
}
diff --git
a/container-search/src/main/java/com/yahoo/prelude/searcher/MultipleResultsSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/MultipleResultsSearcher.java new file mode 100644 index 00000000000..ac2196bb9f5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/MultipleResultsSearcher.java @@ -0,0 +1,376 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.searcher;

import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.search.Searcher;
import com.yahoo.processing.request.CompoundName;
import com.yahoo.search.result.ErrorMessage;
import com.yahoo.search.result.Hit;
import com.yahoo.search.result.HitGroup;
import com.yahoo.search.searchchain.Execution;

import java.util.*;

/**
 * <p> Groups hits according to sddocname. </p>
 *
 * <p> For each group, the desired number of hits can be specified. </p>
 *
 * @author tonytv
 */
public class MultipleResultsSearcher extends Searcher {

    private final static String propertyPrefix = "multipleresultsets.";
    private static final CompoundName additionalHitsFactorName = new CompoundName(propertyPrefix + "additionalHitsFactor");
    private static final CompoundName maxTimesRetrieveHeterogeneousHitsName = new CompoundName(propertyPrefix + "maxTimesRetrieveHeterogeneousHits");
    private static final CompoundName numHits = new CompoundName(propertyPrefix + "numHits");

    /**
     * Searches, partitions the hits by document name and fetches more hits for
     * each requested group which did not reach its target number of documents.
     * Returns a result carrying an invalid query parameter error if the
     * multipleresultsets.* properties cannot be parsed.
     */
    @Override
    public Result search(Query query, Execution e) {
        try {
            Parameters parameters = new Parameters(query);

            query.trace("MultipleResultsSearcher: " + parameters, false, 2);
            HitsRetriever hitsRetriever = new HitsRetriever(query, e, parameters);

            for (DocumentGroup documentGroup : parameters.documentGroups) {
                if (hitsRetriever.numHits(documentGroup) < documentGroup.targetNumberOfDocuments) {
                    hitsRetriever.retrieveMoreHits(documentGroup);
                }
            }

            return hitsRetriever.createMultipleResultSets();
        } catch (ParameterException exception) {
            Result result = new Result(query);
            result.hits().setError(ErrorMessage.createInvalidQueryParameter(exception.msg));
            return result;
        }
    }

    /** Issues the searches for one query and collects their hits in a PartitionedResult. */
    private static class HitsRetriever {

        final PartitionedResult partitionedResult;

        private int numRetrieveMoreHitsCalls = 0;
        private int nextOffset;
        private final Query query;
        private final Parameters parameters;
        private final int hits;
        private final int offset;
        private final Execution execution;
        /** The result of the first search. Null while that first search is running. */
        private Result initialResult;

        HitsRetriever(Query query, Execution execution, Parameters parameters) throws ParameterException {
            this.offset = query.getOffset();
            this.hits = query.getHits();
            this.nextOffset = query.getOffset() + query.getHits();
            this.query = query;
            this.parameters = parameters;
            this.execution = execution;

            initialResult = retrieveHits();
            partitionedResult = new PartitionedResult(parameters.documentGroups, initialResult);
        }

        /**
         * Fetches unrestricted hits until the group is satisfied or the
         * configured number of attempts is used up, then falls back to a
         * search restricted to the group.
         */
        void retrieveMoreHits(DocumentGroup documentGroup) {
            if (++numRetrieveMoreHitsCalls < parameters.maxTimesRetrieveHeterogeneousHits) {
                retrieveHeterogenousHits();

                if (numHits(documentGroup) < documentGroup.targetNumberOfDocuments) {
                    retrieveMoreHits(documentGroup);
                }
            } else {
                retrieveRemainingHitsForGroup(documentGroup);
            }
        }

        /** Fetches the next window of hits (at most 1000) without changing the restrict list. */
        void retrieveHeterogenousHits() {
            int numHitsToRetrieve = (int)(hits * parameters.additionalHitsFactor);

            final int maxNumHitsToRetrieve = 1000;
            numHitsToRetrieve = Math.min(numHitsToRetrieve, maxNumHitsToRetrieve);

            try {
                query.setWindow(nextOffset, numHitsToRetrieve);
                partitionedResult.addHits(retrieveHits());
            }
            finally {
                restoreWindow();
                nextOffset += numHitsToRetrieve;
            }
        }

        private void restoreWindow() {
            query.setWindow(offset, hits);
        }

        /** Fetches the hits the group is still missing by restricting the query to that group. */
        void retrieveRemainingHitsForGroup(DocumentGroup documentGroup) {
            // Take a copy: getRestrict() returns the set which is cleared below, so keeping
            // just the reference would leave nothing to restore from in the finally block.
            Set<String> oldRestrictList = new LinkedHashSet<>(query.getModel().getRestrict());
            try {
                int numMissingHits = documentGroup.targetNumberOfDocuments - numHits(documentGroup);
                int offset = numHits(documentGroup);

                query.getModel().getRestrict().clear();
                query.getModel().getRestrict().add(documentGroup.documentName);
                query.setWindow(offset, numMissingHits);
                partitionedResult.addHits(retrieveHits());

            } finally {
                restoreWindow();
                query.getModel().getRestrict().clear();
                query.getModel().getRestrict().addAll(oldRestrictList);
            }
        }

        int numHits(DocumentGroup documentGroup) {
            return partitionedResult.numHits(documentGroup.documentName);
        }

        /**
         * Empties the initial result and refills it with the other hits and
         * one hit group per requested document name, cropped to its target size.
         */
        Result createMultipleResultSets() {
            Iterator<Hit> i = initialResult.hits().iterator();
            while (i.hasNext()) {
                i.next();
                i.remove();
            }

            for (DocumentGroup group : parameters.documentGroups) {
                partitionedResult.cropResultSet(group.documentName, group.targetNumberOfDocuments);
            }

            partitionedResult.insertInto(initialResult.hits());
            return initialResult;
        }

        /** Searches and fills with the current query state, merging any errors into the initial result. */
        private Result retrieveHits() {
            Result result = execution.search(query);
            // ensure that field sddocname is available
            execution.fill(result); // TODO: Suffices to fill attributes

            // On the first call initialResult is still null: that result becomes the
            // initial result and keeps its own errors, so there is nothing to merge.
            if (initialResult != null && result.hits().getErrorHit() != null) {
                if (initialResult.hits().getErrorHit() != null) {
                    initialResult.hits().getErrorHit().addErrors(result.hits().getErrorHit());
                } else {
                    // The initial result has no error hit to merge into yet
                    for (ErrorMessage error : result.hits().getErrorHit().errors()) {
                        initialResult.hits().addError(error);
                    }
                }
            }

            return result;
        }
    }

    // Assumes that field sddocname is available
    /** Hits split into one group per requested document name, plus the hits matching no group. */
    private static class PartitionedResult {

        private final Map<String, HitGroup> resultSets = new HashMap<>();

        private final List<Hit> otherHits = new ArrayList<>();

        PartitionedResult(List<DocumentGroup> documentGroups, Result result) throws ParameterException {
            for (DocumentGroup group : documentGroups)
                addGroup(group);

            addHits(result, true);
        }

        void addHits(Result result, boolean addOtherHits) {
            Iterator<Hit> i = result.hits().iterator();
            while (i.hasNext()) {
                add(i.next(), addOtherHits);
            }
        }

        /** Adds the hits belonging to a requested group, dropping all others. */
        void addHits(Result result) {
            addHits(result, false);
        }

        /**
         * Adds the hit to the group named by its sddocname field. A hit matching
         * no group is kept in the list of other hits if addOtherHits is true,
         * and dropped otherwise.
         */
        void add(Hit hit, boolean addOtherHits) {
            String documentName = (String)hit.getField(Hit.SDDOCNAME_FIELD);

            if (documentName != null) {
                HitGroup resultSet = resultSets.get(documentName);

                if (resultSet != null) {
                    resultSet.add(hit);
                    return;
                }
            }

            if (addOtherHits) {
                otherHits.add(hit);
            }
        }

        int numHits(String documentName) {
            return resultSets.get(documentName).size();
        }

        /** Adds the other hits, and then each group, to the given hit group. */
        void insertInto(HitGroup group) {
            for (Hit hit : otherHits) {
                group.add(hit);
            }

            for (HitGroup resultSet : resultSets.values()) {
                resultSet.copyOrdering(group);
                group.add(resultSet);
            }
        }

        void cropResultSet(String documentName, int numDocuments) {
            resultSets.get(documentName).trim(0, numDocuments);
        }

        /** Registers an empty group, failing if the document name is already registered. */
        private void addGroup(DocumentGroup group) throws ParameterException {
            final String documentName = group.documentName;
            HitGroup previous = resultSets.put(documentName,
                                               new HitGroup(documentName) {
                                                   private static final long serialVersionUID = 5732822886080288688L;
                                               });
            if (previous != null) {
                throw new ParameterException("Document name " + documentName + " mentioned multiple times");
            }
        }

    }


    //examples:
    //multipleresultsets.numhits=music:10,movies:20
    //multipleresultsets.additionalhitsFactor=0.8
    //multipleresultsets.maxtimesretrieveheterogeneoushits=2
    /** The multipleresultsets.* properties of a query, in parsed form. */
    private static class Parameters {

        List<DocumentGroup> documentGroups = new ArrayList<>();
        double additionalHitsFactor = 0.8;
        int maxTimesRetrieveHeterogeneousHits = 2;

        Parameters(Query query) throws ParameterException {
            readNumHitsSpecification(query);
            readMaxTimesRetrieveHeterogeneousHits(query);
            readAdditionalHitsFactor(query);
        }

        private void readAdditionalHitsFactor(Query query) throws ParameterException {
            String additionalHitsFactorStr = query.properties().getString(additionalHitsFactorName);

            if (additionalHitsFactorStr == null)
                return;

            try {
                additionalHitsFactor = Double.parseDouble(additionalHitsFactorStr);
            } catch (NumberFormatException e) {
                throw new ParameterException(
                        "Expected floating point number, got '" + additionalHitsFactorStr + "'.");
            }
        }

        private void readMaxTimesRetrieveHeterogeneousHits(Query query) {
            maxTimesRetrieveHeterogeneousHits = query.properties().getInteger(
                    maxTimesRetrieveHeterogeneousHitsName,
                    maxTimesRetrieveHeterogeneousHits);
        }


        private void readNumHitsSpecification(Query query) throws ParameterException {
            //example numHitsSpecification: "music:10,movies:20"
            String numHitsSpecification = query.properties().getString(numHits);

            if (numHitsSpecification == null)
                return;

            for (String s : numHitsSpecification.split(",")) {
                handleDocumentNameWithNumberOfHits(s);
            }
        }

        @Override
        public String toString() {
            StringBuilder s = new StringBuilder();
            s.append("additionalHitsFactor=").append(additionalHitsFactor)
             .append(", maxTimesRetrieveHeterogeneousHits=").append(maxTimesRetrieveHeterogeneousHits)
             .append(", numHitsSpecification='");

            for (DocumentGroup group : documentGroups) {
                s.append(group.documentName).append(":").append(group.targetNumberOfDocuments).append(", ");
            }

            s.append("'");
            return s.toString();
        }

        //example input: music:10
        private void handleDocumentNameWithNumberOfHits(String s) throws ParameterException {
            String[] documentNameWithNumberOfHits = s.split(":");

            if (documentNameWithNumberOfHits.length != 2) {
                String msg = "Expected a single ':' in '" + s + "'.";

                if (documentNameWithNumberOfHits.length > 2)
                    msg += " Please check for missing commas.";

                throw new ParameterException(msg);
            }

            String documentName = documentNameWithNumberOfHits[0].trim();
            try {
                int numHits = Integer.parseInt(documentNameWithNumberOfHits[1].trim());

                numRequestedHits(documentName, numHits);
            } catch (NumberFormatException e) {
                throw new ParameterException(
                        "Expected an integer but got '" + documentNameWithNumberOfHits[1] + "'");
            }
        }

        private void numRequestedHits(String documentName, int numHits) {
            documentGroups.add(new DocumentGroup(documentName, numHits));
        }

    }

    /** A document name and the number of hits requested for it. */
    private static class DocumentGroup {
        String documentName;
        int targetNumberOfDocuments;

        DocumentGroup(String documentName, int targetNumberOfDocuments) {
            this.documentName = documentName;
            this.targetNumberOfDocuments = targetNumberOfDocuments;
        }
    }

    /** Thrown when a multipleresultsets.* property has an invalid value. */
    @SuppressWarnings("serial")
    private static class ParameterException extends Exception {
        String msg;

        ParameterException(String msg) {
            super(msg); // so getMessage() and stack traces carry the text too
            this.msg = msg;
        }
    }

}
diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/PosSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/PosSearcher.java new file mode 100644 index 00000000000..03e212fc854 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/PosSearcher.java @@ -0,0 +1,174 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.geo.DegreesParser; +import com.yahoo.geo.BoundingBoxParser; +import com.yahoo.yolean.Exceptions; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; +import com.yahoo.prelude.Location; + +/** + * A searcher converting human-readable position parameters + * into internal format.
+ * <br> + * Reads the following query properties: + * <ul> + * <li> pos.ll (geographical latitude and longitude) + * <li> pos.xy (alternate to pos.ll - direct x and y in internal units) + * <li> pos.radius (distance in one of: + * internal units (no suffix), meter (m), kilometer (km) or miles (mi) + * </ul> + * + * @author Arne J + */ +@After(PhaseNames.RAW_QUERY) +@Before(PhaseNames.TRANSFORMED_QUERY) +@Provides(PosSearcher.POSITION_PARSING) +public class PosSearcher extends Searcher { + public static final String POSITION_PARSING = "PositionParsing"; + + private static final CompoundName posBb = new CompoundName("pos.bb"); + private static final CompoundName posLl = new CompoundName("pos.ll"); + private static final CompoundName posXy = new CompoundName("pos.xy"); + private static final CompoundName posAttributeName = new CompoundName("pos.attribute"); + private static final CompoundName posRadius = new CompoundName("pos.radius"); + private static final CompoundName posUnits = new CompoundName("pos.units"); + + // according to wikipedia: + // Earth's equatorial radius = 6378137 meter - not used + // meters per mile = 1609.344 + // 180 degrees equals one half diameter equals PI*r + // Earth's polar radius = 6356752 meter + + public final static double km2deg = 1000.000 * 180.0 / (Math.PI * 6356752.0); + public final static double mi2deg = 1609.344 * 180.0 / (Math.PI * 6356752.0); + + + public Result search(Query query, Execution execution) { + String bb = query.properties().getString(posBb); + String ll = query.properties().getString(posLl); + String xy = query.properties().getString(posXy); + + if (ll == null && xy == null && bb == null) { + return execution.search(query); // Nothing to do + } + if (query.getRanking().getLocation() != null) { + // this searcher is a NOP if there is already a location + // in the query + query.trace("query already has a location set, not processing 'pos' params", false, 1); + return execution.search(query); + } + + Location loc = 
new Location(); + loc.setDimensions(2); + String posAttribute = query.properties().getString(posAttributeName); + loc.setAttribute(posAttribute); + + try { + if (ll == null && xy == null && bb != null) { + parseBoundingBox(bb, loc); + } else { + if (ll != null && xy != null) { + throw new IllegalArgumentException("Cannot handle both lat/long and xy coords at the same time"); + } + if (ll != null) { + handleGeoCircle(query, ll, loc); + } + if (xy != null) { + handleXyCircle(query, xy, loc); + } + if (bb != null) { + parseBoundingBox(bb, loc); + } + } + } + catch (IllegalArgumentException e) { + // System.err.println("error: "+e); + return new Result(query, ErrorMessage.createInvalidQueryParameter( + "Error in pos parameters: " + Exceptions.toMessageString(e))); + } + // and finally: + query.getRanking().setLocation(loc); + return execution.search(query); + } + + private void handleGeoCircle(Query query, String ll, Location target) { + double ewCoord = 0; + double nsCoord = 0; + try { + DegreesParser parsed = new DegreesParser(ll); + ewCoord = parsed.longitude; + nsCoord = parsed.latitude; + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException("Unable to parse lat/long string '" +ll + "'", e); + } + String radius = query.properties().getString(posRadius); + double radiusdegrees = 0.0; + + if (radius == null) { + radiusdegrees = 50.0 * km2deg; + } else if (radius.endsWith("km")) { + double radiuskm = Double.valueOf(radius.substring(0, radius.length()-2)); + radiusdegrees = radiuskm * km2deg; + } else if (radius.endsWith("m")) { + double radiusm = Double.valueOf(radius.substring(0, radius.length()-1)); + radiusdegrees = radiusm * km2deg / 1000.0; + } else if (radius.endsWith("mi")) { + double radiusmiles = Double.valueOf(radius.substring(0, radius.length()-2)); + radiusdegrees = radiusmiles * mi2deg; + } else { + radiusdegrees = Integer.parseInt(radius) * 0.000001; + } + target.setGeoCircle(nsCoord, ewCoord, radiusdegrees); + } + + + private void 
handleXyCircle(Query query, String xy, Location target) { + int xcoord = 0; + int ycoord = 0; + // parse xy + int semipos = xy.indexOf(';'); + if (semipos > 0 && semipos < xy.length()) { + xcoord = Integer.parseInt(xy.substring(0, semipos)); + ycoord = Integer.parseInt(xy.substring(semipos+1, xy.length())); + } else { + throw new IllegalArgumentException("pos.xy must be in the format 'digits;digits' but was: '"+xy+"'"); + } + String radius = query.properties().getString(posRadius); + int radiusUnits = 0; + if (radius == null) { + radiusUnits = 5000; + } else if (radius.endsWith("km")) { + double radiuskm = Double.valueOf(radius.substring(0, radius.length()-2)); + double radiusdegrees = radiuskm * km2deg; + radiusUnits = (int)(radiusdegrees * 1000000); + } else if (radius.endsWith("m")) { + double radiusm = Double.valueOf(radius.substring(0, radius.length()-1)); + double radiusdegrees = radiusm * km2deg / 1000.0; + radiusUnits = (int)(radiusdegrees * 1000000); + } else if (radius.endsWith("mi")) { + double radiusmiles = Double.valueOf(radius.substring(0, radius.length()-2)); + double radiusdegrees = radiusmiles * mi2deg; + radiusUnits = (int)(radiusdegrees * 1000000); + } else { + radiusUnits = Integer.parseInt(radius); + } + target.setXyCircle(xcoord, ycoord, radiusUnits); + } + + + private static void parseBoundingBox(String bb, Location target) { + BoundingBoxParser parser = new BoundingBoxParser(bb); + target.setBoundingBox(parser.n, parser.s, parser.e, parser.w); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/QuerySnapshotSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/QuerySnapshotSearcher.java new file mode 100644 index 00000000000..7d0ae0a6d99 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/QuerySnapshotSearcher.java @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.searcher; + +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.Relevance; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; + +/** + * Save the query in the incoming state to a meta hit in the result. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ + +public class QuerySnapshotSearcher extends Searcher { + + public Result search(Query query, Execution execution) { + Query q = query.clone(); + Result r = execution.search(query); + Hit h = new Hit("meta:querysnapshot", new Relevance( + Double.POSITIVE_INFINITY)); + h.setMeta(true); + h.setField("query", q); + r.hits().add(h); + return r; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/QueryValidatingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/QueryValidatingSearcher.java new file mode 100644 index 00000000000..5678cc918da --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/QueryValidatingSearcher.java @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; + +/** + * Ensures hits is 1000 or less and offset is 1000 or less. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class QueryValidatingSearcher extends Searcher { + + public Result search(Query query, Execution execution) { + if (query.getHits() > 1000) { + Result result = new Result(query); + ErrorMessage error + = ErrorMessage.createInvalidQueryParameter("Too many hits (more than 1000) requested."); + result.hits().addError(error); + return result; + } + if (query.getOffset() > 1000) { + Result result = new Result(query); + ErrorMessage error + = ErrorMessage.createInvalidQueryParameter("Offset too high (above 1000)."); + result.hits().addError(error); + return result; + } + return execution.search(query); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/QuotingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/QuotingSearcher.java new file mode 100644 index 00000000000..6c5a6492b92 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/QuotingSearcher.java @@ -0,0 +1,193 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import java.util.*; + +import com.yahoo.component.ComponentId; +import com.yahoo.search.result.Hit; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.prelude.hitfield.FieldPart; +import com.yahoo.prelude.hitfield.HitField; +import com.yahoo.prelude.hitfield.ImmutableFieldPart; +import com.yahoo.prelude.hitfield.StringFieldPart; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; + +/** + * A searcher which does quoting based on a quoting table. + * + * May be extended to do quoting template sensitive. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class QuotingSearcher extends Searcher { + + // Char to String + private QuoteTable quoteTable; + + private synchronized void setQuoteTable(QuoteTable quoteTable) { + this.quoteTable = quoteTable; + } + private synchronized QuoteTable getQuoteTable() { + return quoteTable; + } + + private static class QuoteTable { + private final int lowerUncachedBound; + private final int upperUncachedBound; + private final Map<Character, String> quoteMap; + private final String[] lowerTable; + private final boolean useMap; + private final boolean isEmpty; + + public QuoteTable(QrQuotetableConfig config) { + int minOrd = 0; + int maxOrd = 0; + String[] newLowerTable = new String[256]; + boolean newUseMap = false; + boolean newIsEmpty = true; + Map<Character, String> newQuoteMap = new HashMap<>(); + for (Iterator<?> i = config.character().iterator(); i.hasNext(); ) { + QrQuotetableConfig.Character character + = (QrQuotetableConfig.Character)i.next(); + if (character.ordinal() > 256) { + newIsEmpty = false; + newQuoteMap.put(new Character((char)character.ordinal()), + character.quoting()); + newUseMap = true; + if (minOrd == 0 || character.ordinal() < minOrd) + minOrd = character.ordinal(); + if (maxOrd == 0 || character.ordinal() > maxOrd) + maxOrd = character.ordinal(); + } + else { + newIsEmpty = false; + newLowerTable[character.ordinal()] + = character.quoting(); + } + } + lowerUncachedBound = minOrd; + upperUncachedBound = maxOrd; + quoteMap = newQuoteMap; + useMap = newUseMap; + isEmpty = newIsEmpty; + lowerTable = newLowerTable; + } + public String get(char c) { + if (isEmpty) + return null; + int ord = (int)c; + if (ord < 256) { + return lowerTable[ord]; + } + else { + if ((!useMap) || ord < lowerUncachedBound + || ord > upperUncachedBound) + { + return null; + } + else { + return quoteMap.get(new Character(c)); + } + } + } + public boolean isEmpty() { + return isEmpty; + } + } + + public 
QuotingSearcher(ComponentId id, QrQuotetableConfig config) { + super(id); + setQuoteTable(new QuoteTable(config)); + } + + public Result search(Query query, Execution execution) { + Result result = execution.search(query); + execution.fill(result); + QuoteTable translations = getQuoteTable(); + if (translations == null || translations.isEmpty()) { + return result; + } + for (Iterator<Hit> i = result.hits().deepIterator(); i.hasNext(); ) { + Hit h = i.next(); + if (h instanceof FastHit) { + quoteProperties((FastHit)h, translations); + } + } + return result; + } + + private void quoteProperties(FastHit hit, QuoteTable translations) { + for (Iterator<?> i = ((Set<?>) hit.fields().keySet()).iterator(); i.hasNext(); ) { + String propertyName = (String) i.next(); + Object entry = hit.getField(propertyName); + if (entry == null) { + continue; + } + Class<? extends Object> propertyType = entry.getClass(); + if (propertyType.equals(HitField.class)) { + quoteField((HitField) entry, translations); + } else if (propertyType.equals(String.class)) { + quoteProperty(hit, propertyName, (String)entry, translations); + } + } + } + + private void quoteProperty(Hit hit, String fieldname, String toQuote, QuoteTable translations) { + List<FieldPart> l = translate(toQuote, translations, true); + if (l != null) { + HitField hf = new HitField(fieldname, toQuote); + hf.setTokenizedContent(l); + hit.setField(fieldname, hf); + } + } + + + private void quoteField(HitField field, QuoteTable translations) { + for (ListIterator<FieldPart> i = field.listIterator(); i.hasNext(); ) { + FieldPart f = i.next(); + if (!f.isFinal()) { + List<FieldPart> newFieldParts = translate(f.getContent(), translations, + f.isToken()); + if (newFieldParts != null) { + i.remove(); + for (Iterator<FieldPart> j = newFieldParts.iterator(); j.hasNext(); ) { + i.add(j.next()); + } + } + } + } + } + + private List<FieldPart> translate(String toQuote, QuoteTable translations, + boolean isToken) { + List<FieldPart> 
newFieldParts = null; + int lastIdx = 0; + for (int i = 0; i < toQuote.length(); i++) { + String quote = translations.get(toQuote.charAt(i)); + if (quote != null) { + if (newFieldParts == null) { + newFieldParts = new ArrayList<>(); + } + if (lastIdx != i) { + newFieldParts.add( + new StringFieldPart(toQuote.substring(lastIdx, i), + isToken)); + } + String initContent = Character.toString(toQuote.charAt(i)); + newFieldParts.add(new ImmutableFieldPart(initContent, + quote, + isToken)); + lastIdx = i+1; + } + } + if (lastIdx > 0 && lastIdx < toQuote.length()) { + newFieldParts.add( + new StringFieldPart(toQuote.substring(lastIdx), + isToken)); + } + return newFieldParts; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/ValidatePredicateSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/ValidatePredicateSearcher.java new file mode 100644 index 00000000000..3706f4fa9ea --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/ValidatePredicateSearcher.java @@ -0,0 +1,81 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import java.util.Optional; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.PredicateQueryItem; +import com.yahoo.prelude.query.ToolBox; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.querytransform.BooleanSearcher; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; + +import java.util.Collection; + +/** + * Checks that predicate queries don't use values outside the defined upper/lower bounds. 
+ * + * @author <a href="mailto:magnarn@yahoo-inc.com">Magnar Nedland</a> + */ +@After(BooleanSearcher.PREDICATE) +public class ValidatePredicateSearcher extends Searcher { + + @Override + public Result search(Query query, Execution execution) { + Optional<ErrorMessage> e = validate(query, execution.context().getIndexFacts().newSession(query)); + if (e.isPresent()) { + Result r = new Result(query); + r.hits().addError(e.get()); + return r; + } + return execution.search(query); + } + + private Optional<ErrorMessage> validate(Query query, IndexFacts.Session indexFacts) { + ValidatePredicateVisitor visitor = new ValidatePredicateVisitor(indexFacts); + ToolBox.visit(visitor, query.getModel().getQueryTree().getRoot()); + return visitor.errorMessage; + } + + private static class ValidatePredicateVisitor extends ToolBox.QueryVisitor { + + private final IndexFacts.Session indexFacts; + + public Optional<ErrorMessage> errorMessage = Optional.empty(); + + public ValidatePredicateVisitor(IndexFacts.Session indexFacts) { + this.indexFacts = indexFacts; + } + + @Override + public boolean visit(Item item) { + if (item instanceof PredicateQueryItem) { + visit((PredicateQueryItem) item); + } + return true; + } + + private void visit(PredicateQueryItem item) { + Index index = getIndexFromUnionOfDocumentTypes(item); + for (PredicateQueryItem.RangeEntry entry : item.getRangeFeatures()) { + long value = entry.getValue(); + if (value < index.getPredicateLowerBound() || value > index.getPredicateUpperBound()) { + errorMessage = Optional.of(ErrorMessage.createIllegalQuery( + String.format("%s=%d outside configured predicate bounds.", entry.getKey(), value))); + } + } + } + + private Index getIndexFromUnionOfDocumentTypes(PredicateQueryItem item) { + return indexFacts.getIndex(item.getIndexName()); + } + + @Override + public void onExit() {} + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/ValidateSortingSearcher.java 
b/container-search/src/main/java/com/yahoo/prelude/searcher/ValidateSortingSearcher.java new file mode 100644 index 00000000000..ee8a896f73b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/ValidateSortingSearcher.java @@ -0,0 +1,191 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.vespa.config.search.AttributesConfig; +import com.yahoo.search.config.ClusterConfig; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.query.Sorting; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static com.yahoo.prelude.querytransform.NormalizingSearcher.ACCENT_REMOVAL; + + +/** + * Check sorting specification makes sense to the search cluster before + * passing it on to the backend. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@Before(PhaseNames.BACKEND) +@After(ACCENT_REMOVAL) +public class ValidateSortingSearcher extends Searcher { + + private Map<String, AttributesConfig.Attribute> attributeNames = null; + private String clusterName = ""; + private final QrSearchersConfig.Searchcluster.Indexingmode.Enum indexingMode; + + public String getClusterName() { + return clusterName; + } + + public void setClusterName(String clusterName) { + this.clusterName = clusterName; + } + + private Map<String, AttributesConfig.Attribute> getAttributeNames() { + return attributeNames; + } + + public void setAttributeNames(Map<String, AttributesConfig.Attribute> attributeNames) { + this.attributeNames = attributeNames; + } + + public void initAttributeNames(AttributesConfig config) { + HashMap<String, AttributesConfig.Attribute> attributes = new HashMap<>(config.attribute().size()); + + for (AttributesConfig.Attribute attr : config.attribute()) { + if (AttributesConfig.Attribute.Collectiontype.SINGLE != attr.collectiontype()) { + continue; // cannot sort on multivalue attributes + } + attributes.put(attr.name(), attr); + } + setAttributeNames(attributes); + } + + public ValidateSortingSearcher(QrSearchersConfig qrsConfig, ClusterConfig clusterConfig, + AttributesConfig attributesConfig) + { + initAttributeNames(attributesConfig); + setClusterName(qrsConfig.searchcluster(clusterConfig.clusterId()).name()); + indexingMode = qrsConfig.searchcluster(clusterConfig.clusterId()).indexingmode(); + } + + @Override + public Result search(Query query, Execution execution) { + if (indexingMode != QrSearchersConfig.Searchcluster.Indexingmode.STREAMING) { + ErrorMessage e = validate(query); + if (e != null) { + Result r = new Result(query); + r.hits().addError(e); + return r; + } + } + return execution.search(query); + } + + private static Sorting.UcaSorter.Strength config2Strength(AttributesConfig.Attribute.Sortstrength.Enum s) { + if(s 
== AttributesConfig.Attribute.Sortstrength.PRIMARY) { + return Sorting.UcaSorter.Strength.PRIMARY; + } else if(s == AttributesConfig.Attribute.Sortstrength.SECONDARY) { + return Sorting.UcaSorter.Strength.SECONDARY; + } else if(s == AttributesConfig.Attribute.Sortstrength.TERTIARY) { + return Sorting.UcaSorter.Strength.TERTIARY; + } else if(s == AttributesConfig.Attribute.Sortstrength.QUATERNARY) { + return Sorting.UcaSorter.Strength.QUATERNARY; + } else if(s == AttributesConfig.Attribute.Sortstrength.IDENTICAL) { + return Sorting.UcaSorter.Strength.IDENTICAL; + } + return Sorting.UcaSorter.Strength.PRIMARY; + } + private ErrorMessage validate(Query query) { + Sorting sorting = query.getRanking().getSorting(); + List<Sorting.FieldOrder> l = (sorting != null) ? sorting.fieldOrders() : null; + + if (l == null) { + return null; + } + Map<String, AttributesConfig.Attribute> names = getAttributeNames(); + if (names == null) { + return null; + } + + String queryLocale = null; + if (query.getModel().getLocale() != null) { + queryLocale = query.getModel().getLocale().toString(); + } + + for (Sorting.FieldOrder f : l) { + String name = f.getFieldName(); + if ("[rank]".equals(name) || "[docid]".equals(name)) { + } else if (names.containsKey(name)) { + AttributesConfig.Attribute attrConfig = names.get(name); + if (attrConfig != null) { + if (f.getSortOrder() == Sorting.Order.UNDEFINED) { + f.setAscending(attrConfig.sortascending()); + } + if (f.getSorter().getClass().equals(Sorting.AttributeSorter.class)) { + // This indicates that it shall use default. 
+ if ((attrConfig.datatype() == AttributesConfig.Attribute.Datatype.STRING)) { + if (attrConfig.sortfunction() == AttributesConfig.Attribute.Sortfunction.UCA) { + String locale = attrConfig.sortlocale(); + if (locale == null || locale.isEmpty()) { + locale = queryLocale; + } + // can only use UcaSorter if we have knowledge about wanted locale + if (locale != null) { + f.setSorter(new Sorting.UcaSorter(name, locale, Sorting.UcaSorter.Strength.UNDEFINED)); + } else { + // wanted UCA but no locale known, so use lowercase as fallback + f.setSorter(new Sorting.LowerCaseSorter(name)); + } + } else if (attrConfig.sortfunction() == AttributesConfig.Attribute.Sortfunction.LOWERCASE) { + f.setSorter(new Sorting.LowerCaseSorter(name)); + } else if (attrConfig.sortfunction() == AttributesConfig.Attribute.Sortfunction.RAW) { + f.setSorter(new Sorting.RawSorter(name)); + } else { + // default if no config found for this string attribute + f.setSorter(new Sorting.LowerCaseSorter(name)); + } + } + } + if (f.getSorter() instanceof Sorting.UcaSorter) { + Sorting.UcaSorter sorter = (Sorting.UcaSorter) f.getSorter(); + String locale = sorter.getLocale(); + + if (locale == null || locale.isEmpty()) { + // first fallback + locale = attrConfig.sortlocale(); + } + if (locale == null || locale.isEmpty()) { + // second fallback + locale = queryLocale; + } + // final fallback + if (locale == null || locale.isEmpty()) { + locale = "en_US"; + } + + // getLogger().info("locale = " + locale + " attrConfig.sortlocale.value() = " + attrConfig.sortlocale.value() + " query.getLanguage() = " + query.getModel().getLanguage()); + // getLogger().info("locale = " + locale); + + Sorting.UcaSorter.Strength strength = sorter.getStrength(); + if (sorter.getStrength() == Sorting.UcaSorter.Strength.UNDEFINED) { + strength = config2Strength(attrConfig.sortstrength()); + } + if ((sorter.getStrength() == Sorting.UcaSorter.Strength.UNDEFINED) || (sorter.getLocale() == null) || sorter.getLocale().isEmpty()) { + // 
getLogger().info("locale = " + locale + " strength = " + strength.toString()); + sorter.setLocale(locale, strength); + } + //getLogger().info("locale = " + locale + " strength = " + strength.toString() + "decompose = " + sorter.getDecomposition()); + } + } else { + return ErrorMessage.createInvalidQueryParameter("The cluster " + getClusterName() + " has attribute config for field: " + name); + } + } else { + return ErrorMessage.createInvalidQueryParameter("The cluster " + getClusterName() + " has no sortable attribute named: " + name); + } + } + return null; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/package-info.java b/container-search/src/main/java/com/yahoo/prelude/searcher/package-info.java new file mode 100644 index 00000000000..5a795e859af --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.prelude.searcher; + +import com.yahoo.osgi.annotation.ExportPackage; |