diff options
Diffstat (limited to 'container-search/src/main/java/com/yahoo/prelude')
240 files changed, 30045 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/ConfigurationException.java b/container-search/src/main/java/com/yahoo/prelude/ConfigurationException.java new file mode 100644 index 00000000000..24492831156 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/ConfigurationException.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude; + +/** + * Thrown at events which are likely caused by misconfiguration + * + * @author bratseth + */ +public class ConfigurationException extends RuntimeException { + + public ConfigurationException(String message) { + super(message); + } + + public ConfigurationException(String message, Throwable cause) { + super(message,cause); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/Freshness.java b/container-search/src/main/java/com/yahoo/prelude/Freshness.java new file mode 100644 index 00000000000..9d0b3ec06c8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/Freshness.java @@ -0,0 +1,83 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude; + +import java.util.Calendar; + +import static com.yahoo.text.Lowercase.toLowerCase; + +/** + * The parameters for a freshness query (uses the datetime http parameter) + * Parses the string part of the "datetime=<string>", converts it to seconds + * since epoch and send that plus sets the flag in the QueryX packet that + * enables freshnessboost in fsearch. + * <p> + * This is a value object + * + * @author <a href="mailto:pauran@yahoo-inc.com">Per G. 
Auran</a> + */ +public class Freshness { + + private long refSecondsSinceEpoch = 0; // reference time + + private void parse(String dateTime) { + + /** Convert dateTime string to seconds since epoch */ + if (dateTime.startsWith("now")) { + + /** Case 1: if string starts with now: special case read system time */ + refSecondsSinceEpoch = getSystemTimeInSecondsSinceEpoch(); + + /** Case 2: now can be followed by -seconds for time offset */ + if (dateTime.startsWith("now-")) { + // offset in seconds may be given + String offsetStr = dateTime.substring(4); + long timeOffset; + if ( offsetStr.length() > 0) { + timeOffset = Long.parseLong(offsetStr); + } else { + timeOffset = 1; + } + refSecondsSinceEpoch = refSecondsSinceEpoch - timeOffset; + } + } else { /** Case 3: Reftime explicitly given seconds since epoch */ + refSecondsSinceEpoch = Long.parseLong(dateTime); + } + // Need to activate freshness in the QueryX packet if enabled: See QueryPacket.java + } + + public Freshness(String dateTime) { + parse(toLowerCase(dateTime)); // Set reference time + } + + /** Calculates the current time since epoch in seconds */ + public long getSystemTimeInSecondsSinceEpoch() { + long msSinceEpochNow = Calendar.getInstance().getTimeInMillis(); + return (msSinceEpochNow/1000); + } + + /** Get the reference time as a long value (in seconds since epoch) */ + public long getRefTime() {return refSecondsSinceEpoch;} + + /** Set the reference time as a string value */ + @Override + public String toString() { + StringBuilder ser = new StringBuilder(); + /** convert long value to string */ + String dateTime = Long.toString(refSecondsSinceEpoch); + ser.append(dateTime); + return ser.toString().trim(); + } + + @Override + public boolean equals(Object other) { + if (this == other) return true; + if (! 
(other instanceof Freshness)) return false; + return ((Freshness)other).refSecondsSinceEpoch == this.refSecondsSinceEpoch; + } + + @Override + public int hashCode() { + return (int)refSecondsSinceEpoch; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/Index.java b/container-search/src/main/java/com/yahoo/prelude/Index.java new file mode 100644 index 00000000000..5b1944c352b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/Index.java @@ -0,0 +1,325 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude; + + +import com.yahoo.language.process.StemMode; + +import java.util.Iterator; +import java.util.Set; + + +/** + * Information about configured settings of a field or field collection (an actual index or not) in a search definition. + * There are two types of settings: + * <ul> + * <li><i>Typed commands</i> are checked using a particular is/get method + * <li><i>Untyped commands</i> are checked using hasCommand and commandIterator + * </ul> + * addCommand sets both types. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class Index { + + public static class Attribute { + private boolean tokenizedContent = false; + public final String name; + + public Attribute(String name) { + this.name = name; + } + + public boolean isTokenizedContent() { + return tokenizedContent; + } + + public void setTokenizedContent(boolean tokenizedContent) { + this.tokenizedContent = tokenizedContent; + } + } + + /** The null index - don't use this for name lookups */ + public static final Index nullIndex = new Index("(null)"); + + private String name; + private boolean uriIndex = false; + private boolean hostIndex = false; + private StemMode stemMode = StemMode.NONE; + private Attribute[] matchGroup = null; + private boolean isAttribute = false; + private boolean isDefaultPosition = false; + private boolean dynamicSummary=false; + private boolean highlightSummary=false; + private boolean lowercase = false; + private boolean plainTokens = false; + private boolean multivalue = false; + private boolean fastSearch = false; + private boolean normalize = false; + private boolean literalBoost = false; + private boolean numerical = false; + private long predicateUpperBound = Long.MAX_VALUE; + private long predicateLowerBound = Long.MIN_VALUE; + + /** + * True if this is an <i>exact</i> index - which should match + * tokens containing any characters + */ + private boolean exact = false; + + private boolean isNGram = false; + private int gramSize=2; + + /** + * The string terminating an exact token in this index, + * or null to use the default (space) + */ + private String exactTerminator = null; + + private Set<String> commands = new java.util.HashSet<>(); + + public Index(String name) { + this.name = name; + } + + /** + * Returns the canonical name of this index, unless it + * is the null index, which doesn't have a canonical name + */ + public String 
getName() { + return name; + } + + public boolean isUriIndex() { + return uriIndex; + } + + public boolean isDefaultPosition() { + return isDefaultPosition; + } + + public void setDefaultPosition(boolean v) { + isDefaultPosition = v; + } + + public void setUriIndex(boolean uriIndex) { + this.uriIndex = uriIndex; + } + + public boolean isHostIndex() { + return hostIndex; + } + + public void setHostIndex(boolean hostIndex) { + this.hostIndex = hostIndex; + } + + public StemMode getStemMode() { + return stemMode; + } + + public void setStemMode(StemMode stemMode) { + this.stemMode = stemMode; + } + + public void setStemMode(String name) { + this.stemMode = StemMode.valueOf(name); + } + + /** Adds a type or untyped command string to this */ + public Index addCommand(String commandString) { + if ("fullurl".equals(commandString)) { + setUriIndex(true); + } else if ("urlhost".equals(commandString)) { + setHostIndex(true); + } else if (commandString.startsWith("stem ")) { + setStemMode(commandString.substring(5)); + } else if (commandString.startsWith("stem:")) { + setStemMode(commandString.substring(5)); + } else if ("stem".equals(commandString)) { + setStemMode(StemMode.SHORTEST); + } else if ("word".equals(commandString)) { + setExact(true, null); + } else if ("exact".equals(commandString)) { + setExact(true, " "); + } else if ("dynteaser".equals(commandString)) { + setDynamicSummary(true); + } else if ("highlight".equals(commandString)) { + setHighlightSummary(true); + } else if ("lowercase".equals(commandString)) { + setLowercase(true); + } else if (commandString.startsWith("exact ")) { + setExact(true, commandString.substring(6)); + } else if (commandString.startsWith("ngram ")) { + setNGram(true,Integer.parseInt(commandString.substring(6))); + } else if (commandString.equals("attribute")) { + setAttribute(true); + } else if (commandString.equals("default-position")) { + setDefaultPosition(true); + } else if (commandString.startsWith("match-group ")) { + 
setMatchGroup(commandString.substring(12).split(" ")); + } else if (commandString.equals("plain-tokens")) { + setPlainTokens(true); + } else if (commandString.equals("multivalue")) { + setMultivalue(true); + } else if (commandString.equals("fast-search")) { + setFastSearch(true); + } else if (commandString.equals("normalize")) { + setNormalize(true); + } else if (commandString.equals("literal-boost")) { + setLiteralBoost(true); + } else if (commandString.equals("numerical")) { + setNumerical(true); + } else if (commandString.startsWith("predicate-bounds ")) { + setPredicateBounds(commandString.substring(17)); + } else { + commands.add(commandString); + } + return this; + } + + private void setPredicateBounds(String bounds) { + if ( ! bounds.startsWith("[..")) { + predicateLowerBound = Long.parseLong(bounds.substring(1, bounds.indexOf(".."))); + } else { + predicateLowerBound = Long.MIN_VALUE; + } + if ( ! bounds.endsWith("..]")) { + predicateUpperBound = Long.parseLong(bounds.substring(bounds.indexOf("..") + 2, bounds.length() - 1)); + } else { + predicateUpperBound = Long.MAX_VALUE; + } + + } + + /** + * Whether terms in this field are lower cased when indexing. + * + * @param lowercase true if terms are lowercased + */ + public void setLowercase(boolean lowercase) { + this.lowercase = lowercase; + } + + /** + * Whether terms in this field are lower cased when indexing. 
+ * + * @return true if terms are lowercased + */ + public boolean isLowercase() { + return lowercase; + } + + /** Returns an iterator of all the untyped commands of this */ + public Iterator<String> commandIterator() { + return commands.iterator(); + } + + /** Checks whether this has the given (exact) <i>untyped</i> command string */ + public boolean hasCommand(String commandString) { + return commands.contains(commandString); + } + + /** + * Set whether this index should match any kind of characters + * + * @param exact true to make this index match any kind of characters, not just word and digit ones + * @param terminator the terminator of an exact sequence (one or more characters), + * or null to use the default (space) + */ + public void setExact(boolean exact, String terminator) { + this.exact = exact; + this.exactTerminator = terminator; + } + + /** Returns whether this is an exact index, which should match tokens containing any characters */ + public boolean isExact() { return exact; } + + /** Returns the string terminating an exact sequence in this index, or null to use the default (space) */ + public String getExactTerminator() { return exactTerminator; } + + /** Returns true if this is an ngram index (default: false) */ + public boolean isNGram() { return isNGram; } + + /** Returns the gram size. 
Only used if isNGram is true (default: 2)*/ + public int getGramSize() { return gramSize; } + + public void setNGram(boolean nGram,int gramSize) { + this.isNGram=nGram; + this.gramSize=gramSize; + } + + public void setDynamicSummary(boolean dynamicSummary) { this.dynamicSummary=dynamicSummary; } + public boolean getDynamicSummary() { return dynamicSummary; } + + public void setHighlightSummary(boolean highlightSummary) { this.highlightSummary=highlightSummary; } + public boolean getHighlightSummary() { return highlightSummary; } + + /** Returns true if this is the null index */ + // TODO: Replace by == Index.null + public boolean isNull() { + return "(null)".equals(name); + } + + public Attribute[] getMatchGroup() { // TODO: Not in use on Vespa 6 + return matchGroup; + } + + public void setMatchGroup(String[] attributes) { + Attribute[] a = new Attribute[attributes.length]; + + for (int i = 0; i < attributes.length; i++) { + a[i] = new Attribute(attributes[i].trim()); + } + this.matchGroup = a; + } + + public boolean isAttribute() { + return isAttribute; + } + + public void setAttribute(boolean isAttribute) { + this.isAttribute = isAttribute; + } + + public boolean hasPlainTokens() { + return plainTokens; + } + + public void setPlainTokens(boolean plainTokens) { + this.plainTokens = plainTokens; + } + + public void setMultivalue(boolean multivalue) { this.multivalue = multivalue; } + + /** Returns true if this is a multivalue field */ + public boolean isMultivalue() { return multivalue; } + + public void setFastSearch(boolean fastSearch) { this.fastSearch = fastSearch; } + + /** Returns true if this is an attribute with fastsearch turned on */ + public boolean isFastSearch() { return fastSearch; } + + public void setNormalize(boolean normalize) { this.normalize = normalize; } + + /** Returns true if the content of this index is normalized */ + public boolean getNormalize() { return normalize; } + + public boolean getLiteralBoost() { return literalBoost; } + + 
public void setLiteralBoost(boolean literalBoost) { this.literalBoost = literalBoost; } + + public void setNumerical(boolean numerical) { this.numerical = numerical; } + + public boolean isNumerical() { return numerical; } + + public long getPredicateUpperBound() { return predicateUpperBound; } + + public long getPredicateLowerBound() { return predicateLowerBound; } + + @Override + public String toString() { + return "index '" + getName() + "'"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java new file mode 100644 index 00000000000..9a079c0d23b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java @@ -0,0 +1,440 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude; + + +import com.google.common.collect.ImmutableList; +import com.yahoo.language.process.StemMode; +import com.yahoo.search.Query; + +import java.util.*; + +import static com.yahoo.text.Lowercase.toLowerCase; + +/** + * A central repository for information about indices. Standard usage is + * + * <pre><code> + * IndexFacts.Session session = indexFacts.newSession(query); // once when starting to process a query + * session.getIndex(indexName).[get index info] + * </code></pre> + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +// TODO: We should replace this with a better representation of search definitions +// which is immutable, models clusters and search definitions inside clusters properly, +// and uses better names. 
public class IndexFacts {

    /** The inverse of {@link #clusters}: cluster names indexed by document type name. Null until clusters are set. */
    private Map<String, List<String>> clusterByDocument;

    /**
     * The result of one step when walking a list of document types: a search definition which
     * was found, and the list position at which the next step should continue.
     */
    private static class DocumentTypeListOffset {
        public final int offset;
        public final SearchDefinition searchDefinition;

        public DocumentTypeListOffset(int offset, SearchDefinition searchDefinition) {
            this.offset = offset;
            this.searchDefinition = searchDefinition;
        }
    }

    /** A Map of all known search definitions indexed by name */
    private Map<String, SearchDefinition> searchDefinitions = new LinkedHashMap<>();

    /** A map of document types contained in each cluster indexed by cluster name */
    private Map<String, List<String>> clusters = new LinkedHashMap<>();

    /**
     * The name of the default search definition, which is the union of all
     * known document types.
     */
    public static final String unionName = "unionOfAllKnown";

    /** A search definition which contains the union of all settings. */
    private SearchDefinition unionSearchDefinition=new SearchDefinition(unionName);

    /** Whether {@link #freeze()} has been called */
    private boolean frozen;

    /** Whether this has (any) NGram indexes. Calculated at freeze time. */
    private boolean hasNGramIndices;

    /** Creates an instance with no search definitions and no clusters */
    public IndexFacts() {}

    /**
     * Creates an instance from an index model. The search definitions are taken from the model only
     * if both its search definitions and its union search definition are non-null, and the clusters
     * only if its master clusters are non-null; otherwise the empty defaults are kept.
     *
     * @param indexModel the model to read search definitions and clusters from
     */
    @SuppressWarnings({"deprecation"})
    public IndexFacts(IndexModel indexModel) {
        if (indexModel.getSearchDefinitions() != null && indexModel.getUnionSearchDefinition() != null) {
            setSearchDefinitions(indexModel.getSearchDefinitions(), indexModel.getUnionSearchDefinition());
        }
        if (indexModel.getMasterClusters() != null) {
            setMasterClusters(indexModel.getMasterClusters());
        }
    }

    /** Sets the clusters and the derived document-to-cluster map, without checking whether this is frozen */
    private void setMasterClusters(Map<String, List<String>> clusters) {
        // TODO: clusters should probably be a separate class
        this.clusters = clusters;
        clusterByDocument = invert(clusters);
    }

    /** Returns a new map from each value found in the lists of the given map to the keys whose list contains it */
    private static Map<String, List<String>> invert(Map<String, List<String>> clusters) {
        Map<String, List<String>> result = new HashMap<>();
        for (Map.Entry<String,List<String>> entry : clusters.entrySet()) {
            for (String value : entry.getValue()) {
                addEntry(result, value, entry.getKey());
            }
        }
        return result;
    }

    /** Appends value to the list stored under key, creating the list if this is the first value of the key */
    private static void addEntry(Map<String, List<String>> result, String key, String value) {
        List<String> values = result.get(key);
        if (values == null) {
            values = new ArrayList<>();
            result.put(key, values);
        }
        values.add(value);
    }

    /**
     * Returns the names of the clusters listing the given search definition.
     *
     * @param searchDefinitionName the search definition (document type) name to look up
     * @return the cluster names, or an empty list if clusters are not set or none contains the name
     */
    // Assumes that document names are equal to the search definition that contain them.
    public List<String> clustersHavingSearchDefinition(String searchDefinitionName) {
        if (clusterByDocument == null)
            return Collections.emptyList();

        List<String> clusters = clusterByDocument.get(searchDefinitionName);
        return clusters != null ? clusters : Collections.<String>emptyList();
    }

    /**
     * Sets the clusters and the derived document-to-cluster map.
     * Public only for testing.
     *
     * @throws IllegalStateException if this is frozen
     */
    public void setClusters(Map<String, List<String>> clusters) {
        ensureNotFrozen();
        this.clusters = clusters;
        clusterByDocument = invert(clusters);
    }

    /**
     * Replaces the search definitions and the union search definition of this.
     * The given instances are stored as-is, not copied.
     *
     * @throws IllegalStateException if this is frozen
     */
    public void setSearchDefinitions(Map<String, SearchDefinition> searchDefinitions,
                                     SearchDefinition unionSearchDefinition) {
        ensureNotFrozen();
        this.searchDefinitions = searchDefinitions;
        this.unionSearchDefinition = unionSearchDefinition;
    }

    /** Returns whether this has at least one search definition */
    private boolean isInitialized() {
        return searchDefinitions.size() > 0;
    }

    /**
     * Returns whether the given name is an index in the union search definition (if no document
     * types are given) or in any of the known search definitions among the given document types.
     * Any name is accepted as an index while this has no search definitions.
     */
    private boolean isIndexFromDocumentTypes(String indexName, List<String> documentTypes) {
        if (!isInitialized()) return true;

        if (documentTypes.isEmpty()) {
            return unionSearchDefinition.getIndex(indexName) != null;
        }

        // Visit each known search definition among the document types, in list order
        DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0);
        while (sd != null) {
            Index index = sd.searchDefinition.getIndex(indexName);
            if (index != null) {
                return true;
            }
            sd = chooseSearchDefinition(documentTypes, sd.offset);
        }

        return false;
    }

    /**
     * Returns the name of the index found by looking up the lowercased index name, using the same
     * search order as the other lookups of this. The name is returned unchanged if no index is
     * found or this has no search definitions.
     */
    private String getCanonicNameFromDocumentTypes(String indexName, List<String> documentTypes) {
        if (!isInitialized()) return indexName;

        if (documentTypes.isEmpty()) {
            Index index = unionSearchDefinition.getIndexByLowerCase(toLowerCase(indexName));
            return index == null ? indexName : index.getName();
        }
        DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0);
        while (sd != null) {
            Index index = sd.searchDefinition.getIndexByLowerCase(toLowerCase(indexName));
            if (index != null) return index.getName();
            sd = chooseSearchDefinition(documentTypes, sd.offset);
        }
        return indexName;
    }

    /** Looks up an index by name, using the name "default" if the given name is null or empty */
    private Index getIndexFromDocumentTypes(String indexName, List<String> documentTypes) {
        if (indexName==null || indexName.isEmpty())
            indexName="default";

        return getIndexByCanonicNameFromDocumentTypes(indexName, documentTypes);
    }

    /**
     * Returns the index having the given name in the union search definition (if no document types
     * are given), or in the first known search definition among the given document types which has it.
     *
     * @return the index, or Index.nullIndex if none is found or this has no search definitions (never null)
     */
    private Index getIndexByCanonicNameFromDocumentTypes(String canonicName, List<String> documentTypes) {
        if ( ! isInitialized()) return Index.nullIndex;

        if (documentTypes.isEmpty()) {
            Index index = unionSearchDefinition.getIndex(canonicName);
            if (index == null) return Index.nullIndex;
            return index;
        }

        DocumentTypeListOffset sd = chooseSearchDefinition(documentTypes, 0);
        while (sd != null) {
            Index index = sd.searchDefinition.getIndex(canonicName);

            if (index != null) return index;
            sd = chooseSearchDefinition(documentTypes, sd.offset);
        }
        return Index.nullIndex;
    }

    /**
     * Calls resolveDocumentTypes(query.getModel().getSources(), query.getModel().getRestrict(), candidates),
     * using the names of all known search definitions as the candidates
     */
    private Set<String> resolveDocumentTypes(Query query) {
        // Assumption: Search definition name equals document name.
        return resolveDocumentTypes(query.getModel().getSources(), query.getModel().getRestrict(),
                                    searchDefinitions.keySet());
    }

    /**
     * Given a search list which is a mixture of document types and cluster
     * names, and a restrict list which is a list of document types, return a
     * set of all valid document types for this combination. Most use-cases for
     * fetching index settings will involve calling this method with the
     * incoming query's {@link com.yahoo.search.query.Model#getSources()} and
     * {@link com.yahoo.search.query.Model#getRestrict()} as input parameters
     * before calling any other method of this class.
     * <p>
     * Null sources or restrict are treated as empty. If there are no sources the result is the
     * restrict list as given (not filtered by the candidates), or the candidate set itself
     * (not a copy) if there is no restrict list either.
     *
     * @param sources the search list for a query
     * @param restrict the restrict list for a query
     * @param candidateDocumentTypes the document types which may be selected by the sources
     * @return a (possibly empty) set of valid document types
     */
    private Set<String> resolveDocumentTypes(Collection<String> sources, Collection<String> restrict,
                                             Set<String> candidateDocumentTypes) {
        sources = emptyCollectionIfNull(sources);
        restrict = emptyCollectionIfNull(restrict);

        if (sources.isEmpty()) {
            if ( ! restrict.isEmpty()) {
                return new TreeSet<>(restrict);
            } else {
                return candidateDocumentTypes;
            }
        }

        Set<String> toSearch = new TreeSet<>();
        for (String source : sources) { // source: a document type or a cluster containing them
            List<String> clusterDocTypes = clusters.get(source);
            if (clusterDocTypes == null) { // source was a document type
                if (candidateDocumentTypes.contains(source)) {
                    toSearch.add(source);
                }
            } else { // source was a cluster, having document types
                for (String documentType : clusterDocTypes) {
                    if (candidateDocumentTypes.contains(documentType)) {
                        toSearch.add(documentType);
                    }
                }
            }
        }

        if ( ! restrict.isEmpty()) {
            toSearch.retainAll(restrict);
        }

        return toSearch;
    }

    /** Returns the given collection, or an empty list if it is null */
    private Collection<String> emptyCollectionIfNull(Collection<String> collection) {
        return collection == null ? Collections.<String>emptyList() : collection;
    }

    /**
     * Finds the next known search definition in a list of document types.
     * Names which are not known search definitions are skipped.
     *
     * @param documentTypes the document type names to walk
     * @param index the list position to start at
     * @return the search definition found and the position following it in the list,
     *         or null if no known search definition remains from the start position
     */
    private DocumentTypeListOffset chooseSearchDefinition(List<String> documentTypes, int index) {
        while (index < documentTypes.size()) {
            String docName = documentTypes.get(index++);
            SearchDefinition sd = searchDefinitions.get(docName);
            if (sd != null) {
                return new DocumentTypeListOffset(index, sd);
            }
        }
        return null;
    }

    /**
     * Freeze this to prevent further changes.
     */
    public void freeze() {
        // Must be computed before frozen is set, as hasNGramIndices() returns the cached value once frozen
        hasNGramIndices = hasNGramIndices();
        // TODO: Freeze content!
        frozen = true;
    }

    /** Whether this contains any index which has isNGram()==true. This is free to ask on a frozen instance. */
    public boolean hasNGramIndices() {
        if (frozen) return hasNGramIndices;
        for (Map.Entry<String,SearchDefinition> searchDefinition : searchDefinitions.entrySet()) {
            for (Index index : searchDefinition.getValue().indices().values())
                if (index.isNGram()) return true;
        }
        return false;
    }

    /**
     * @return true if this is frozen, in which case it is <b>not</b> permissible to update this object
     */
    public boolean isFrozen() {
        return frozen;
    }

    /** Throws IllegalStateException if this is frozen */
    private void ensureNotFrozen() {
        if (frozen) {
            throw new IllegalStateException("Tried to modify frozen IndexFacts instance.");
        }
    }


    /**
     * Add a string to be accepted as an index name when parsing a
     * query. The index is added both to the search definition chosen by sdName
     * (which is created if it does not exist) and to the union search definition.
     *
     * For testing only.
     *
     * @param sdName name of search definition containing index, if null, modify default set
     * @param indexName name of index, actual or otherwise
     * @throws IllegalStateException if this is frozen
     */
    public void addIndex(String sdName, String indexName) {
        ensureNotFrozen();

        SearchDefinition sd;
        if (sdName == null) {
            sd = unionSearchDefinition;
        } else if (searchDefinitions.containsKey(sdName)) {
            sd = searchDefinitions.get(sdName);
        } else {
            sd = new SearchDefinition(sdName);
            searchDefinitions.put(sdName, sd);
        }
        sd.getOrCreateIndex(indexName);
        unionSearchDefinition.getOrCreateIndex(indexName);
    }

    /**
     * Adds an index to the specified search definition (which is created if it does not exist),
     * and to the default index settings, overriding any current settings for this index
     *
     * @param sdName name of search definition to add the index to, if null, only the default set is modified
     * @param index the index to add
     * @throws IllegalStateException if this is frozen
     */
    public void addIndex(String sdName, Index index) {
        ensureNotFrozen();

        SearchDefinition sd;
        if (sdName == null) {
            sd = unionSearchDefinition;
        } else if (searchDefinitions.containsKey(sdName)) {
            sd = searchDefinitions.get(sdName);
        } else {
            sd = new SearchDefinition(sdName);
            searchDefinitions.put(sdName, sd);
        }
        sd.addIndex(index);
        unionSearchDefinition.addIndex(index);
    }

    /**
     * Returns the default position of a search definition.
     *
     * @param sdName the name of the search definition, or null to ask the union search definition
     * @return whatever getDefaultPosition() of that search definition returns,
     *         or null if there is no search definition having the given name
     */
    public String getDefaultPosition(String sdName) {
        SearchDefinition sd;
        if (sdName == null) {
            sd = unionSearchDefinition;
        } else if (searchDefinitions.containsKey(sdName)) {
            sd = searchDefinitions.get(sdName);
        } else {
            return null;
        }

        return sd.getDefaultPosition();
    }

    /** Creates a session resolving document types from the sources and restrict settings of the given query */
    public Session newSession(Query query) {
        return new Session(query);
    }

    /** Creates a session from the given sources and restrict lists, with all known search definitions as candidates */
    public Session newSession(Collection<String> sources, Collection<String> restrict) {
        return new Session(sources, restrict);
    }

    /** Creates a session from the given sources and restrict lists, with the given document types as candidates */
    public Session newSession(Collection<String> sources, Collection<String> restrict,
                              Set<String> candidateDocumentTypes) {
        return new Session(sources, restrict, candidateDocumentTypes);
    }

    /**
     * Create an instance of this to look up index facts with a given query.
     * Note that if the model.source or model.restrict parameters of the query
     * is changed another session should be created. This is immutable.
     */
    public class Session {

        /** The document types resolved when this session was created */
        private final List<String> documentTypes;

        private Session(Query query) {
            documentTypes = ImmutableList.copyOf(resolveDocumentTypes(query));
        }

        private Session(Collection<String> sources, Collection<String> restrict) {
            // Assumption: Search definition name equals document name.
            documentTypes = ImmutableList.copyOf(resolveDocumentTypes(sources, restrict, searchDefinitions.keySet()));
        }

        private Session(Collection<String> sources, Collection<String> restrict, Set<String> candidateDocumentTypes) {
            documentTypes = ImmutableList.copyOf(resolveDocumentTypes(sources, restrict, candidateDocumentTypes));
        }

        /**
         * Returns the index for this name.
         *
         * @param indexName the name of the index. If this is null or empty the index
         *                  named "default" is returned
         * @return the index best matching the input parameters or the nullIndex
         *         (never null) if none is found
         */
        public Index getIndex(String indexName) {
            return IndexFacts.this.getIndexFromDocumentTypes(indexName, documentTypes);
        }

        /** Returns an index given from a given search definition */
        // Note: This does not take the context into account currently.
        // Ideally, we should be able to resolve the right search definition name
        // in the context of the searched clusters, but this cannot be modelled
        // currently by the flat structure in IndexFacts.
        // That can be fixed without changing this API.
        public Index getIndex(String indexName, String documentType) {
            return IndexFacts.this.getIndexFromDocumentTypes(indexName, Collections.singletonList(documentType));
        }

        /**
         * Returns the canonical form of the index name (Which may be the same as
         * the input).
         *
         * @param indexName index name or alias
         */
        public String getCanonicName(String indexName) {
            return IndexFacts.this.getCanonicNameFromDocumentTypes(indexName, documentTypes);
        }

        /**
         * Returns whether the given name is an index.
         *
         * @param indexName index name candidate
         */
        public boolean isIndex(String indexName) {
            return IndexFacts.this.isIndexFromDocumentTypes(indexName, documentTypes);
        }

        /** Returns an immutable list of the document types this has resolved to */
        public List<String> documentTypes() { return documentTypes; }

        @Override
        public String toString() {
            return "index facts for search definitions " + documentTypes;
        }

    }

}
diff --git a/container-search/src/main/java/com/yahoo/prelude/IndexModel.java b/container-search/src/main/java/com/yahoo/prelude/IndexModel.java new file mode 100644 index 00000000000..a4e08accd48 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/IndexModel.java @@ -0,0 +1,133 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;

import com.yahoo.log.LogLevel;
import com.yahoo.search.config.IndexInfoConfig;
import com.yahoo.container.QrSearchersConfig;

/**
 * Parameter class used when constructing IndexFacts.
 *
 * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
 */
public final class IndexModel {

    private static final Logger log = Logger.getLogger(IndexModel.class.getName());

    // Copied from MasterClustersInfoUpdater. It's a temporary workaround for IndexFacts.
    private Map<String, List<String>> masterClusters;
    private Map<String, SearchDefinition> searchDefinitions;
    private SearchDefinition unionSearchDefinition;

    /**
     * Use IndexModel as a pure wrapper for the parameters given.
     */
    public IndexModel(Map<String, List<String>> masterClusters,
                      Map<String, SearchDefinition> searchDefinitions,
                      SearchDefinition unionSearchDefinition) {
        this.masterClusters = masterClusters;
        this.searchDefinitions = searchDefinitions;
        this.unionSearchDefinition = unionSearchDefinition;
    }

    /**
     * Builds the search definitions from index info config (none if it is null),
     * and uses the given clusters map as-is.
     */
    public IndexModel(IndexInfoConfig indexInfo, Map<String, List<String>> clusters) {
        setDefinitionsOrNone(indexInfo);
        this.masterClusters = clusters;
    }

    /**
     * Builds the search definitions from index info config and the clusters from
     * searchers config. Whichever config is null leaves its part of this model null.
     */
    public IndexModel(IndexInfoConfig indexInfo, QrSearchersConfig clusters) {
        setDefinitionsOrNone(indexInfo);
        if (clusters == null) {
            masterClusters = null;
        } else {
            setMasterClusters(clusters);
        }
    }

    /** Builds the definitions from the config, or sets them to null if there is no config */
    private void setDefinitionsOrNone(IndexInfoConfig indexInfo) {
        if (indexInfo == null) {
            searchDefinitions = null;
            unionSearchDefinition = null;
        } else {
            setDefinitions(indexInfo);
        }
    }

    /** Creates the cluster map: the search definition names of each search cluster, by cluster name */
    private void setMasterClusters(QrSearchersConfig config) {
        masterClusters = new HashMap<>();
        for (int cluster = 0; cluster < config.searchcluster().size(); ++cluster) {
            List<String> documentTypes = new ArrayList<>();
            String clusterName = config.searchcluster(cluster).name();
            for (int definition = 0; definition < config.searchcluster(cluster).searchdef().size(); ++definition) {
                documentTypes.add(config.searchcluster(cluster).searchdef(definition));
            }
            masterClusters.put(clusterName, documentTypes);
        }
    }

    /**
     * Creates one search definition per index info entry, plus the union of them all.
     * All commands are added before any alias.
     */
    private void setDefinitions(IndexInfoConfig c) {
        searchDefinitions = new HashMap<>();
        unionSearchDefinition = new SearchDefinition(IndexFacts.unionName);

        // First pass: commands
        for (IndexInfoConfig.Indexinfo info : c.indexinfo()) {
            SearchDefinition definition = new SearchDefinition(info.name());
            for (IndexInfoConfig.Indexinfo.Command command : info.command()) {
                definition.addCommand(command.indexname(), command.command());
                unionSearchDefinition.addCommand(command.indexname(), command.command());
            }
            definition.fillMatchGroups();
            searchDefinitions.put(info.name(), definition);
        }
        unionSearchDefinition.fillMatchGroups();

        // Second pass: aliases
        for (IndexInfoConfig.Indexinfo info : c.indexinfo()) {
            SearchDefinition sd = searchDefinitions.get(info.name());
            for (IndexInfoConfig.Indexinfo.Alias alias : info.alias()) {
                addAlias(sd, alias.alias(), alias.indexname());
            }
        }
    }

    /**
     * Adds an alias to the given search definition and to the union. A runtime exception
     * from adding to the union is logged rather than propagated.
     */
    private void addAlias(SearchDefinition sd, String aliasString, String indexString) {
        sd.addAlias(aliasString, indexString);
        try {
            unionSearchDefinition.addAlias(aliasString, indexString);
        } catch (RuntimeException e) {
            String message = "Ignored the alias \"" + aliasString + "\" for \"" + indexString
                             + "\" in the union of all search definitions,"
                             + " source has to be explicitly set to \"" + sd.getName()
                             + "\" for that alias to work.";
            log.log(LogLevel.WARNING, message, e);
        }
    }

    public Map<String, List<String>> getMasterClusters() { return masterClusters; }

    public Map<String, SearchDefinition> getSearchDefinitions() { return searchDefinitions; }

    public SearchDefinition getUnionSearchDefinition() { return unionSearchDefinition; }

}
diff --git
a/container-search/src/main/java/com/yahoo/prelude/Location.java b/container-search/src/main/java/com/yahoo/prelude/Location.java new file mode 100644 index 00000000000..10d63051cbe --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/Location.java @@ -0,0 +1,379 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude; + +import com.yahoo.text.Utf8; + +import java.nio.ByteBuffer; +import java.util.StringTokenizer; + +/** + * Location data for a geographical query. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author arnej27959 + */ +public class Location { + + // 1 or 2 + private int dimensions = 0; + + // line elements and rectangles + private int x1 = 0; + private int y1 = 0; + private int x2 = 1; + private int y2 = 1; + + // center(x,y), radius + private int x = 1; + private int y = 1; + private int r = 1; + + // next three are now UNUSED + // ranking table, rank multiplier (scale) + // {0, 1} an int to make parsing and rendering the hit even simpler + private int tableId = 0; + private int s = 1; + private int replace = 0; + + private boolean renderCircle = false; + private boolean renderRectangle = false; + private long aspect = 0; + + private String attribute; + + public boolean equals(Object other) { + if (! 
(other instanceof Location)) return false; + Location l = (Location)other; + return dimensions == l.dimensions + && renderCircle == l.renderCircle + && renderRectangle == l.renderRectangle + && aspect == l.aspect + && x1 == l.x1 + && x2 == l.x2 + && y1 == l.y1 + && y2 == l.y2 + && x == l.x + && y == l.y + && r == l.r; + } + + public boolean hasDimensions() { + return dimensions != 0; + } + public void setDimensions(int d) { + if (hasDimensions() && dimensions != d) { + throw new IllegalArgumentException("already has dimensions="+dimensions+", cannot change it to "+d); + } + if (d == 1 || d == 2) { + dimensions = d; + } else { + throw new IllegalArgumentException("Illegal location, dimensions must be 1 or 2, but was: "+d); + } + } + public int getDimensions() { + return dimensions; + } + + // input data are degrees n/e (if positive) or s/w (if negative) + public void setBoundingBox(double n, double s, + double e, double w) + { + setDimensions(2); + if (hasBoundingBox()) { + throw new IllegalArgumentException("can only set bounding box once"); + } + int px1 = (int) (Math.round(w * 1000000)); + int px2 = (int) (Math.round(e * 1000000)); + int py1 = (int) (Math.round(s * 1000000)); + int py2 = (int) (Math.round(n * 1000000)); + if (px1 > px2) { + throw new IllegalArgumentException("cannot have w > e"); + } + x1 = px1; + x2 = px2; + if (py1 > py2) { + throw new IllegalArgumentException("cannot have s > n"); + } + y1 = py1; + y2 = py2; + renderRectangle = true; + } + + private void adjustAspect() { + //calculate aspect based on latitude (elevation angle) + //no need to "optimize" for special cases, exactly 0, 30, 45, 60, or 90 degrees won't be input anyway + double degrees = (double) y / 1000000d; + if (degrees <= -90.0 || degrees >= +90.0) { + aspect = 0; + return; + } + double radians = degrees * Math.PI / 180d; + double cosLatRadians = Math.cos(radians); + aspect = (long) (cosLatRadians * 4294967295L); + } + + public void setGeoCircle(double ns, double ew, double 
radius_in_degrees) { + setDimensions(2); + if (isGeoCircle()) { + throw new IllegalArgumentException("can only set geo circle once"); + } + int px = (int) (ew * 1000000); + int py = (int) (ns * 1000000); + int pr = (int) (radius_in_degrees * 1000000); + if (ew < -180.1 || ew > +180.1) { + throw new IllegalArgumentException("e/w location must be in range [-180,+180]"); + } + if (ns < -90.1 || ns > +90.1) { + throw new IllegalArgumentException("n/s location must be in range [-90,+90]"); + } + if (radius_in_degrees < 0 || radius_in_degrees > 180.0) { + throw new IllegalArgumentException("radius must be in range [0,180] degrees, approximately upto 20000km"); + } + x = px; + y = py; + r = pr; + renderCircle = true; + adjustAspect(); + } + + public void setXyCircle(int px, int py, int radius_in_units) { + setDimensions(2); + if (isGeoCircle()) { + throw new IllegalArgumentException("can only set geo circle once"); + } + if (radius_in_units < 0) { + throw new IllegalArgumentException("radius must be positive"); + } + x = px; + y = py; + r = radius_in_units; + renderCircle = true; + } + + private void parseRectangle(String rectangle) { + int endof = rectangle.indexOf(']'); + if (endof == -1) { + throw new IllegalArgumentException("Illegal location syntax: "+rectangle); + } + String rectPart = rectangle.substring(1,endof); + StringTokenizer tokens = new StringTokenizer(rectPart, ","); + setDimensions(Integer.parseInt(tokens.nextToken())); + if (dimensions == 1) { + x1 = Integer.parseInt(tokens.nextToken()); + x2 = Integer.parseInt(tokens.nextToken()); + if (tokens.hasMoreTokens()) { + throw new IllegalArgumentException("Illegal location syntax: "+rectangle); + } + } else if (dimensions == 2) { + x1 = Integer.parseInt(tokens.nextToken()); + y1 = Integer.parseInt(tokens.nextToken()); + x2 = Integer.parseInt(tokens.nextToken()); + y2 = Integer.parseInt(tokens.nextToken()); + } + renderRectangle = true; + String theRest = rectangle.substring(endof+1).trim(); + if 
(theRest.length() >= 15 && theRest.charAt(0) == '(') { + parseCircle(theRest); + } + } + + private void parseCircle(String circle) { + int endof = circle.indexOf(')'); + if (endof == -1) { + throw new IllegalArgumentException("Illegal location syntax: "+circle); + } + String circlePart = circle.substring(1,endof); + StringTokenizer tokens = new StringTokenizer(circlePart, ","); + setDimensions(Integer.parseInt(tokens.nextToken())); + x = Integer.parseInt(tokens.nextToken()); + if (dimensions == 2) { + y = Integer.parseInt(tokens.nextToken()); + } + r = Integer.parseInt(tokens.nextToken()); + Integer.parseInt(tokens.nextToken()); // was "tableId" + Integer.parseInt(tokens.nextToken()); // was "scale" (multiplier) + Integer.parseInt(tokens.nextToken()); // was "replace" + + if (dimensions == 1) { + if (tokens.hasMoreTokens()) { + throw new IllegalArgumentException("Illegal location syntax: "+circle); + } + } + else { + if (tokens.hasMoreTokens()) { + String aspectToken = tokens.nextToken(); + if (aspectToken.equalsIgnoreCase("CalcLatLon")) { + adjustAspect(); + } else { + try { + aspect = Long.parseLong(aspectToken); + } catch (NumberFormatException nfe) { + throw new IllegalArgumentException("Aspect "+aspectToken+" for location must be an integer or 'CalcLatLon' for automatic aspect calculation.", nfe); + } + if (aspect > 4294967295L || aspect < 0) { + throw new IllegalArgumentException("Aspect "+aspect+" for location parameter must be less than 4294967296 (2^32)"); + } + } + } + } + renderCircle = true; + String theRest = circle.substring(endof+1).trim(); + if (theRest.length() > 5 && theRest.charAt(0) == '[') { + parseRectangle(theRest); + } + } + + public Location() {} + + public Location(String rawLocation) { + int attributeSepPos = rawLocation.indexOf(':'); + String locationSpec = rawLocation; + if (attributeSepPos != -1) { + String tempAttribute = rawLocation.substring(0, attributeSepPos); + if (tempAttribute != null && !tempAttribute.isEmpty()) { + attribute 
= tempAttribute; + } + locationSpec = rawLocation.substring(attributeSepPos+1); + } + + if (locationSpec.charAt(0) == '[') { + parseRectangle(locationSpec); + } + else if (locationSpec.charAt(0) == '(') { + parseCircle(locationSpec); + } + else { + throw new IllegalArgumentException("Illegal location syntax"); + } + } + + public String toString() { + StringBuilder ser = new StringBuilder(); + if (attribute != null) { + ser.append(attribute).append(':'); + } + if (renderRectangle) { + ser.append("[").append(dimensions).append(","); + if (dimensions == 1) { + ser.append(x1).append(","). + append(x2); + } + else { + ser.append(x1).append(","). + append(y1).append(","). + append(x2).append(","). + append(y2); + } + ser.append("]"); + } + if (renderCircle) { + ser.append("(").append(dimensions).append(",").append(x); + if (dimensions == 2) { + ser.append(",").append(y); + } + ser.append(",").append(r). + append(",").append(tableId). + append(",").append(s). + append(",").append(replace); + if (dimensions == 2 && aspect != 0) { + ser.append(",").append(aspect); + } + ser.append(")"); + } + return ser.toString(); + } + + /** + * Returns width of bounding box (actual width if rectangle, bounding square if circle) + * @return width of bounding box + */ + public int getBoundingWidth() { + if (renderCircle) { + return r * 2; + } else { + return x2 - x1; + } + } + + /** + * Returns height of bounding box (actual height if rectangle, bounding square if circle) + * @return height of bounding box + */ + public int getBoundingHeight() { + if (renderCircle) { + return r * 2; + } else { + return y2 - y1; + } + } + + public int hashCode() { + return toString().hashCode(); + } + + public boolean hasAttribute() { + return attribute != null; + } + public String getAttribute() { + return attribute; + } + public void setAttribute(String attributeName) { + attribute = attributeName; + } + + /** check whether this Location contains a 2D circle */ + public boolean isGeoCircle() { + return 
(renderCircle && dimensions==2); + } + + public boolean hasBoundingBox() { + return renderRectangle; + } + + private void checkGeoCircle() { + if (!isGeoCircle()) { + throw new IllegalArgumentException("only geo circles support this api"); + } + } + + /** + * Obtain degrees latitude (North-South direction); negative numbers are degrees South. + * Expected range is [-90.0,+90.0] only. + * May only be called when isGeoCircle() returns true. + **/ + public double degNS() { + checkGeoCircle(); + return 0.000001 * y; + } + + /** + * Obtain degrees longitude (East-West direction); negative numbers are degrees West. + * Expected range is [-180.0,+180.0] only. + * May only be called when isGeoCircle() returns true. + **/ + public double degEW() { + checkGeoCircle(); + return 0.000001 * x; + } + + /** + * Obtain circle radius (in degrees). + * May only be called when isGeoCircle() returns true. + **/ + public double degRadius() { + checkGeoCircle(); + return 0.000001 * r; + } + + /** + * Encodes the location to the given buffer and returns the length. + * For internal use. + */ + public int encode(ByteBuffer buffer) { + byte[] loc = Utf8.toBytes(toString()); + buffer.put(loc); + return loc.length; + } + + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/Ping.java b/container-search/src/main/java/com/yahoo/prelude/Ping.java new file mode 100644 index 00000000000..ce8f1cba399 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/Ping.java @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude; + +/** + * A ping, typically to ask whether backend is alive. 
/**
 * A ping, typically to ask whether backend is alive.
 *
 * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
 */
public class Ping {

    /** The timeout used by the no-argument constructor */
    private static final long DEFAULT_TIMEOUT = 500;

    /** How long to wait for a pong */
    private long timeout;

    /** Creates a ping with the default timeout of 500. */
    public Ping() {
        this(DEFAULT_TIMEOUT);
    }

    /**
     * Creates a ping with the given timeout.
     *
     * @param timeout how long to wait for a pong
     */
    public Ping(long timeout) {
        this.timeout = timeout;
    }

    /** Returns how long to wait for a pong. */
    public long getTimeout() {
        return timeout;
    }

    @Override
    public String toString() {
        StringBuilder description = new StringBuilder("Ping(timeout = ");
        description.append(timeout);
        description.append(")");
        return description.toString();
    }
}
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class Pong { + + private String pingInfo=""; + private List<ErrorMessage> errors = new ArrayList<>(1); + private List<PongPacket> pongPackets = new ArrayList<>(1); + private ElapsedTime elapsed = new ElapsedTime(); + + public Pong() { + } + public void addError(ErrorMessage error) { + errors.add(error); + } + public ErrorMessage getError(int i) { + return errors.get(i); + } + public int getErrorSize() { + return errors.size(); + } + public void addPongPacket(PongPacket pongPacket) { + pongPackets.add(pongPacket); + } + public PongPacket getPongPacket(int i) { + return pongPackets.get(i); + } + public int getPongPacketsSize() { + return pongPackets.size(); + } + /** Merge all information from another pong into this */ + public void merge(Pong pong) { + if (pong.badResponse()) { + errors.addAll(pong.getErrors()); + } + pongPackets.addAll(pong.getPongPackets()); + } + public List<ErrorMessage> getErrors() { + return Collections.unmodifiableList(errors); + } + public List<PongPacket> getPongPackets() { + return Collections.unmodifiableList(pongPackets); + } + /** @return whether there is an error or not */ + public boolean badResponse() { + return !errors.isEmpty(); + } + + /** Sets information about the ping used to produce this. This is included when returning the tostring of this. 
*/ + public void setPingInfo(String pingInfo) { + if (pingInfo==null) + pingInfo=""; + this.pingInfo=pingInfo; + } + + /** Returns information about the ping use, or "" (never null) if none */ + public String getPingInfo() { return pingInfo; } + + public ElapsedTime getElapsedTime() { + return elapsed; + } + + /** Returns a string which included the ping info (if any) and any errors added to this */ + public @Override String toString() { + StringBuffer m=new StringBuffer("Result of pinging"); + if (pingInfo.length() > 0) { + m.append(" using "); + m.append(pingInfo); + } + m.append(" "); + for (int i=0; i<errors.size(); i++) { + m.append(errors.get(i).toString()); + if (i<errors.size()-1) + m.append(", "); + } + return m.toString(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/SearchDefinition.java b/container-search/src/main/java/com/yahoo/prelude/SearchDefinition.java new file mode 100644 index 00000000000..0cec7cfc19d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/SearchDefinition.java @@ -0,0 +1,125 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude; + +import com.yahoo.prelude.Index.Attribute; + +import java.util.HashMap; +import java.util.Map; + +import static com.yahoo.text.Lowercase.toLowerCase; + +/** + * An object for storing information about search definitions in the centralized + * directory services. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +// TODO: Make freezable! +public class SearchDefinition { + + private String name; + + /** A map of all indices in this search definition, indexed by name */ + private Map<String, Index> indices = new HashMap<>(); + + /* + * A map of all indices in this search definition, indexed by lower cased + * name. 
+ */ + private Map<String, Index> lowerCase = new HashMap<>(); + + private String defaultPosition; + + public SearchDefinition(String name) { + this.name = name; + } + + public String getName() { return name; } + + public String getDefaultPosition() { + return defaultPosition; + } + + public void addIndex(Index index) { + indices.put(index.getName(), index); + lowerCase.put(toLowerCase(index.getName()), index); + if (index.isDefaultPosition()) { + defaultPosition = index.getName(); + } + } + + public void addAlias(String alias, String indexName) { + Index old; + + if ((old = indices.get(alias)) != null) { + if (old.getName().equals(indexName)) { + return; + } else { + throw new IllegalArgumentException("Tried adding the alias \"" + + alias + "\" for the index name \"" + indexName + + "\" when the name \"" + alias + + "\" already maps to \"" + old.getName() + "\"."); + } + } + Index index = indices.get(indexName); + if (index == null) { + throw new IllegalArgumentException("Failed adding alias \"" + alias + + "\" for the index name \"" + indexName + + "\" as there is no index with that name available."); + } + indices.put(alias, index); + String lca = toLowerCase(alias); + if (lowerCase.get(lca) == null) { + lowerCase.put(lca, index); + } + } + + public Index getIndex(String name) { + return indices.get(name); + } + + public Index getIndexByLowerCase(String name) { + return lowerCase.get(name); + } + + /** Returns the indices of this as a map */ + public Map<String,Index> indices() { + return indices; + } + + public Index getOrCreateIndex(String name) { + Index idx = getIndex(name); + if (idx != null) { + return idx; + } + idx = new Index(name); + addIndex(idx); + return idx; + } + + public void addCommand(String indexName, String commandString) { + Index index = getOrCreateIndex(indexName); + index.addCommand(commandString); + if (index.isDefaultPosition()) { + defaultPosition = index.getName(); + } + } + + public void fillMatchGroups() { + for (Index i : 
/**
 * Retrieves Vespa-Version from the manifest file.
 *
 * @author tonytv
 */
public class VespaSVersionRetriever {

    /**
     * Returns the value of the "Vespa-Version" main attribute of
     * /META-INF/MANIFEST.MF, as read once when this class was loaded.
     *
     * @return the version, "not available." if the manifest is missing or
     *         unreadable, or null if the manifest has no such attribute
     */
    public static String getVersion() {
        return version;
    }

    private static final String version = retrieveVersion();

    private static String retrieveVersion() {
        // try-with-resources closes the manifest stream, which was previously leaked
        try (java.io.InputStream manifestStream =
                     VespaSVersionRetriever.class.getResourceAsStream("/META-INF/MANIFEST.MF")) {
            // getResourceAsStream returns null when the resource does not exist;
            // passing that on to Manifest would throw during class initialization
            if (manifestStream == null) {
                return "not available.";
            }
            Manifest manifest = new Manifest(manifestStream);
            return manifest.getMainAttributes().getValue("Vespa-Version");
        } catch (IOException e) {
            return "not available.";
        }
    }
}
+package com.yahoo.prelude.cache; + +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.Map; + +import com.yahoo.cache.SizeCalculator; +import com.yahoo.search.Result; +import com.yahoo.statistics.Statistics; +import com.yahoo.statistics.Value; + +/** + * <p>A generic cache which keeps the total memory consumed by its content + * below a configured maximum.</p> + * + * <p>Thread safe.</p> + * + * @author vegardh + */ +public class Cache<K, V> { + private Value elems = null; + private Value entrySizes = null; + + private Map<CacheKey<K>,CacheValue<K, V>> content=new LinkedHashMap<>(12500, 1.0f, true); + private SizeCalculator calc = new SizeCalculator(); + private long maxSizeBytes; + + private long currentSizeBytes=0; + + /** The time an element is allowed to live, negative for indefinite lifespan */ + private long timeToLiveMillis=-1; + + /** The max allowed size of an entry */ + private long maxEntrySizeBytes=10000; + + /** + * Creates a new cache + * + * @param maxSizeBytes the max size in bytes this cache is permitted to consume, + * including Result objects and Query keys + * @param timeToLiveMillis a negative value means unlimited time + * @param manager the current Statistics manager acquired by injection + */ + public Cache(long maxSizeBytes,long timeToLiveMillis, long maxEntrySizeBytes, Statistics manager) { + this.maxSizeBytes=maxSizeBytes; + this.timeToLiveMillis=timeToLiveMillis; + this.maxEntrySizeBytes=maxEntrySizeBytes; + initStats(manager); + } + + private void initStats(Statistics manager) { + elems = new Value("querycache_elems", manager, new Value.Parameters() + .setLogRaw(true).setNameExtension(true).setLogMax(true)); + entrySizes = new Value("querycache_entry_sizes", manager, + new Value.Parameters().setLogRaw(false).setLogMean(true) + .setNameExtension(true).setLogMax(true)); + } + + private synchronized CacheValue<K, V> synchGet(CacheKey<K> k) { + return content.get(k); + } + + private synchronized boolean 
synchPut(K key,V value, long keySizeBytes, long valueSizeBytes) { + // log.info("Put "+key.toString()+ " key size:"+keySizeBytes+" val size:"+valueSizeBytes); + makeRoomForBytes(valueSizeBytes+keySizeBytes); + CacheKey<K> cacheKey = new CacheKey<>(keySizeBytes, key); + CacheValue<K, V> cacheValue; + if (timeToLiveMillis<0) { + cacheValue=new CacheValue<>(valueSizeBytes,value, cacheKey); + } else { + cacheValue=new AgingCacheValue<>(valueSizeBytes,value, cacheKey); + } + currentSizeBytes+=(valueSizeBytes+keySizeBytes); + elems.put(content.size()); + content.put(cacheKey, cacheValue); + return true; + } + + /** + * Attempts to add a value to the cache + * + * @param key the key of the value + * @param value the value to add + * @return true if the value was added, false if it could not be added + */ + public boolean put(K key,V value) { + if (value instanceof Result) { // Optimized for CachingSearcher. Assuming the key is the Query. + long totalSizeBytes = calc.sizeOf(value); // Result has a Query field + if (tooBigToCache(totalSizeBytes)) { + return false; + } + entrySizes.put(totalSizeBytes); + return synchPut(key, value, 0, totalSizeBytes); + } + long keySizeBytes=calc.sizeOf(key); + long valueSizeBytes=calc.sizeOf(value); + if (tooBigToCache(keySizeBytes+valueSizeBytes)) { + return false; + } + entrySizes.put(keySizeBytes+valueSizeBytes); + return synchPut(key, value, keySizeBytes, valueSizeBytes); + } + + /** + * Don't cache elems that are too big, even if there's space + */ + private boolean tooBigToCache(long totalSize) { + if (totalSize > maxEntrySizeBytes) { + return true; + } + if (totalSize > maxSizeBytes) { + return true; + } + return false; + } + + private void makeRoomForBytes(long bytes) { + if ((maxSizeBytes-currentSizeBytes) > bytes) { + return; + } + if (content.isEmpty()) { + return; + } + for (Iterator<Map.Entry<CacheKey<K>, CacheValue<K, V>>> i = content.entrySet().iterator() ; i.hasNext() ; ) { + Map.Entry<CacheKey<K>, CacheValue<K, V>> entry = 
i.next(); + CacheKey<K> key = entry.getKey(); + CacheValue<K, V> value = entry.getValue(); + // Can't call this.removeField(), breaks iterator. + i.remove(); // Access order: first ones are LRU. + currentSizeBytes-=key.sizeBytes(); + currentSizeBytes-=value.sizeBytes(); + if ((maxSizeBytes-currentSizeBytes) > bytes) { + break; + } + } + } + + public boolean containsKey(K k) { + return content.containsKey(new CacheKey<>(-1, k)); + } + + /** Returns a value, if it is present in the cache */ + public V get(K key) { + // Currently it works to make a new CacheKey object without size + // because we have changed hashCode() there. + CacheKey<K> cacheKey = new CacheKey<>(-1, key); + CacheValue<K, V> value=synchGet(cacheKey); + if (value==null) { + return null; + } + if (timeToLiveMillis<0) { + return value.value(); + } + + if (value.expired(timeToLiveMillis)) { + // There was a value, which has now expired + remove(key); + return null; + } else { + return value.value(); + } + } + + /** + * Removes a cache value if present + * + * @return true if the value was removed, false if it was not present + */ + public synchronized boolean remove(K key) { + CacheValue<K, V> value=content.remove(key); + if (value==null) { + return false; + } + currentSizeBytes-=value.sizeBytes(); + currentSizeBytes-=value.getKey().sizeBytes(); + elems.put(content.size()); + return true; + } + + public int size() { + return content.size(); + } + + private static class CacheKey<K> { + private long sizeBytes; + private K key; + public CacheKey(long sizeBytes,K key) { + this.sizeBytes=sizeBytes; + this.key=key; + } + + public long sizeBytes() { + return sizeBytes; + } + + public K getKey() { + return key; + } + + public int hashCode() { + return key.hashCode(); + } + + @SuppressWarnings("rawtypes") + public boolean equals(Object k) { + if (key==null) { + return false; + } + if (k==null) { + return false; + } + if (k instanceof CacheKey) { + return key.equals(((CacheKey)k).getKey()); + } + return false; + 
} + + public String toString() { + return key.toString(); + } + + } + + private static class CacheValue<K, V> { + private long sizeBytes; + private V value; + private CacheKey<K> key; + public CacheValue(long sizeBytes, V value, CacheKey<K> key) { + this.sizeBytes=sizeBytes; + this.value=value; + this.key = key; + } + + public boolean expired(long ttl) { + return false; + } + + public V value() { + return value; + } + + public long sizeBytes() { + return sizeBytes; + } + + public CacheKey<K> getKey() { + return key; + } + + public String toString() { + return value.toString(); + } + + } + + private static class AgingCacheValue<K, V> extends CacheValue<K, V> { + private long birthTimeMillis; + + public AgingCacheValue(long sizeBytes,V value, CacheKey<K> key) { + super(sizeBytes,value, key); + this.birthTimeMillis=System.currentTimeMillis(); + } + + public long ageMillis() { + return System.currentTimeMillis()-birthTimeMillis; + } + + public boolean expired(long ttl) { + return (ageMillis() >= ttl); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/cache/QueryCacheKey.java b/container-search/src/main/java/com/yahoo/prelude/cache/QueryCacheKey.java new file mode 100644 index 00000000000..d885422ce57 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/cache/QueryCacheKey.java @@ -0,0 +1,73 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.cache; + +import com.yahoo.search.Query; + +public class QueryCacheKey { + private Query query; + private int offset; + private int hits; + + public QueryCacheKey(Query query) { + this.query = query; + this.offset = query.getOffset(); + this.hits = query.getHits(); + } + + public boolean equals(Object key) { + if (key==null) { + return false; + } + if (query==null) { + return false; + } + if (key instanceof QueryCacheKey) { + QueryCacheKey ckey = (QueryCacheKey)key; + boolean res = equalQueryWith(ckey) && equalPathWith(ckey); + return res; + } + return false; + } + + private boolean equalQueryWith(QueryCacheKey other) { + return query.equals(other.getQuery()); + } + + private boolean equalPathWith(QueryCacheKey other) { + if (other == null) return false; + if (other.getQuery() == null) return false; + + return query.getHttpRequest().getUri().getPath().equals(other.getQuery().getHttpRequest().getUri().getPath()); + } + + public int getHits() { + return hits; + } + + public int getOffset() { + return offset; + } + + public Query getQuery() { + return query; + } + + public void setQuery(Query newQuery) { + query = newQuery; + } + + public String toString() { + if (query==null) { + return super.toString(); + } + return query.toString(); + } + + public int hashCode() { + if (query==null) { + return super.hashCode(); + } + int ret = query.hashCode(); + return ret; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java new file mode 100644 index 00000000000..77ee36785f7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterMonitor.java @@ -0,0 +1,146 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.cluster; + +import java.util.Map; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import java.util.logging.Level; +import java.util.logging.Logger; + +import com.yahoo.component.provider.Freezable; +import com.yahoo.container.handler.VipStatus; +import com.yahoo.prelude.fastsearch.VespaBackEndSearcher; +import com.yahoo.search.result.ErrorMessage; + +/** + * Monitors of a cluster of remote nodes. The monitor uses an internal thread + * for node monitoring. + * + * @author <a href="mailto:jon.bratseth@yahoo-inc.com">Jon S Bratseth</a> + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class ClusterMonitor implements Runnable, Freezable { + + private final MonitorConfiguration configuration; + + private final static Logger log = Logger.getLogger(ClusterMonitor.class.getName()); + + private final ClusterSearcher nodeManager; + + private final VipStatus vipStatus; + + /** A map from Node to corresponding MonitoredNode */ + private final Map<VespaBackEndSearcher, NodeMonitor> nodeMonitors = new java.util.IdentityHashMap<>(); + ScheduledFuture<?> future; + + private boolean isFrozen = false; + + ClusterMonitor(final ClusterSearcher manager, final QrMonitorConfig monitorConfig, VipStatus vipStatus) { + configuration = new MonitorConfiguration(monitorConfig); + nodeManager = manager; + this.vipStatus = vipStatus; + log.fine("checkInterval is " + configuration.getCheckInterval() + " ms"); + } + + /** Returns the configuration of this cluster monitor */ + MonitorConfiguration getConfiguration() { + return configuration; + } + + void startPingThread() { + if (!isFrozen()) { + throw new IllegalStateException( + "Do not start the monitoring thread before the set of" + +" nodes to monitor is complete/the ClusterMonitor is frozen."); + } + future = nodeManager.getScheduledExecutor().scheduleAtFixedRate(this, 30 * 1000, configuration.getCheckInterval(), TimeUnit.MILLISECONDS); + } + + 
/** + * Adds a new node for monitoring. + */ + void add(final VespaBackEndSearcher node) { + if (isFrozen()) { + throw new IllegalStateException( + "Can not add new nodes after ClusterMonitor has been frozen."); + } + final NodeMonitor monitor = new NodeMonitor(node); + nodeMonitors.put(node, monitor); + } + + /** Called from ClusterSearcher/NodeManager when a node failed */ + void failed(final VespaBackEndSearcher node, final ErrorMessage error) { + final NodeMonitor monitor = nodeMonitors.get(node); + final boolean wasWorking = monitor.isWorking(); + monitor.failed(error); + if (wasWorking && !monitor.isWorking()) { + // was warning, see VESPA-1922 + log.info("Failed monitoring node '" + node + "' due to '" + error); + nodeManager.failed(node); + } + updateVipStatus(); + } + + /** Called when a node responded */ + void responded(final VespaBackEndSearcher node, boolean hasDocumentsOnline) { + final NodeMonitor monitor = nodeMonitors.get(node); + final boolean wasFailing = !monitor.isWorking(); + monitor.responded(hasDocumentsOnline); + if (wasFailing && monitor.isWorking()) { + log.info("Failed node '" + node + "' started working again."); + nodeManager.working(monitor.getNode()); + } + updateVipStatus(); + } + + private void updateVipStatus() { + boolean hasWorkingNodesWithDocumentsOnline = false; + for (NodeMonitor node : nodeMonitors.values()) { + if (node.isWorking() && node.searchNodesOnline()) { + hasWorkingNodesWithDocumentsOnline = true; + break; + } + } + if (hasWorkingNodesWithDocumentsOnline) { + vipStatus.addToRotation(this); + } else { + vipStatus.removeFromRotation(this); + } + } + + /** + * Ping all nodes which needs pinging to discover state changes + */ + private void ping() throws InterruptedException { + for (final NodeMonitor monitor : nodeMonitors.values()) { + nodeManager.ping(monitor.getNode()); + } + } + + @Override + public void run() { + log.finest("Activating ping"); + try { + ping(); + } catch (final Exception e) { + 
log.log(Level.WARNING, "Error in monitor thread", e); + } + } + + public void shutdown() throws InterruptedException { + if (future != null) { + future.cancel(true); + } + } + + @Override + public void freeze() { + isFrozen = true; + + } + + @Override + public boolean isFrozen() { + return isFrozen; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java new file mode 100644 index 00000000000..88982fa1b69 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/ClusterSearcher.java @@ -0,0 +1,657 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.cluster; + +import com.yahoo.collections.Tuple2; +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.Chain; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.concurrent.Receiver; +import com.yahoo.concurrent.Receiver.MessageState; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.container.handler.VipStatus; +import com.yahoo.container.protect.Error; +import com.yahoo.fs4.PacketDumper; +import com.yahoo.fs4.PongPacket; +import com.yahoo.fs4.mplex.Backend; +import com.yahoo.container.search.LegacyEmulationConfig; +import com.yahoo.log.LogLevel; +import com.yahoo.search.dispatch.Dispatcher; +import com.yahoo.prelude.fastsearch.FS4ResourcePool; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.Ping; +import com.yahoo.prelude.Pong; +import com.yahoo.prelude.fastsearch.CacheControl; +import com.yahoo.prelude.fastsearch.CacheParams; +import com.yahoo.prelude.fastsearch.ClusterParams; +import com.yahoo.prelude.fastsearch.DocumentdbInfoConfig; +import com.yahoo.prelude.fastsearch.FastSearcher; +import com.yahoo.prelude.fastsearch.SummaryParameters; +import com.yahoo.prelude.fastsearch.VespaBackEndSearcher; +import 
com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.config.ClusterConfig; +import com.yahoo.search.query.ParameterParser; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.statistics.Statistics; +import com.yahoo.statistics.Value; +import com.yahoo.vespa.config.search.DispatchConfig; +import com.yahoo.vespa.streamingvisitors.VdsStreamingSearcher; + +import java.io.IOException; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.*; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ScheduledExecutorService; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.commons.lang.StringUtils; + +import static com.yahoo.container.QrSearchersConfig.Searchcluster.Indexingmode.STREAMING; + +/** + * A searcher which forwards to a cluster of monitored native Vespa backends. + * + * @author bratseth + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author <a href="mailto:geirst@yahoo-inc.com">Geir Storli</a> + */ +@After("*") +public class ClusterSearcher extends Searcher { + + private final static Logger log = Logger.getLogger(ClusterSearcher.class.getName()); + + private final ClusterMonitor monitor; + + private final Hasher hasher; + + private final Value cacheHitRatio; + + private final String clusterModelName; + + private final List<Backend> backends = new ArrayList<>(); + + // The set of document types contained in this search cluster + private final Set<String> documentTypes; + + // Mapping from rank profile names to document types containing them + private final Map<String, Set<String>> rankProfiles = new HashMap<>(); + + private final boolean failoverToRemote; + + private final FS4ResourcePool fs4ResourcePool; + + private final long maxQueryTimeout; // in milliseconds + private final static long DEFAULT_MAX_QUERY_TIMEOUT = 
600000L; + + private final long maxQueryCacheTimeout; // in milliseconds + private final static long DEFAULT_MAX_QUERY_CACHE_TIMEOUT = 10000L; + + /** + * Creates a new ClusterSearcher. + */ + public ClusterSearcher(ComponentId id, + QrSearchersConfig qrsConfig, + ClusterConfig clusterConfig, + DocumentdbInfoConfig documentDbConfig, + LegacyEmulationConfig emulationConfig, + QrMonitorConfig monitorConfig, + DispatchConfig dispatchConfig, + Statistics manager, + FS4ResourcePool listeners, + VipStatus vipStatus) { + super(id); + this.hasher = new Hasher(); + this.fs4ResourcePool = listeners; + monitor = new ClusterMonitor(this, monitorConfig, vipStatus); + final int searchClusterIndex = clusterConfig.clusterId(); + clusterModelName = clusterConfig.clusterName(); + final QrSearchersConfig.Searchcluster searchClusterConfig = getSearchClusterConfigFromClusterName(qrsConfig, clusterModelName); + documentTypes = new LinkedHashSet<>(); + failoverToRemote = clusterConfig.failoverToRemote(); + Dispatcher dispatcher = new Dispatcher(dispatchConfig); + + final String eventName = clusterModelName + ".cache_hit_ratio"; + cacheHitRatio = new Value(eventName, manager, new Value.Parameters() + .setNameExtension(false).setLogRaw(false).setLogMean(true)); + + maxQueryTimeout = ParameterParser.asMilliSeconds(clusterConfig.maxQueryTimeout(), DEFAULT_MAX_QUERY_TIMEOUT); + maxQueryCacheTimeout = ParameterParser.asMilliSeconds(clusterConfig.maxQueryCacheTimeout(), + DEFAULT_MAX_QUERY_CACHE_TIMEOUT); + + final CacheParams cacheParams = new CacheParams(createCache(clusterConfig, clusterModelName)); + final SummaryParameters docSumParams = new SummaryParameters(qrsConfig + .com().yahoo().prelude().fastsearch().FastSearcher().docsum() + .defaultclass()); + + for (final DocumentdbInfoConfig.Documentdb docDb : documentDbConfig.documentdb()) { + String docTypeName = docDb.name(); + documentTypes.add(docTypeName); + + for (final DocumentdbInfoConfig.Documentdb.Rankprofile profile : 
docDb.rankprofile()) { + addValidRankProfile(profile.name(), docTypeName); + } + } + + boolean gotExpectedBackend = false; + if (searchClusterConfig.indexingmode() == STREAMING) { + final VdsStreamingSearcher searcher = vdsCluster(searchClusterIndex, + searchClusterConfig, cacheParams, emulationConfig, docSumParams, + documentDbConfig); + addBackendSearcher(searcher); + gotExpectedBackend = true; + } else { + for (int i = 0; i < searchClusterConfig.dispatcher().size(); i++) { + final Backend b = createBackend( + searchClusterConfig.dispatcher(i)); + final FastSearcher searcher = searchDispatch(searchClusterIndex, + searchClusterConfig, cacheParams, emulationConfig, docSumParams, + documentDbConfig, b, dispatcher, i); + try { + searcher.setLocalDispatching(!isRemote(searchClusterConfig.dispatcher(i).host())); + } catch (UnknownHostException e) { + throw new RuntimeException(e); + } + backends.add(b); + addBackendSearcher(searcher); + gotExpectedBackend |= searcher.isLocalDispatching(); + } + } + if (!gotExpectedBackend) { + log.log(Level.SEVERE, "ClusterSearcher should have a local top level dispatch." + + " The possibility to configure dispatchers explicitly will be removed" + + " in a future release."); + } + hasher.running = true; + monitor.freeze(); + monitor.startPingThread(); + } + + private static QrSearchersConfig.Searchcluster getSearchClusterConfigFromClusterName(QrSearchersConfig config, String name) { + for (QrSearchersConfig.Searchcluster searchCluster : config.searchcluster()) { + if (searchCluster.name().equals(name)) { + return searchCluster; + } + } + return null; + } + + /** + * Returns false if this host is local. 
+ */ + boolean isRemote(String host) throws UnknownHostException { + InetAddress dispatchHost = InetAddress.getByName(host); + if (dispatchHost.isLoopbackAddress()) { + return false; + } else { + String localName; + try { + localName = InetAddress.getLocalHost().getCanonicalHostName(); + } catch (UnknownHostException e) { + // Macs unfortunately can tell their own name but does not know + // it, so if we run this model on a mac we'll end up here. + return false; + } + return !localName.equals(dispatchHost.getCanonicalHostName()); + } + } + + private static ClusterParams makeClusterParams(int searchclusterIndex, + QrSearchersConfig.Searchcluster searchClusterConfig, + LegacyEmulationConfig emulConfig, + int dispatchIndex) { + return new ClusterParams(searchclusterIndex, + "sc" + searchclusterIndex + ".num" + dispatchIndex, + searchClusterConfig.rowbits(), + emulConfig); + } + + private static FastSearcher searchDispatch(int searchclusterIndex, + QrSearchersConfig.Searchcluster searchClusterConfig, + CacheParams cacheParams, + LegacyEmulationConfig emulConfig, + SummaryParameters docSumParams, + DocumentdbInfoConfig documentdbInfoConfig, + Backend backend, + Dispatcher dispatcher, + int i) { + ClusterParams clusterParams = makeClusterParams(searchclusterIndex, + searchClusterConfig, + emulConfig, i); + return new FastSearcher(backend, dispatcher, docSumParams, clusterParams, cacheParams, documentdbInfoConfig); + } + + private static VdsStreamingSearcher vdsCluster(int searchclusterIndex, + QrSearchersConfig.Searchcluster searchClusterConfig, + CacheParams cacheParams, + LegacyEmulationConfig emulConfig, + SummaryParameters docSumParams, + DocumentdbInfoConfig documentdbInfoConfig) { + ClusterParams clusterParams = makeClusterParams(searchclusterIndex, + searchClusterConfig, + emulConfig, 0); + VdsStreamingSearcher searcher = (VdsStreamingSearcher) VespaBackEndSearcher + .getSearcher("com.yahoo.vespa.streamingvisitors.VdsStreamingSearcher"); + 
searcher.setSearchClusterConfigId(searchClusterConfig + .rankprofiles().configid()); + searcher.setStorageClusterRouteSpec(searchClusterConfig + .storagecluster().routespec()); + searcher.init(docSumParams, clusterParams, cacheParams, documentdbInfoConfig); + return searcher; + } + + /** Do not use, for internal testing purposes only. **/ + ClusterSearcher(Set<String> documentTypes) { + this.hasher = new Hasher(); + this.failoverToRemote = false; + this.documentTypes = documentTypes; + monitor = new ClusterMonitor(this, new QrMonitorConfig(new QrMonitorConfig.Builder()), new VipStatus()); + cacheHitRatio = new Value( + "com.yahoo.prelude.cluster.ClusterSearcher.ClusterSearcher().dummy", + Statistics.nullImplementation, new Value.Parameters()); + clusterModelName = "testScenario"; + fs4ResourcePool = null; + maxQueryTimeout = DEFAULT_MAX_QUERY_TIMEOUT; + maxQueryCacheTimeout = DEFAULT_MAX_QUERY_CACHE_TIMEOUT; + } + + public Map<String, Backend.BackendStatistics> getBackendStatistics() { + Map<String, Backend.BackendStatistics> backendStatistics = new TreeMap<>(); + for (final Backend backend : backends) { + backendStatistics.put(backend.toString(), backend.getStatistics()); + } + return backendStatistics; + } + + private Backend createBackend(final QrSearchersConfig.Searchcluster.Dispatcher disp) { + return fs4ResourcePool.getBackend(disp.host(), disp.port()); + } + + private static CacheControl createCache(ClusterConfig config, String clusterModelName) { + log.log(Level.INFO, "Enabling cache for search cluster " + + clusterModelName + " (size=" + config.cacheSize() + + ", timeout=" + config.cacheTimeout() + ")"); + + return new CacheControl(config.cacheSize(), config.cacheTimeout()); + } + + public String getClusterModelName() { + return clusterModelName; + } + + ClusterMonitor getMonitor() { + return monitor; + } + + void addBackendSearcher(VespaBackEndSearcher searcher) { + monitor.add(searcher); + hasher.add(searcher); + } + + void addValidRankProfile(String 
profileName, String docTypeName) { + if (!rankProfiles.containsKey(profileName)) { + rankProfiles.put(profileName, new HashSet<>()); + } + rankProfiles.get(profileName).add(docTypeName); + } + + void setValidRankProfile(String profileName, Set<String> documentTypes) { + rankProfiles.put(profileName, documentTypes); + } + + /** + * Returns an error if the document types do not have the requested rank + * profile. For the case of multiple document types, only returns an + * error if we have restricted the set of documents somehow. This is + * because when searching over all doc types, common ancestors might + * not have the requested rank profile and failing on that basis is + * probably not reasonable. + * + * @param query query + * @param docTypes set of requested doc types for this query + * @return null if request rank profile is ok for the requested + * doc types, a result with error message if not. + */ + private Result checkValidRankProfiles(Query query, Set<String> docTypes) { + String rankProfile = query.getRanking().getProfile(); + Set<String> invalidInDocTypes = null; + Set<String> rankDocTypes = rankProfiles.get(rankProfile); + + if (rankDocTypes == null) { + // ranking profile does not exist in any document type + invalidInDocTypes = docTypes; + } + else if (docTypes.size() == 1) { + // one document type, fails if invalid rank profile + if (!rankDocTypes.contains(docTypes.iterator().next())) { + invalidInDocTypes = docTypes; + } + } + else { + // multiple document types, only fail when restricting doc types + Set<String> restrict = query.getModel().getRestrict(); + Set<String> sources = query.getModel().getSources(); + boolean validate = restrict != null && !restrict.isEmpty(); + validate = validate || sources != null && !sources.isEmpty(); + if (validate && !rankDocTypes.containsAll(docTypes)) { + invalidInDocTypes = new HashSet<>(docTypes); + invalidInDocTypes.removeAll(rankDocTypes); + } + } + + if (invalidInDocTypes != null && 
!invalidInDocTypes.isEmpty()) { + String plural = invalidInDocTypes.size() > 1 ? "s" : ""; + return new Result(query, ErrorMessage.createInvalidQueryParameter( + "Requested rank profile '" + rankProfile + + "' is undefined for document type" + plural + " '" + + StringUtils.join(invalidInDocTypes.iterator(), ", ") + "'")); + } + + return null; + } + + @Override + public void fill(com.yahoo.search.Result result, String summaryClass, Execution execution) { + Query query = result.getQuery(); + int tries = 0; + + do { + // The loop is in case there are other searchers available + // able to produce results + VespaBackEndSearcher searcher = hasher.select(tries++); + if (searcher != null) { + if (query.getTimeLeft() > 0) { + doFill(searcher, result, summaryClass, execution); + } else { + if (result.hits().getErrorHit() == null) { + result.hits().setError(ErrorMessage.createTimeout("No time left to get summaries")); + } + } + } else { + if (result.hits().getErrorHit() == null) { + result.hits().setError(ErrorMessage.createNoBackendsInService("Could not fill result")); + } + } + // no error: good result, let's return + if (result.hits().getError() == null) { + return; + } + + } while (tries < hasher.getNodeCount() && failoverToRemote); + } + + public void doFill(Searcher searcher, Result result, String summaryClass, Execution execution) { + searcher.fill(result, summaryClass, execution); + updateCacheHitRatio(result, result.getQuery()); + } + + private void updateCacheHitRatio(Result result, Query query) { + // result.isCached() looks at the contained hits, so if there are no + // hits, the result will be treated as cached, even though the backend + // was queried. 
+ if (result.hits().getError() == null + && result.hits().getConcreteSize() > 0) { + + if (result.isCached()) { + cacheHit(); + } else if (!query.getNoCache()) { + cacheMiss(); + } + } + } + + @Override + public Result search(com.yahoo.search.Query query, Execution execution) { + Result result; + int tries = 0; + + do { + // The loop is in case there are other searchers available + // able to produce results + validateQueryTimeout(query); + validateQueryCache(query); + VespaBackEndSearcher searcher = hasher.select(tries++); + if (searcher == null) { + return new Result(query, ErrorMessage.createNoBackendsInService("Could not search")); + } + if (query.getTimeLeft() <= 0) { + return new Result(query, ErrorMessage.createTimeout("No time left for searching")); + } + + result = doSearch(searcher, query, execution); + + // no error: good result, let's return + if (result.hits().getError() == null) { + return result; + } + if (result.hits().getError().getCode() == Error.TIMEOUT.code) { + return result; // Retry is unlikely to help + } + if (result.hits().getError().getCode() == Error.INVALID_QUERY_PARAMETER.code) { + return result; // Retry is unlikely to help here as well + } + } while (tries < hasher.getNodeCount()); + + // only error-result gets returned here. + return result; + } + + private void validateQueryTimeout(Query query) { + if (query.getTimeout() > maxQueryTimeout) { + log.warning("Query timeout (" + query.getTimeout() + " ms) > max query timeout (" + maxQueryTimeout + " ms) for '" + + query.toString() + "'. Setting timeout to " + maxQueryTimeout + " ms."); + query.setTimeout(maxQueryTimeout); + } + } + + private void validateQueryCache(Query query) { + if (query.getRanking().getQueryCache() && query.getTimeout() > maxQueryCacheTimeout) { + log.warning("Query timeout (" + query.getTimeout() + " ms) > max query cache timeout (" + maxQueryCacheTimeout + " ms) for '" + + query.toString() + "'. 
Disabling query cache."); + query.getRanking().setQueryCache(false); + } + } + + private Result doSearch(Searcher searcher, Query query, Execution execution) { + Result result; + if (documentTypes.size() > 1) { + result = searchMultipleDocumentTypes(searcher, query, execution); + } else { + String docType = documentTypes.iterator().next(); + + Result invalidRankProfile = checkValidRankProfiles(query, documentTypes); + if (invalidRankProfile != null) { + return invalidRankProfile; + } + + query.getModel().setRestrict(docType); + result = searcher.search(query, execution); + } + updateCacheHitRatio(result, query); + return result; + } + + + private Result searchMultipleDocumentTypes(Searcher searcher, Query query, Execution execution) { + Set<String> docTypes = resolveDocumentTypes(query, execution.context().getIndexFacts()); + + Result invalidRankProfile = checkValidRankProfiles(query, docTypes); + if (invalidRankProfile != null) { + return invalidRankProfile; + } + + List<Query> queries = createQueries(query, docTypes); + if (queries.size() == 1) { + return searcher.search(queries.get(0), execution); + } else { + Result mergedResult = new Result(query.clone()); + for (Query q : queries) { + Result result = searcher.search(q, execution); + mergedResult.mergeWith(result); + mergedResult.hits().addAll(result.hits().asUnorderedHits()); + } + // Should we trim the merged result? 
+ if (query.getOffset() > 0 || query.getHits() < mergedResult.hits().size()) { + if (mergedResult.getHitOrderer() != null) { + // Make sure we have the necessary data for sorting + searcher.fill(mergedResult, Execution.ATTRIBUTEPREFETCH, execution); + } + mergedResult.hits().trim(query.getOffset(), query.getHits()); + } + return mergedResult; + } + } + + Set<String> resolveDocumentTypes(Query query, IndexFacts indexFacts) { + Set<String> restrict = query.getModel().getRestrict(); + if (restrict == null || restrict.isEmpty()) { + Set<String> sources = query.getModel().getSources(); + if (sources == null || sources.isEmpty()) { + return documentTypes; + } else { + return new HashSet<>(indexFacts.newSession(sources, Collections.emptyList(), documentTypes).documentTypes()); + } + } else { + return filterValidDocumentTypes(restrict); + } + } + + private Set<String> filterValidDocumentTypes(Collection<String> restrict) { + Set<String> retval = new LinkedHashSet<>(); + for (String docType : restrict) { + if (docType != null && documentTypes.contains(docType)) { + retval.add(docType); + } + } + return retval; + } + + private List<Query> createQueries(Query query, Set<String> docTypes) { + List<Query> retval = new ArrayList<>(docTypes.size()); + if (docTypes.size() == 1) { + query.getModel().setRestrict(docTypes.iterator().next()); + retval.add(query); + } else if ( ! docTypes.isEmpty() ) { + for (String docType : docTypes) { + Query q = query.clone(); + q.setOffset(0); + q.setHits(query.getOffset() + query.getHits()); + q.getModel().setRestrict(docType); + retval.add(q); + } + } + return retval; + } + + private void cacheHit() { + cacheHitRatio.put(1.0); + } + + private void cacheMiss() { + cacheHitRatio.put(0.0); + } + + /** NodeManager method, called from ClusterMonitor. */ + void working(VespaBackEndSearcher node) { + hasher.add(node); + } + + /** Called from ClusterMonitor. 
*/ + void failed(VespaBackEndSearcher node) { + hasher.remove(node); + } + + /** + * Pinging a node, called from ClusterMonitor. + */ + void ping(VespaBackEndSearcher node) throws InterruptedException { + log.fine("Sending ping to: " + node); + Pinger pinger = new Pinger(node); + + getExecutor().execute(pinger); + Pong pong = pinger.getPong(); // handles timeout + if (pong == null) { + monitor.failed(node, ErrorMessage.createNoAnswerWhenPingingNode("Ping thread timed out.")); + } else if (pong.badResponse()) { + monitor.failed(node, pong.getError(0)); + } else { + monitor.responded(node, backendCanServeDocuments(pong)); + } + } + + private boolean backendCanServeDocuments(Pong pong) { + List<PongPacket> wireReply = pong.getPongPackets(); + if (wireReply.size() == 0) { + return true; // streaming search does not add PongPacket instances + } + if (wireReply.size() > 1) { + log.log(LogLevel.ERROR, "ClusterSearcher ping got more than one pong packet (" + wireReply.size() + + "), this means basic implementation assumptions now are out of sync."); + } + + PongPacket pongPacket = wireReply.get(0); + if (pongPacket.getActiveNodes().isPresent() && pongPacket.getActiveNodes().get() == 0) { + return false; + } else { + return true; + } + } + + public void dumpPackets(PacketDumper.PacketType packetType, boolean on) throws IOException { + for (Backend b : backends) { + b.dumpPackets(packetType, on); + } + } + + @Override + public void deconstruct() { + try { + monitor.shutdown(); + } catch (final InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + ExecutorService getExecutor() { + return fs4ResourcePool.getExecutor(); + } + + ScheduledExecutorService getScheduledExecutor() { + return fs4ResourcePool.getScheduledExecutor(); + } + + private class Pinger implements Runnable { + + private final Searcher searcher; + private final Ping pingChallenge = new Ping(monitor.getConfiguration().getRequestTimeout()); + private final Receiver<Pong> pong = new 
Receiver<>(); + + public Pinger(final Searcher searcher) { + this.searcher = searcher; + } + + @Override + public void run() { + pong.put(createExecution().ping(pingChallenge)); + } + + private Execution createExecution() { + return new Execution(new Chain<>(searcher), + new Execution.Context(null, null, null, null, null)); + } + + public Pong getPong() throws InterruptedException { + Tuple2<MessageState, Pong> reply = pong.get(pingChallenge.getTimeout() + 150); + if (reply.first != MessageState.VALID) { + return null; + } else { + return reply.second; + } + } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/Hasher.java b/container-search/src/main/java/com/yahoo/prelude/cluster/Hasher.java new file mode 100644 index 00000000000..a78b5d6e1b5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/Hasher.java @@ -0,0 +1,135 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.cluster; + +import java.util.Random; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.logging.Logger; + +import com.yahoo.container.handler.VipStatus; +import com.yahoo.log.LogLevel; +import com.yahoo.prelude.fastsearch.VespaBackEndSearcher; + +/** + * Failover between multiple Vespa backends. + * + * @author bratseth + * @author Prashanth B. Bhat + * @author Steinar Knutsen + */ +public class Hasher { + + boolean running = false; + + private static final Logger log = Logger.getLogger(Hasher.class.getName()); + private static final Random tldSeeder = new Random(); + + private volatile VespaBackEndSearcher[] allNodes = new VespaBackEndSearcher[0]; + private volatile VespaBackEndSearcher[] localNodes = new VespaBackEndSearcher[0]; + + private AtomicInteger avoidAllQrsHitSameTld = new AtomicInteger(tldSeed()); + + /** + * Creates a hasher independent of the {@linkplain VipStatus programmatic VIP API}. 
+ */ + public Hasher() { + } + + private static synchronized int tldSeed() { + return tldSeeder.nextInt(); + } + + static private VespaBackEndSearcher[] addNode(VespaBackEndSearcher node, VespaBackEndSearcher[] oldNodes) { + VespaBackEndSearcher[] newNodes = new VespaBackEndSearcher[oldNodes.length + 1]; + System.arraycopy(oldNodes, 0, newNodes, 0, oldNodes.length); + newNodes[oldNodes.length] = node; + return newNodes; + } + /** + * Make a node available for search. + */ + public void add(VespaBackEndSearcher node) { + allNodes = addNode(node, allNodes); + + if (node.isLocalDispatching()) { + localNodes = addNode(node, localNodes); + } + } + + private VespaBackEndSearcher[] removeNode(VespaBackEndSearcher node, VespaBackEndSearcher[] nodes) { + VespaBackEndSearcher[] newNodes = null; + for (VespaBackEndSearcher n : nodes) { + if (n == node) { + newNodes = new VespaBackEndSearcher[nodes.length - 1]; + break; + } + } + if (newNodes != null) { + int numToKeep = 0; + + for (VespaBackEndSearcher n : nodes) { + if (n != node) { + newNodes[numToKeep++] = n; + } + } + return newNodes; + } + return nodes; + } + + /** Removes a node */ + public void remove(VespaBackEndSearcher node) { + if (allNodes.length == 0) { + return; + } + + VespaBackEndSearcher[] newNodes = removeNode(node, allNodes); + if (newNodes != allNodes) { + if (running && newNodes.length == 0) { + log.log(LogLevel.WARNING, "No longer any nodes for this cluster when" + + " removing malfunctioning " + node.toString() + "."); + } + allNodes = newNodes; + } + + newNodes = removeNode(node, localNodes); + if (newNodes != localNodes) { + if (running && localNodes.length == 0) { + log.log(LogLevel.WARNING, "Removing malfunctioning " + node.toString() + + " from traffic leaves no local dispatchers, performance" + + " degradation is to expected."); + } + localNodes = newNodes; + } + } + + public int getNodeCount() { + return allNodes.length; + } + + /** + * Return a node, prefer local nodes, try to skip already hit 
nodes. + * + * @param trynum + * hint to skip already used nodes + * @return the selected node, or null if this hasher has no nodes + */ + public VespaBackEndSearcher select(int trynum) { + VespaBackEndSearcher[] nodes = allNodes; + + if (nodes.length == 0) { + return null; + } else { + if (localNodes.length > 0) { + nodes = localNodes; + if (localNodes.length == 1) { + return nodes[0]; + } else { + return nodes[Math.abs(avoidAllQrsHitSameTld.incrementAndGet() % nodes.length)]; + } + } else { + return nodes[Math.abs(avoidAllQrsHitSameTld.incrementAndGet() % nodes.length)]; + } + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/MonitorConfiguration.java b/container-search/src/main/java/com/yahoo/prelude/cluster/MonitorConfiguration.java new file mode 100644 index 00000000000..9d2074cb2c0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/MonitorConfiguration.java @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.cluster; + +/** + * The configuration of a cluster monitor instance + * + * @author bratseth + * @author Steinar Knutsen + */ +public class MonitorConfiguration { + + /** + * The interval in ms between consecutive checks of the monitored nodes + */ + private final long checkInterval = 1000; + + /** + * The number of milliseconds to attempt to complete a request before giving + * up + */ + private long requestTimeout = 2700; + + public MonitorConfiguration(final QrMonitorConfig config) { + requestTimeout = config.requesttimeout(); + } + + /** + * Returns the interval between each ping of idle or failing nodes Default + * is 1000ms + */ + public long getCheckInterval() { + return checkInterval; + } + + /** + * Sets the number of milliseconds to attempt to service a request (at + * different nodes) before giving up. 
+ */ + public void setRequestTimeout(final long timeout) { + requestTimeout = timeout; + } + + /** + * Returns the number of milliseconds to attempt to service a request (at + * different nodes) before giving up. Default is 2700 ms. + */ + public long getRequestTimeout() { + return requestTimeout; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java b/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java new file mode 100644 index 00000000000..b6fe4b69052 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/NodeMonitor.java @@ -0,0 +1,123 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.cluster; + +import static com.yahoo.container.protect.Error.BACKEND_COMMUNICATION_ERROR; +import static com.yahoo.container.protect.Error.NO_ANSWER_WHEN_PINGING_NODE; + +import java.util.logging.Logger; + +import com.yahoo.prelude.fastsearch.VespaBackEndSearcher; +import com.yahoo.search.result.ErrorMessage; + +/** + * A node monitor is responsible for maintaining the state of a monitored node. 
+ * It has the following properties: + * <ul> + * <li>A node is taken out of operation if it gives no response in 10 s</li> + * <li>A node is put back in operation when it responds correctly again</li> + * </ul> + * + * @author bratseth + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class NodeMonitor { + + protected static Logger log = Logger.getLogger(NodeMonitor.class.getName()); + + /** The object representing the monitored node */ + private final VespaBackEndSearcher node; + + private boolean isWorking = true; + + /** The last time this node responded successfully */ + private long succeededAt = 0; + + /** Whether it is assumed the node has documents available to serve */ + private boolean searchNodesOnline = true; + + /** + * Creates a new node monitor for a node + */ + public NodeMonitor(final VespaBackEndSearcher node) { + this.node = node; + } + + /** + * Returns whether this node is currently in a state suitable for receiving + * traffic. As far as we know, that is + */ + public boolean isWorking() { + return isWorking; + } + + // Whether or not dispatch has ever responded successfully + private boolean atStartUp = true; + + public VespaBackEndSearcher getNode() { + return node; + } + + /** + * Called when this node fails. + * + * @param error + * A container which should contain a short description + */ + public void failed(final ErrorMessage error) { + long respondedAt = System.currentTimeMillis(); + + if (error.getCode() == BACKEND_COMMUNICATION_ERROR.code + || error.getCode() == NO_ANSWER_WHEN_PINGING_NODE.code) { + // Only count not being able to talk to backend at all + // as errors we care about + if ((respondedAt - succeededAt) > 10000) { + setWorking(false, "Not working for 10 s: " + error.toString()); + } + } else { + succeededAt = respondedAt; + } + } + + /** + * Called when a response is received from this node. 
+ */ + public void responded(boolean searchNodesOnline) { + succeededAt = System.currentTimeMillis(); + this.searchNodesOnline = searchNodesOnline; + atStartUp = false; + + if (!isWorking) { + setWorking(true, "Responds correctly"); + } + } + + /** Changes the state of this node if required */ + private void setWorking(final boolean working, String explanation) { + String explanationToLog; + if (isWorking == working) { + return; // Old news + } + + if (explanation == null) { + explanationToLog = ""; + } else { + explanationToLog = ": " + explanation; + } + + if (working) { + log.info("Putting " + node + " in service" + explanationToLog); + } else { + if (!atStartUp) { + // was warning, see VESPA-1922 + log.info("Taking " + node + " out of service" + explanationToLog); + } + } + + isWorking = working; + } + + boolean searchNodesOnline() { + return searchNodesOnline; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/dispatchprototype/DispatchClusterSearcher.java b/container-search/src/main/java/com/yahoo/prelude/cluster/dispatchprototype/DispatchClusterSearcher.java new file mode 100644 index 00000000000..d47a5a82023 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/dispatchprototype/DispatchClusterSearcher.java @@ -0,0 +1,137 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.cluster.dispatchprototype; + +import com.google.common.annotations.Beta; +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.provider.ComponentRegistry; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.container.handler.VipStatus; +import com.yahoo.container.search.LegacyEmulationConfig; +import com.yahoo.prelude.cluster.ClusterSearcher; +import com.yahoo.prelude.cluster.QrMonitorConfig; +import com.yahoo.prelude.fastsearch.DocumentdbInfoConfig; +import com.yahoo.prelude.fastsearch.FS4ResourcePool; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.config.ClusterConfig; +import com.yahoo.search.config.dispatchprototype.SearchNodesConfig; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.statistics.Statistics; +import com.yahoo.vespa.config.search.DispatchConfig; + +import static com.yahoo.container.QrSearchersConfig.Searchcluster; + +/** + * This class modifies ClusterSearcher behavior to talk directly to search nodes instead of dispatchers. + * + * This means that queries are sent to a single search node only. Obviously, this will not give correct + * results - it is just a single step towards eliminating top-level dispatch as a separate process. 
+ * + * @author <a href="mailto:bakksjo@yahoo-inc.com">Oyvind Bakksjo</a> + */ +@Beta +@After("*") +public class DispatchClusterSearcher extends Searcher { + private final ClusterSearcher clusterSearcher; + + public DispatchClusterSearcher( + final ComponentId id, + final SearchNodesConfig searchNodesConfig, + final QrSearchersConfig qrsConfig, + final ClusterConfig clusterConfig, + final DocumentdbInfoConfig documentDbConfig, + final LegacyEmulationConfig emulationConfig, + final QrMonitorConfig monitorConfig, + final DispatchConfig dispatchConfig, + final Statistics manager, + final FS4ResourcePool listeners, + final ComponentRegistry<ClusterSearcher> otherClusterSearchers, + final VipStatus vipStatus) { + + clusterSearcher = new ClusterSearcher( + id, + makeQrSearchersConfigWithSearchNodesInsteadOfDispatcherNodes( + qrsConfig, + searchNodesConfig, + clusterConfig.clusterName()), + clusterConfig, + documentDbConfig, + emulationConfig, + monitorConfig, + dispatchConfig, + manager, + listeners, + vipStatus); + + //Prevent the ClusterSearcher(s) implicitly set up by the model from warning that it can't contact + //the c++ TLD when we disable it in the system test. 
+ otherClusterSearchers.allComponents().stream() + .forEach(ClusterSearcher::deconstruct); + } + + + @Override + public Result search(Query query, Execution execution) { + return clusterSearcher.search(query, execution); + } + + @Override + public void fill(Result result, String summaryClass, Execution execution) { + clusterSearcher.fill(result, summaryClass, execution); + } + + private static QrSearchersConfig makeQrSearchersConfigWithSearchNodesInsteadOfDispatcherNodes( + final QrSearchersConfig qrsConfig, + final SearchNodesConfig searchNodesConfig, + final String clusterName) { + final QrSearchersConfig.Builder qrSearchersConfigBuilder = new QrSearchersConfig.Builder(); + copyEverythingExceptSearchclusters(qrsConfig, qrSearchersConfigBuilder); + + // We only "copy" (with modifications) a single Searchcluster. + final Searchcluster originalSearchcluster = getSearchclusterByName(qrsConfig, clusterName); + final Searchcluster.Builder searchclusterBuilder = new Searchcluster.Builder(); + copyEverythingExceptDispatchers(originalSearchcluster, searchclusterBuilder); + // Here comes the trick: Substitute search nodes for dispatchers. 
+ for (final SearchNodesConfig.Search_node searchNodeConfig : searchNodesConfig.search_node()) { + searchclusterBuilder.dispatcher( + new Searchcluster.Dispatcher.Builder() + .host(searchNodeConfig.host()) + .port(searchNodeConfig.port())); + } + qrSearchersConfigBuilder.searchcluster(searchclusterBuilder); + + return new QrSearchersConfig(qrSearchersConfigBuilder); + } + + private static void copyEverythingExceptSearchclusters( + final QrSearchersConfig source, + final QrSearchersConfig.Builder destination) { + destination.tag(new QrSearchersConfig.Tag.Builder(source.tag())); + destination.com(new QrSearchersConfig.Com.Builder(source.com())); + destination.customizedsearchers(new QrSearchersConfig.Customizedsearchers.Builder(source.customizedsearchers())); + for (final QrSearchersConfig.External external : source.external()) { + destination.external(new QrSearchersConfig.External.Builder(external)); + } + } + + private static Searchcluster getSearchclusterByName(final QrSearchersConfig qrsConfig, final String clusterName) { + return qrsConfig.searchcluster().stream() + .filter(cluster -> clusterName.equals(cluster.name())) + .findAny() + .orElseThrow(() -> new IllegalStateException("No cluster found with name " + clusterName)); + } + + private static void copyEverythingExceptDispatchers( + final Searchcluster source, + final Searchcluster.Builder destination) { + destination + .name(source.name()) + .searchdef(source.searchdef()) + .rankprofiles(new Searchcluster.Rankprofiles.Builder(source.rankprofiles())) + .indexingmode(source.indexingmode()) + // Deliberately excluding storagecluster here because it's not relevant. 
+ .rowbits(source.rowbits()); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/cluster/package-info.java b/container-search/src/main/java/com/yahoo/prelude/cluster/package-info.java new file mode 100644 index 00000000000..e4dbfbb3a1b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/cluster/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.prelude.cluster; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/ByteField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/ByteField.java new file mode 100644 index 00000000000..44107499b40 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/ByteField.java @@ -0,0 +1,53 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+/** + * Class representing a byte field in the result set + * + */ + +package com.yahoo.prelude.fastsearch; + + +import java.nio.ByteBuffer; + +import com.yahoo.search.result.NanNumber; +import com.yahoo.data.access.Inspector; + +/** + * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a> + */ +public class ByteField extends DocsumField { + static final byte EMPTY_VALUE = Byte.MIN_VALUE; + + public ByteField(String name) { + super(name); + } + + private Object convert(byte value) { + if (value == EMPTY_VALUE) { + return NanNumber.NaN; + } else { + return Byte.valueOf(value); + } + } + + public Object decode(ByteBuffer b) { + return convert(b.get()); + } + + public Object decode(ByteBuffer b, FastHit hit) { + Object field = decode(b); + hit.setField(name, field); + return field; + } + + public int getLength(ByteBuffer b) { + int offset = b.position(); + final int bytelength = Byte.SIZE >> 3; + b.position(offset + bytelength); + return bytelength; + } + + public Object convert(Inspector value) { + return convert((byte)value.asLong(EMPTY_VALUE)); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/CacheControl.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/CacheControl.java new file mode 100644 index 00000000000..fdc76835e1e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/CacheControl.java @@ -0,0 +1,117 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.fastsearch; + + +import com.yahoo.fs4.Packet; +import com.yahoo.fs4.QueryPacket; +import com.yahoo.fs4.QueryResultPacket; +import com.yahoo.search.Query; +import com.yahoo.processing.request.CompoundName; + + +/** + * The cache control logic for FastSearcher + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class CacheControl { + + private static final CompoundName nocachewrite=new CompoundName("nocachewrite"); + + /** Whether this CacheControl actually should cache hits at all. */ + private final boolean activeCache; + + /** Direct unsychronized cache access */ + private final PacketCache packetCache; + + public CacheControl(int sizeMegaBytes, double cacheTimeOutSeconds) { + activeCache = sizeMegaBytes > 0 && cacheTimeOutSeconds > 0.0d; + if (activeCache) { + packetCache = new PacketCache(sizeMegaBytes, 0, cacheTimeOutSeconds); + } else { + packetCache = null; + } + } + + /** Returns the capacity of the packet cache in megabytes */ + public final int capacity() { + return packetCache.getCapacity(); + } + + public final boolean useCache(Query query) { + return (activeCache && !query.getNoCache()); + } + + public final PacketWrapper lookup(CacheKey key, Query query) { + if ((key != null) && useCache(query)) { + long now = System.currentTimeMillis(); + synchronized (packetCache) { + return packetCache.get(key, now); + } + } + return null; + } + + // updates first phase in multi phase search + void updateCacheEntry(CacheKey key, Query query, QueryResultPacket resultPacket) { + long oldTimestamp; + if (!activeCache) return; + + PacketWrapper wrapper = lookup(key, query); + if (wrapper == null) return; + + // The timestamp is owned by the QueryResultPacket, this is why this + // update method puts entries into the cache differently from elsewhere + oldTimestamp = wrapper.getTimestamp(); + wrapper = (PacketWrapper) wrapper.clone(); + wrapper.addResultPacket(resultPacket); + synchronized (packetCache) { + 
packetCache.put(key, wrapper, oldTimestamp); + } + } + + // updates phases after first phase phase in multi phase search + void updateCacheEntry(CacheKey key, Query query, DocsumPacketKey[] packetKeys, Packet[] packets) { + if (!activeCache) return; + + PacketWrapper wrapper = lookup(key, query); + if (wrapper== null) return; + + wrapper = (PacketWrapper) wrapper.clone(); + wrapper.addDocsums(packetKeys, packets); + synchronized (packetCache) { + packetCache.put(key, wrapper, wrapper.getTimestamp()); + } + } + + void cache(CacheKey key, Query query, DocsumPacketKey[] packetKeys, Packet[] packets) { + if ( ! activeCache) return; + + if (query.getNoCache()) return; + if (query.properties().getBoolean(nocachewrite)) return; + + PacketWrapper wrapper = lookup(key, query); + if (wrapper == null) { + wrapper = new PacketWrapper(key, packetKeys,packets); + long now = System.currentTimeMillis(); + synchronized (packetCache) { + packetCache.put(key, wrapper, now); + } + } else { + wrapper = (PacketWrapper) wrapper.clone(); + wrapper.addResultPacket((QueryResultPacket) packets[0]); + wrapper.addDocsums(packetKeys, packets, 1); + synchronized (packetCache) { + packetCache.put(key, wrapper, wrapper.getTimestamp()); + } + } + } + + /** Test method. */ + public void clear() { + if (packetCache != null) { + packetCache.clear(); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/CacheKey.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/CacheKey.java new file mode 100644 index 00000000000..cd330603b3d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/CacheKey.java @@ -0,0 +1,81 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + + +import java.util.Arrays; + +import com.yahoo.collections.BobHash; +import com.yahoo.fs4.QueryPacket; + + +/** + * The key used in the packet cache. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class CacheKey { + private int hashCode; + private byte[] serialized = null; + + /** + * Create a cache key from the query packet. + */ + public CacheKey(QueryPacket queryPacket) { + if (!queryPacket.isEncoded()) { + queryPacket.allocateAndEncode(0); + } + this.serialized = queryPacket.getOpaqueCacheKey(); + hashCode = calculateHashCode(); + } + + private int calculateHashCode() { + return BobHash.hash(serialized, 0); + } + + public boolean equals(Object o) { + if (o == null) { + return false; + } + if (!(o instanceof CacheKey)) { + return false; + } + + CacheKey k = (CacheKey) o; + return Arrays.equals(serialized, k.serialized); + // // The following is used for detailed debugging + // boolean state = true; + // if (serialized.length != k.serialized.length) { + // System.out.println("this " + serialized.length + " other " + k.serialized.length); + // return false; + // } + // System.out.println("start of arrays"); + // for (int i = 0; i < serialized.length; ++i) { + // System.out.print("serialized " + serialized[i] + " " + k.serialized[i]); + // if (serialized[i] != k.serialized[i]) { + // System.out.println(" diff at index " + i); + // state = false; // want to see all the data + // } else { + // System.out.println(""); + // } + // } + // return state; + } + + public int hashCode() { + return hashCode; + } + + public byte[] getCopyOfFullKey() { + return Arrays.copyOf(serialized, serialized.length); + } + + /** + * Return an estimate of the memory used by this object. Ie the sum of + * the internal data fields. 
+ */ + public int byteSize() { + // 4 = sizeOf(hashCode) + return serialized.length + 4; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/CacheParams.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/CacheParams.java new file mode 100644 index 00000000000..f7714ce1457 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/CacheParams.java @@ -0,0 +1,25 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + + +/** + * Helper class for carrying around cache-related + * config parameters to the FastSearcher class. + * + * @author arnej27959 + */ +public class CacheParams { + public int cacheMegaBytes = 0; + public double cacheTimeOutSeconds = 0; + public CacheControl cacheControl = null; + + public CacheParams(int megabytes, double timeoutseconds) { + this.cacheMegaBytes = megabytes; + this.cacheTimeOutSeconds = timeoutseconds; + } + + public CacheParams(CacheControl cacheControl) { + this.cacheControl = cacheControl; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/ClusterParams.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/ClusterParams.java new file mode 100644 index 00000000000..d5a17060dd6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/ClusterParams.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + +import com.yahoo.container.search.LegacyEmulationConfig; + +/** + * Helper class for carrying around cluster-related + * config parameters to the FastSearcher class. 
+ * + * @author arnej27959 + */ +public class ClusterParams { + public final int clusterNumber; + public final String searcherName; + public final int rowBits; + public final LegacyEmulationConfig emulation; + + /** + * for compatibility + **/ + public ClusterParams(int number, String name, int rowbits) { + this(number, name, rowbits, new LegacyEmulationConfig()); + } + + /** + * for testcases only + **/ + public ClusterParams(String name) { + this(0, name, 0); + } + + /** + * make up full ClusterParams + **/ + public ClusterParams(int number, String name, int rowbits, LegacyEmulationConfig cfg) { + this.clusterNumber = number; + this.searcherName = name; + this.rowBits = rowbits; + this.emulation = cfg; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DataField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DataField.java new file mode 100644 index 00000000000..0e54adae932 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DataField.java @@ -0,0 +1,70 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Class representing a data field in the result set. a data field + * is basically the same thing as a string field, only that we + * treat it like a raw buffer. Well we SHOULD. we don't actually + * do so. yet. we should probably do some defensive copying and + * return a ByteBuffer...hmm... 
+ * + */ + +package com.yahoo.prelude.fastsearch; + +import java.nio.ByteBuffer; + +import com.yahoo.prelude.hitfield.RawData; +import com.yahoo.data.access.simple.Value; +import com.yahoo.data.access.Inspector; + + +/** + * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a> + */ +public class DataField extends DocsumField implements VariableLengthField { + public DataField(String name) { + super(name); + } + + private Object convert(byte[] value) { + return new RawData(value); + } + + @Override + public Object decode(ByteBuffer b) { + int len = ((int) b.getShort()) & 0xffff; + + byte[] tmp = new byte[len]; + b.get(tmp); + return convert(tmp); + } + + @Override + public Object decode(ByteBuffer b, FastHit hit) { + Object field = decode(b); + hit.setField(name, field); + return field; + } + + @Override + public String toString() { + return "field " + getName() + " type data"; + } + + @Override + public int getLength(ByteBuffer b) { + int offset = b.position(); + int len = ((int) b.getShort()) & 0xffff; + b.position(offset + len + (Short.SIZE >> 3)); + return len + (Short.SIZE >> 3); + } + + @Override + public int sizeOfLength() { + return Short.SIZE >> 3; + } + + @Override + public Object convert(Inspector value) { + return convert(value.asData(Value.empty().asData())); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/Docsum.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/Docsum.java new file mode 100644 index 00000000000..2941baf40f5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/Docsum.java @@ -0,0 +1,95 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.fastsearch; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + + +/** + * An instance of a document summary, backed by binary data, which decodes and returns fields on request, + * using the (shared) definition of this docsum. + * + * @author <a href="mailt:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public final class Docsum { + + private final DocsumDefinition definition; + private final byte[] packet; + /** The offsets into the packet data of each field, given the fields index, computed lazily */ + private final int[] fieldOffsets; + /** The largest stored offset */ + private int largestStoredOffset=-1; + + public Docsum(DocsumDefinition definition, byte[] packet) { + this.definition = definition; + this.packet = packet; + fieldOffsets=new int[definition.getFieldCount()]; + } + + public DocsumDefinition getDefinition() { return definition; } + + public Integer getFieldIndex(String fieldName) { + return definition.getFieldIndex(fieldName); + } + + public Object decode(int fieldIndex) { + ByteBuffer b=packetAsBuffer(); + setAndReturnOffsetToField(b, fieldIndex); + return definition.getField(fieldIndex).decode(b); + } + + /** Fetches the field as raw utf-8 if it is a text field. Returns null otherwise */ + public FastHit.RawField fetchFieldAsUtf8(int fieldIndex) { + DocsumField dataType = definition.getField(fieldIndex); + if ( ! 
(dataType instanceof LongstringField || dataType instanceof XMLField || dataType instanceof StringField)) + return null; + + ByteBuffer b=packetAsBuffer(); + DocsumField field = definition.getField(fieldIndex); + int fieldStart = setAndReturnOffsetToField(b, fieldIndex); // set buffer.pos = start of field + if (field.isCompressed(b)) return null; + int length = field.getLength(b); // scan to end of field + if (field instanceof VariableLengthField) { + int fieldLength = ((VariableLengthField) field).sizeOfLength(); + b.position(fieldStart + fieldLength); // reset to start of field + length -= fieldLength; + } else { + b.position(fieldStart); // reset to start of field + } + byte[] bufferView = new byte[length]; + b.get(bufferView); + return new FastHit.RawField(dataType, bufferView); + } + + public ByteBuffer packetAsBuffer() { + ByteBuffer buffer = ByteBuffer.wrap(packet); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.getInt(); // Skip class id + return buffer; + } + + /** Returns the offset of a given field in the buffer, and sets the position of the buffer to that field start */ + private int setAndReturnOffsetToField(ByteBuffer b, int fieldIndex) { + // find and store missing offsets up to fieldIndex + if (largestStoredOffset<0) { // initial case + fieldOffsets[0]=b.position(); + largestStoredOffset++; + } + while (largestStoredOffset < fieldIndex) { // induction + int offsetOfLargest=fieldOffsets[largestStoredOffset]; + b.position(offsetOfLargest); + fieldOffsets[largestStoredOffset+1]=offsetOfLargest+definition.getField(largestStoredOffset).getLength(b); + largestStoredOffset++; + } + + // return the stored offset + int offset=fieldOffsets[fieldIndex]; + b.position(offset); + return offset; + } + + public String toString() { + return "docsum [definition: " + definition + "]"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinition.java 
b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinition.java new file mode 100644 index 00000000000..bef0069d525 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinition.java @@ -0,0 +1,82 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.yahoo.vespa.config.search.SummaryConfig; +import com.yahoo.container.search.LegacyEmulationConfig; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * A docsum definition which knows how to decode a certain class of document + * summaries. The docsum definition has a name and a list of field definitions + * + * @author bratseth + * @author Bjørn Borud + */ +public class DocsumDefinition { + + private String name; + private final List<DocsumField> fields; + + /** True if this contains dynamic fields */ + private boolean dynamic = false; + + // Mapping between field names and their index in this.fields + private final Map<String,Integer> fieldNameToIndex; + + DocsumDefinition(DocumentdbInfoConfig.Documentdb.Summaryclass config, LegacyEmulationConfig emulConfig) { + this.name = config.name(); + List<DocsumField> fieldsBuilder = new ArrayList<>(); + Map<String,Integer> fieldNameToIndexBuilder = new HashMap<>(); + + for (DocumentdbInfoConfig.Documentdb.Summaryclass.Fields field : config.fields()) { + // no, don't switch the order of the two next lines :) + fieldNameToIndexBuilder.put(field.name(), fieldsBuilder.size()); + fieldsBuilder.add(DocsumField.create(field.name(), field.type(), emulConfig)); + if (field.dynamic()) + dynamic = true; + } + fields = ImmutableList.copyOf(fieldsBuilder); + fieldNameToIndex = ImmutableMap.copyOf(fieldNameToIndexBuilder); + } + + 
/** Returns the field at this index, or null if none */ + public DocsumField getField(int fieldIndex) { + if (fieldIndex >= fields.size()) return null; + return fields.get(fieldIndex); + } + + /** Returns the index of a field name */ + public Integer getFieldIndex(String fieldName) { + return fieldNameToIndex.get(fieldName); + } + + @Override + public String toString() { + return "docsum definition '" + getName() + "'"; + } + + public String getName() { + return name; + } + + public int getFieldCount() { + return fields.size(); + } + + public List<DocsumField> getFields() { + return fields; + } + + /** Returns whether this summary contains one or more dynamic fields */ + public boolean isDynamic() { + return dynamic; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinitionSet.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinitionSet.java new file mode 100644 index 00000000000..2f0768d4e8b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinitionSet.java @@ -0,0 +1,140 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.fastsearch; + +import com.yahoo.slime.BinaryFormat; +import com.yahoo.slime.Slime; +import com.yahoo.data.access.slime.SlimeAdapter; +import com.yahoo.vespa.config.search.SummaryConfig; +import com.yahoo.prelude.ConfigurationException; +import com.yahoo.container.search.LegacyEmulationConfig; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.util.logging.Logger; + +/** + * A set of docsum definitions + * + * @author bratseth + * @author Bjørn Borud + */ +public final class DocsumDefinitionSet { + public static final int SLIME_MAGIC_ID = 0x55555555; + private final static Logger log = Logger.getLogger(DocsumDefinitionSet.class.getName()); + + private final HashMap<Long, DocsumDefinition> definitions = new HashMap<>(); + private final HashMap<String, DocsumDefinition> definitionsByName = new HashMap<>(); + private final LegacyEmulationConfig emulationConfig; + + public DocsumDefinitionSet(DocumentdbInfoConfig.Documentdb config) { + this.emulationConfig = new LegacyEmulationConfig(); + configure(config); + } + + public DocsumDefinitionSet(DocumentdbInfoConfig.Documentdb config, LegacyEmulationConfig emulConfig) { + this.emulationConfig = emulConfig; + configure(config); + } + + /** Returns a docsum definition by id + * @param id document summary class id + * @return a DocsumDefinition for the id, if found. 
+ */ + public final DocsumDefinition getDocsumDefinition(long id) { + return definitions.get(new Long(id)); + } + + /** + * Returns a docsum definition by name, or null if not found + * + * @param name the name of the summary class to use, or null to use the name "default" + * @return the summary class found, or null if none + */ + public final DocsumDefinition getDocsumDefinition(String name) { + if (name == null) + name="default"; + return definitionsByName.get(name); + } + + /** + * Makes data available for decoding for the given hit. + * + * @param summaryClass the requested summary class + * @param data docsum data from backend + * @param hit the Hit corresponding to this document summary + * @throws ConfigurationException if the summary class of this hit is missing + */ + public final void lazyDecode(String summaryClass, byte[] data, FastHit hit) { + ByteBuffer buffer = ByteBuffer.wrap(data); + buffer.order(ByteOrder.LITTLE_ENDIAN); + long docsumClassId = buffer.getInt(); + if (docsumClassId != SLIME_MAGIC_ID) { + DocsumDefinition docsumDefinition = lookupDocsum(docsumClassId); + Docsum docsum = new Docsum(docsumDefinition, data); + hit.addSummary(docsum); + } else { + DocsumDefinition docsumDefinition = lookupDocsum(summaryClass); + Slime value = BinaryFormat.decode(buffer.array(), buffer.arrayOffset()+buffer.position(), buffer.remaining()); + hit.addSummary(docsumDefinition, new SlimeAdapter(value.get())); + } + } + + private DocsumDefinition lookupDocsum(long docsumClassId) { + DocsumDefinition docsumDefinition = getDocsumDefinition(docsumClassId); + if (docsumDefinition == null) { + throw new ConfigurationException("Received hit with summary id " + docsumClassId + + ", but this summary class is not in current summary config (" + toString() + ")" + + " (that is, the system is in an inconsistent state)"); + } + return docsumDefinition; + } + + private DocsumDefinition lookupDocsum(String summaryClass) { + DocsumDefinition ds = 
definitionsByName.get(summaryClass); + if (ds == null) { + ds = definitionsByName.get("default"); + } + if (ds == null) { + throw new ConfigurationException("Fetched hit with summary class " + summaryClass + + ", but this summary class is not in current summary config (" + toString() + ")" + + " (that is, you asked for something unknown, and no default was found)"); + } + return ds; + } + + public String toString() { + StringBuilder sb = new StringBuilder(); + Set<Map.Entry<Long, DocsumDefinition>> entrySet = definitions.entrySet(); + boolean first = true; + for (Iterator<Map.Entry<Long, DocsumDefinition>> itr = entrySet.iterator(); itr.hasNext(); ) { + if (!first) { + sb.append(","); + } else { + first = false; + } + Map.Entry<Long, DocsumDefinition> entry = itr.next(); + sb.append("[").append(entry.getKey()).append(",").append(entry.getValue().getName()).append("]"); + } + return sb.toString(); + } + + public int size() { + return definitions.size(); + } + + private void configure(DocumentdbInfoConfig.Documentdb config) { + for (int i = 0; i < config.summaryclass().size(); ++i) { + DocumentdbInfoConfig.Documentdb.Summaryclass sc = config.summaryclass(i); + DocsumDefinition docSumDef = new DocsumDefinition(sc, emulationConfig); + definitions.put((long) sc.id(), docSumDef); + definitionsByName.put(sc.name(), docSumDef); + } + if (definitions.size() == 0) { + log.warning("No summary classes found in DocumentdbInfoConfig.Documentdb"); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumField.java new file mode 100644 index 00000000000..3aa02f57a1e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumField.java @@ -0,0 +1,119 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.fastsearch; + +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.Map; +import java.util.logging.Logger; +import com.yahoo.data.access.Inspector; +import com.yahoo.container.search.LegacyEmulationConfig; + +import com.yahoo.log.LogLevel; + +/** + * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a> + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public abstract class DocsumField { + + private static final Logger log = Logger.getLogger(DocsumField.class.getName()); + private static FieldFactory fieldFactory; + + private static class FieldFactory { + Map<String, Constructor<? extends DocsumField>> constructors = new HashMap<>(); + + void put(final String typename, + final Class<? extends DocsumField> fieldClass) + throws NoSuchMethodException, SecurityException + { + final Constructor<? extends DocsumField> constructor = fieldClass.getConstructor(String.class); + constructors.put(typename, constructor); + } + + DocsumField create(final String typename, final String name, final LegacyEmulationConfig emulConfig) + throws InstantiationException, IllegalAccessException, + IllegalArgumentException, InvocationTargetException + { + DocsumField f = constructors.get(typename).newInstance(name); + f.emulConfig = emulConfig; + return f; + } + } + private LegacyEmulationConfig emulConfig; + final LegacyEmulationConfig getEmulConfig() { return emulConfig; } + + static { + fieldFactory = new FieldFactory(); + + try { + fieldFactory.put("byte", ByteField.class); + fieldFactory.put("short", ShortField.class); + fieldFactory.put("integer", IntegerField.class); + fieldFactory.put("int64", Int64Field.class); + fieldFactory.put("float", FloatField.class); + fieldFactory.put("double", DoubleField.class); + fieldFactory.put("string", StringField.class); + fieldFactory.put("data", DataField.class); + 
fieldFactory.put("longstring", LongstringField.class); + fieldFactory.put("longdata", LongdataField.class); + fieldFactory.put("jsonstring", StructDataField.class); + fieldFactory.put("featuredata", FeatureDataField.class); + fieldFactory.put("xmlstring", XMLField.class); + } catch (final Exception e) { + log.log(LogLevel.ERROR, + "Could not initialize docsum decoding properly.", e); + } + } + + protected String name; + + protected DocsumField(final String name) { + this.name = name; + } + + /* for unit test only */ + static DocsumField create(final String name, final String typename) { + return create(name, typename, new LegacyEmulationConfig()); + } + + public static DocsumField create(final String name, final String typename, LegacyEmulationConfig emulConfig) { + try { + return fieldFactory.create(typename, name, emulConfig); + } catch (final Exception e) { + throw new RuntimeException("Unknown field type '" + typename + "'", e); + } + } + + public String getName() { + return name; + } + + public boolean isCompressed(final ByteBuffer b) { + return false; + } + + /** + * Decode the field at the current buffer position into the fast hit. + */ + public abstract Object decode(ByteBuffer b, FastHit hit); + + /** + * Decode the field at the current buffer position and simply return the + * value. + */ + public abstract Object decode(ByteBuffer b); + + /** + * Get the number of bytes this field occupies in the given buffer and set + * the position of the first byte after this field. + */ + public abstract int getLength(ByteBuffer b); + + /** + * Convert a generic value into an object of the appropriate type + * for this field. 
+ **/ + public abstract Object convert(Inspector value); +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumPacketKey.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumPacketKey.java new file mode 100644 index 00000000000..1e76207e370 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumPacketKey.java @@ -0,0 +1,64 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + +import com.yahoo.document.GlobalId; + + +/** + * Key for each entry in the packetcache. + * + * @author <a href="mailto:mathiasm@yahoo-inc.com">Mathias M\u00f8lster Lidal</a> + */ +public class DocsumPacketKey { + private GlobalId globalId; + private int partid; + private int docstamp; + private String summaryClass; + + private static boolean strEquals(String a, String b) { + if (a == null || b == null) { + return (a == null && b == null); + } + return a.equals(b); + } + + private static int strHashCode(String s) { + if (s == null) { + return 0; + } + return s.hashCode(); + } + + public DocsumPacketKey(GlobalId globalId, int partid, String summaryClass) { + this.globalId = globalId; + this.partid = partid; + this.summaryClass = summaryClass; + } + + public GlobalId getGlobalId() { + return globalId; + } + + public int getPartid() { + return partid; + } + + public boolean equals(Object o) { + if (o instanceof DocsumPacketKey) { + DocsumPacketKey other = (DocsumPacketKey) o; + + if (globalId.equals(other.getGlobalId()) + && partid == other.getPartid() + && strEquals(summaryClass, other.summaryClass)) + { + return true; + } + } + return false; + } + + public int hashCode() { + return globalId.hashCode() + 10 * partid + strHashCode(summaryClass); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocumentDatabase.java 
b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocumentDatabase.java new file mode 100644 index 00000000000..c48a8804f9f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocumentDatabase.java @@ -0,0 +1,54 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.fastsearch;

import com.google.common.collect.ImmutableMap;
import com.yahoo.container.search.LegacyEmulationConfig;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * A back-end document database: its name, the docsum definitions it offers
 * and its rank profiles, all read from config at construction time.
 *
 * @author <a href="mailto:geirst@yahoo-inc.com">Geir Storli</a>
 */
public class DocumentDatabase {

    // TODO: What about name conflicts when different search defs have the same rank profile/docsum?

    public static final String MATCH_PROPERTY = "match";
    public static final String SEARCH_DOC_TYPE_KEY = "documentdb.searchdoctype";

    private final String name;
    private final DocsumDefinitionSet docsumDefSet;
    private final Map<String, RankProfile> rankProfiles;

    /**
     * Builds the database description from its config.
     *
     * @param documentDb the config of this document database
     * @param emulConfig emulation settings handed on to the docsum definition set
     */
    public DocumentDatabase(DocumentdbInfoConfig.Documentdb documentDb, LegacyEmulationConfig emulConfig) {
        name = documentDb.name();
        docsumDefSet = new DocsumDefinitionSet(documentDb, emulConfig);
        Map<String, RankProfile> profilesByName = toRankProfiles(documentDb.rankprofile());
        rankProfiles = ImmutableMap.copyOf(profilesByName);
    }

    public String getName() { return name; }

    public DocsumDefinitionSet getDocsumDefinitionSet() { return docsumDefSet; }

    /** Returns an unmodifiable map of all the rank profiles in this indexed by rank profile name */
    public Map<String, RankProfile> rankProfiles() {
        return rankProfiles;
    }

    /** Indexes the configured rank profiles by name; a later entry replaces an earlier one with the same name. */
    private static Map<String, RankProfile> toRankProfiles(List<DocumentdbInfoConfig.Documentdb.Rankprofile> rankProfileConfigList) {
        Map<String, RankProfile> byName = new HashMap<>();
        for (DocumentdbInfoConfig.Documentdb.Rankprofile profileConfig : rankProfileConfigList) {
            String profileName = profileConfig.name();
            RankProfile profile = new RankProfile(profileName,
                                                  profileConfig.hasSummaryFeatures(),
                                                  profileConfig.hasRankFeatures());
            byName.put(profileName, profile);
        }
        return byName;
    }

}
diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DoubleField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DoubleField.java new file mode 100644 index 00000000000..d42d5567718 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DoubleField.java @@ -0,0 +1,48 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.fastsearch;


import java.nio.ByteBuffer;

import com.yahoo.search.result.NanNumber;
import com.yahoo.data.access.Inspector;

/**
 * A docsum field holding a double. A NaN value is represented by
 * {@link NanNumber#NaN} rather than by a {@link Double}.
 *
 * @author <a href="mailto:mathiasm@yahoo-inc.com">Mathias M\u00f8lster Lidal</a>
 */
public class DoubleField extends DocsumField {

    /** The value read from an Inspector which holds no double. */
    static final double EMPTY_VALUE = Double.NaN;

    public DoubleField(String name) {
        super(name);
    }

    /** Boxes the raw value, substituting the NaN marker object for NaN. */
    private Object toFieldValue(double raw) {
        if (Double.isNaN(raw)) {
            return NanNumber.NaN;
        }
        return Double.valueOf(raw);
    }

    /** Reads one double at the current buffer position and returns its field value. */
    @Override
    public Object decode(ByteBuffer b) {
        double raw = b.getDouble();
        return toFieldValue(raw);
    }

    /** Decodes the value at the current buffer position and also stores it in the hit under this field's name. */
    @Override
    public Object decode(ByteBuffer b, FastHit hit) {
        Object decoded = decode(b);
        hit.setField(name, decoded);
        return decoded;
    }

    /** Moves the buffer position past one double and returns the number of bytes skipped. */
    @Override
    public int getLength(ByteBuffer b) {
        final int width = Double.BYTES;
        b.position(b.position() + width);
        return width;
    }

    /** Converts a generic value, treating a missing double as {@link #EMPTY_VALUE}. */
    @Override
    public Object convert(Inspector value) {
        double raw = value.asDouble(EMPTY_VALUE);
        return toFieldValue(raw);
    }
}
diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FS4ResourcePool.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FS4ResourcePool.java new file mode 100644 index 00000000000..1aa226dbeb8 --- /dev/null +++
b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FS4ResourcePool.java @@ -0,0 +1,88 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.fastsearch;

import com.yahoo.component.AbstractComponent;
import com.yahoo.concurrent.ThreadFactoryFactory;
import com.yahoo.container.Server;
import com.yahoo.container.search.Fs4Config;
import com.yahoo.fs4.mplex.Backend;
import com.yahoo.fs4.mplex.ConnectionPool;
import com.yahoo.fs4.mplex.ListenerPool;
import com.yahoo.io.Connection;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Provider for {@link com.yahoo.fs4.mplex.ListenerPool}. All users will get the same pool instance.
 * Also owns the executors and the per host:port {@link Backend} instances created through
 * {@link #getBackend(String, int)}, and releases all of them in {@link #deconstruct()}.
 *
 * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a>
 * @since 5.4.0
 */
public class FS4ResourcePool extends AbstractComponent {

    private static final Logger logger = Logger.getLogger(FS4ResourcePool.class.getName());
    private static final AtomicInteger instanceCounter = new AtomicInteger(0);

    /** How long deconstruct() waits for each executor to finish its tasks. */
    private static final long TERMINATION_TIMEOUT_SECONDS = 10;

    private final int instanceId;
    private final ListenerPool listeners;
    private final Timer timer = new Timer(); // This is a timer for cleaning the closed connections

    /** Backends by "host:port". All access is guarded by synchronizing on the map itself. */
    private final Map<String, Backend> connectionPoolMap = new HashMap<>();

    private final ExecutorService executor;
    private final ScheduledExecutorService scheduledExecutor;

    public FS4ResourcePool(Fs4Config fs4Config) {
        instanceId = instanceCounter.getAndIncrement();
        logger.log(Level.INFO, "Constructing an FS4ResourcePool with id '" + instanceId + "' with config '" + fs4Config.toString() + "'");
        String name = "FS4-" + instanceId;
        listeners = new ListenerPool(name, fs4Config.numlistenerthreads());
        executor = Executors.newCachedThreadPool(ThreadFactoryFactory.getDaemonThreadFactory(name));
        scheduledExecutor = Executors.newScheduledThreadPool(1, ThreadFactoryFactory.getDaemonThreadFactory(name + ".scheduled"));
    }

    public ExecutorService getExecutor() {
        return executor;
    }

    public ScheduledExecutorService getScheduledExecutor() {
        return scheduledExecutor;
    }

    /**
     * Returns the backend for the given host and port, creating and remembering it on first request.
     *
     * @param host the host name of the backend
     * @param port the port of the backend
     * @return the shared backend instance for this host:port
     */
    public Backend getBackend(String host, int port) {
        String key = host + ":" + port;
        synchronized (connectionPoolMap) {
            Backend pool = connectionPoolMap.get(key);
            if (pool == null) {
                pool = new Backend(host, port, Server.get().getServerDiscriminator(), listeners, new ConnectionPool(timer));
                connectionPoolMap.put(key, pool);
            }
            return pool;
        }
    }

    /**
     * Closes the listeners, the timer and every backend handed out, then shuts the executors
     * down and waits a bounded time for them to terminate.
     */
    @Override
    public void deconstruct() {
        logger.log(Level.INFO, "Deconstructing FS4ResourcePool with id '" + instanceId + "'.");
        super.deconstruct();
        listeners.close();
        timer.cancel();

        // Snapshot under the same lock getBackend() uses, then close outside the lock.
        List<Backend> backends;
        synchronized (connectionPoolMap) {
            backends = new ArrayList<>(connectionPoolMap.values());
        }
        for (Backend backend : backends) {
            backend.shutdown();
            backend.close();
        }

        executor.shutdown();
        scheduledExecutor.shutdown();
        try {
            // awaitTermination signals a timeout through its return value, not through an exception.
            boolean terminated = executor.awaitTermination(TERMINATION_TIMEOUT_SECONDS, TimeUnit.SECONDS);
            terminated &= scheduledExecutor.awaitTermination(TERMINATION_TIMEOUT_SECONDS, TimeUnit.SECONDS);
            if (!terminated) {
                logger.warning("Executors failed terminating within timeout of " + TERMINATION_TIMEOUT_SECONDS + " seconds");
            }
        } catch (InterruptedException e) {
            // Restore the flag so the caller can see that this thread was interrupted.
            Thread.currentThread().interrupt();
            logger.warning("Interrupted while waiting for executors to terminate : " + e);
        }
    }
}
diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastHit.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastHit.java new file mode 100644 index 00000000000..ee3f9ac0583 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastHit.java @@ -0,0 +1,442 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.fastsearch;

import com.google.common.annotations.Beta;
import com.yahoo.document.GlobalId;
import com.yahoo.fs4.QueryPacketData;
import com.yahoo.net.URI;
import com.yahoo.search.result.Hit;
import com.yahoo.search.result.Relevance;
import com.yahoo.data.access.Inspector;
import com.yahoo.data.access.Type;
import com.yahoo.data.access.simple.Value.StringValue;

/**
 * A regular hit from a Vespa backend
 *
 * @author bratseth
 * @author steinar
 */
public class FastHit extends Hit {

    public static final String SUMMARY = "summary";

    private static final long serialVersionUID = 298098891191029589L;

    /** The global id of this document in the backend node which produced it */
    private GlobalId globalId = new GlobalId(new byte[GlobalId.LENGTH]);

    /** Part ID */
    private int partId;

    /** DistributionKey (needed to generate getDocsumPacket, for two-phase search) */
    private int distributionKey = 0;

    /**
     * The index uri of this. Nothing in this class assigns it, so
     * {@link #getIndexUri()} currently builds a new URI on every call.
     */
    private URI indexUri = null;

    /**
     * The number of least significant bits in the part id which specifies the
     * row in the search cluster which produced this hit. The other bits
     * specifies the column. 0 if not known.
     */
    private int rowBits = 0;

    /**
     * Whether or not to ignore the row bits. If this is set, FastSearcher is
     * allowed to choose an appropriate row.
     */
    private boolean ignoreRowBits = false;

    /**
     * Whether to use the row number in the index uri, see FastSearcher for
     * details
     */
    private boolean useRowInIndexUri = true;

    private transient QueryPacketData queryPacketData = null;
    private transient CacheKey cacheKey = null;

    /**
     * Creates an empty and temporarily invalid summary hit
     */
    public FastHit() { }

    public FastHit(String uri, double relevancy) {
        this(uri, relevancy, null);
    }

    public FastHit(String uri, double relevance, String source) {
        setId(uri);
        super.setField("uri", uri);
        setRelevance(new Relevance(relevance));
        setSource(source);
        types().add(SUMMARY);
        setPartId(0, 0);
    }

    public String toString() {
        return super.toString() + " [fasthit, globalid: " + globalId + ", partId: "
                + partId + ", distributionkey: " + distributionKey + "]";
    }

    /** Returns the raw bytes of the global id as lower case hex, two digits per byte. */
    public static String asHexString(GlobalId gid) {
        StringBuilder sb = new StringBuilder();
        byte[] rawGid = gid.getRawId();
        for (byte b : rawGid) {
            String hex = Integer.toHexString(0xFF & b);
            if (hex.length() == 1) {
                sb.append('0');
            }
            sb.append(hex);
        }
        return sb.toString();
    }

    @Override
    public int hashCode() {
        if (getId() == null) {
            throw new IllegalStateException("This hit must have a 'uri' field, and this fild must be filled through " +
                    "Execution.fill(Result)) before hashCode() is accessed.");
        } else {
            return super.hashCode();
        }
    }

    @Override
    public URI getId() {
        return getUri(); // Make sure we decode it if the id is encoded
    }

    /**
     * Returns the explicitly set uri if available. Otherwise the value of the
     * "uri" field is used as the id if that field is present, and the
     * {@link #getIndexUri() index uri} is returned as the last resort.
     *
     * @return uri of hit
     */
    public URI getUri() {
        URI uri = super.getId();
        if (uri != null) return uri;

        // TODO: Remove, this should be one of the last vestiges of URL field magic
        if (fields().containsKey("uri")) {
            // trigger decoding
            Object o = getField("uri");
            setId(o.toString());
            return super.getId();
        }

        return getIndexUri();
    }

    /**
     * The uri of the index location of this hit
     * ("index:[source number]/[column]/[row]/[global id as hex]", where the row is
     * "-" when the row is not used in the index uri). This is the uri if no other
     * uri is assigned
     *
     * @return uri to the index.
     */
    public URI getIndexUri() {
        if (indexUri != null) return indexUri;

        String rowString = "-";
        if (useRowInIndexUri)
            rowString = String.valueOf(getRow());

        return new URI("index:" + getSourceNumber() + "/" + getColumn() + "/" + rowString + "/" + asHexString(getGlobalId()));
    }

    /** Returns the global id of this document in the backend node which produced it */
    public GlobalId getGlobalId() {
        return globalId;
    }

    public void setGlobalId(GlobalId globalId) {
        this.globalId = globalId;
    }

    public int getPartId() {
        return partId;
    }

    /**
     * Sets the part id number, which specifies the node where this hit is
     * found. The row count is used to decode the part id into a column and a
     * row number: the number of n least significant bits required to hold the
     * highest row number are the row bits, the rest are column bits.
     *
     * @param partId partition id
     * @param rowBits number of bits to encode row number
     */
    public void setPartId(int partId, int rowBits) {
        this.partId = partId;
        this.rowBits = rowBits;
    }

    /**
     *
     * @param useRowInIndexUri Sets whether to use the row in the index uri. See FastSearcher for details.
     */
    public void setUseRowInIndexUri(boolean useRowInIndexUri) {
        this.useRowInIndexUri = useRowInIndexUri;
    }

    /**
     * @return Returns the column number where this hit originated, or partId if not known
     */
    public int getColumn() {
        return partId >>> rowBits;
    }

    /**
     * @return the row number where this hit originated, or 0 if not known
     * */
    public int getRow() {
        if (rowBits == 0) {
            return 0;
        }

        return partId & ((1 << rowBits) - 1);
    }

    /**
     * <p>Returns a field value from this Hit. The value is either a stored value from the Document represented by
     * this Hit, or a generated value added during later processing.</p>
     *
     * <p>The values available from the matching Document are a <i>subset</i> of the values set in the document,
     * determined by the {@link #getFilled() filled} status of this Hit. More fields may be requested by requesting
     * further filling.</p>
     *
     * <p>Lookups on names which do not exist in the document and are not added by later processing
     * return null.</p>
     *
     * <p>Lookups on fields which exist in the document, in a summary class which is already requested
     * filled returns the following types, even when the field has no actual value:</p>
     *
     * <ul>
     * <li><b>Dynamic summary string fields</b>: A Java String before JuniperSearcher and a HitField after.</li>
     * <li><b>string/uri/content</b>: A Java String.<br>
     * The empty string ("") if no value is assigned in the document.
     *
     * <li><b>Numerics</b>: The corresponding numeric Java type.<br>
     * If the field has <i>no value</i> assigned in the document,
     * the special numeric {@link com.yahoo.search.result.NanNumber#NaN} is returned.
     *
     * <li><b>raw</b>: A {@link com.yahoo.prelude.hitfield.RawData} instance
     *
     * <li><b>multivalue fields</b>: A {@link com.yahoo.prelude.hitfield.JSONString} instance
     * </ul>
     */
    @Override
    public Object getField(String key) {
        Object value = super.getField(key);

        if (value instanceof LazyValue) {
            return getAndCacheLazyValue(key, (LazyValue) value);
        } else {
            return value;
        }
    }

    /** Decodes the lazy value and stores the decoded value in its place, so decoding happens once. */
    private Object getAndCacheLazyValue(String key, LazyValue value) {
        Object forcedValue = value.getValue(key);
        setField(key, forcedValue);
        return forcedValue;
    }

    /** Returns false - this is a concrete hit containing requested content */
    public boolean isMeta() {
        return false;
    }

    /**
     * Only needed when fetching summaries in 2 phase.
     *
     * @return distribution key of node where the hit originated from
     */
    public int getDistributionKey() {
        return distributionKey;
    }

    /**
     * Only needed when fetching summaries in 2 phase.
     * @param distributionKey Of node where you find this hit.
     */
    public void setDistributionKey(int distributionKey) {
        this.distributionKey = distributionKey;
    }

    /** Adds every field of the docsum which is not already set on this hit, as a lazily decoded value. */
    public void addSummary(Docsum docsum) {
        LazyDocsumValue lazyDocsumValue = new LazyDocsumValue(docsum);
        for (DocsumField field : docsum.getDefinition().getFields()) {
            setDocsumFieldIfNotPresent(field.getName(), lazyDocsumValue);
        }
    }

    void addSummary(DocsumDefinition docsumDef, Inspector value) {
        for (DocsumField field : docsumDef.getFields()) {
            String fieldName = field.getName();
            if (value.type() == Type.STRING &&
                (field instanceof LongstringField ||
                 field instanceof StringField ||
                 field instanceof XMLField))
            {
                setDocsumFieldIfNotPresent(fieldName, new LazyString(field, value));
            } else {
                Inspector f = value.field(fieldName);
                if (field.getEmulConfig().forceFillEmptyFields() || f.valid()) {
                    setDocsumFieldIfNotPresent(fieldName, field.convert(f));
                }
            }
        }
    }

    private void setDocsumFieldIfNotPresent(String fieldName, Object value) {
        // super.getField is used so that an existing lazy value is not decoded by this check
        if (super.getField(fieldName) == null) {
            setField(fieldName, value);
        }
    }

    /**
     * Set a field to behave like a string type summary field, not decoding raw
     * data till actually used. Added to make testing lazy docsum functionality
     * easier. This is not a method to be used for efficiency, as it causes
     * object allocations.
     *
     * @param fieldName
     *            the name of the field to insert undecoded UTF-8 into
     * @param value
     *            an array of valid UTF-8 data
     */
    @Beta
    public void setLazyStringField(String fieldName, byte[] value) {
        setField(fieldName, new LazyString(new StringField(fieldName), new StringValue(value)));
    }

    /** The undecoded UTF-8 bytes of a field, and whether they need XML escaping when rendered. */
    public static final class RawField {
        private final boolean needXmlEscape;

        private final byte[] contents;

        public RawField(DocsumField fieldType, byte[] contents) {
            needXmlEscape = ! (fieldType instanceof XMLField);
            this.contents = contents;
        }
        public RawField(byte [] contents) {
            needXmlEscape = true;
            this.contents = contents;
        }

        public byte [] getUtf8() { return contents; }
        public boolean needXmlEscape() { return needXmlEscape; }
    }

    /**
     * Add the binary data common for the query packet to a Vespa backend and a
     * summary fetch packet to a Vespa backend. This method can only be called
     * once for a single hit.
     *
     * @param queryPacketData binary data from a query packet resulting in this hit
     * @throws IllegalStateException if the method is called more than once
     * @throws NullPointerException if trying to set query packet data to null
     */
    public void setQueryPacketData(QueryPacketData queryPacketData) {
        if (this.queryPacketData != null)
            throw new IllegalStateException("Query packet data already set to "
                    + this.queryPacketData + ", tried to set it to " + queryPacketData);
        if (queryPacketData == null)
            throw new NullPointerException("Query packet data reference can not be set to null.");
        this.queryPacketData = queryPacketData;
    }

    /**
     * Fetch binary data from the query packet which produced this hit. These
     * data may not be available, this method will then return null.
     *
     * @return wrapped binary data from a query packet, or null
     */
    public QueryPacketData getQueryPacketData() {
        return queryPacketData;
    }

    public void clearQueryPacketData() {
        queryPacketData = null;
    }

    CacheKey getCacheKey() {
        return cacheKey;
    }

    void setCacheKey(CacheKey cacheKey) {
        this.cacheKey = cacheKey;
    }

    public void setIgnoreRowBits(boolean ignoreRowBits) {
        this.ignoreRowBits = ignoreRowBits;
    }

    public boolean shouldIgnoreRowBits() {
        return ignoreRowBits;
    }

    /** Returns whether the named field currently holds a value which has not been decoded yet. */
    public boolean fieldIsNotDecoded(String name) {
        return super.getField(name) instanceof LazyValue;
    }

    /**
     * Returns the undecoded UTF-8 form of a field which has not been decoded yet.
     *
     * @throws IllegalStateException if the field does not hold an undecoded value
     */
    public RawField fetchFieldAsUtf8(String fieldName) {
        Object value = super.getField(fieldName);
        if (value instanceof LazyValue) {
            return ((LazyValue) value).getFieldAsUtf8(fieldName);
        } else {
            throw new IllegalStateException("Field " + fieldName + " has already been decoded:" + value);
        }
    }

    /** A field value which is decoded only when it is first asked for. */
    private static abstract class LazyValue {
        abstract Object getValue(String fieldName);
        abstract RawField getFieldAsUtf8(String fieldName);
    }

    /**
     * Represents a value that resides in the docsum.
     */
    private static class LazyDocsumValue extends LazyValue {
        private final Docsum docsum;

        LazyDocsumValue(Docsum docsum) {
            this.docsum = docsum;
        }

        Object getValue(String fieldName) {
            return docsum.decode(getFieldIndex(fieldName));
        }

        private int getFieldIndex(String fieldName) {
            Integer index = docsum.getFieldIndex(fieldName);
            if (index == null) throw new AssertionError("Invalid fieldName " + fieldName);
            return index;
        }

        RawField getFieldAsUtf8(String fieldName) {
            return docsum.fetchFieldAsUtf8(getFieldIndex(fieldName));
        }
    }

    /** A string value kept as its Inspector until it is asked for. */
    private static class LazyString extends LazyValue {
        private final Inspector value;
        private final DocsumField fieldType;

        LazyString(DocsumField fieldType, Inspector value) {
            assert(value.type() == Type.STRING);
            this.value = value;
            this.fieldType = fieldType;
        }

        Object getValue(String fieldName) {
            return value.asString();
        }

        RawField getFieldAsUtf8(String fieldName) {
            return new RawField(fieldType, value.asUtf8());
        }
    }

}
diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastSearcher.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastSearcher.java new file mode 100644 index 00000000000..dfca9c49cba --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FastSearcher.java @@ -0,0 +1,566 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.fastsearch; + +import java.util.Optional; + +import com.yahoo.compress.CompressionType; +import com.yahoo.fs4.BasicPacket; +import com.yahoo.fs4.ChannelTimeoutException; +import com.yahoo.fs4.GetDocSumsPacket; +import com.yahoo.fs4.Packet; +import com.yahoo.fs4.PingPacket; +import com.yahoo.fs4.PongPacket; +import com.yahoo.fs4.QueryPacket; +import com.yahoo.fs4.QueryResultPacket; +import com.yahoo.fs4.mplex.Backend; +import com.yahoo.fs4.mplex.FS4Channel; +import com.yahoo.fs4.mplex.InvalidChannelException; +import com.yahoo.prelude.Ping; +import com.yahoo.prelude.Pong; +import com.yahoo.prelude.querytransform.QueryRewrite; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.dispatch.Dispatcher; +import com.yahoo.search.query.Ranking; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; +import com.yahoo.search.searchchain.Execution; +import edu.umd.cs.findbugs.annotations.NonNull; + +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.Iterator; +import java.util.TimeZone; +import java.util.logging.Level; + +import static com.yahoo.container.util.Util.quote; + +/** + * The searcher which forwards queries to fdispatch nodes, using the fnet/fs4 + * network layer. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +// TODO: Clean up all the duplication in the various search methods by +// switching to doing all the error handling using exceptions below doSearch2. +// Right now half is done by exceptions handled in doSearch2 and half by setting +// errors on results and returning them. It could be handy to create a QueryHandlingErrorException +// or similar which could wrap an error message, and then just always throw that and +// catch and unwrap into a results with an error in high level methods. 
-Jon +public class FastSearcher extends VespaBackEndSearcher { + + /** If this is turned on this will fill summaries by dispatching directly to search nodes over RPC */ + private final static CompoundName dispatchSummaries = new CompoundName("dispatch.summaries"); + + /** The compression method which will be used with rpc dispatch. "lz4" (default) and "none" is supported. */ + private final static CompoundName dispatchCompression = new CompoundName("dispatch.compression"); + + /** Used to dispatch directly to search nodes over RPC, replacing the old fnet communication path */ + private final Dispatcher dispatcher; + + /** Time (in ms) at which the index of this searcher was last modified */ + private volatile long editionTimeStamp = 0; + + /** Edition of the index */ + private int docstamp; + + private Backend backend; + + /** + * Creates a Fastsearcher. + * + * @param backend The backend object for this FastSearcher + * @param docSumParams Document summary parameters + * @param clusterParams The cluster number, and other cluster backend parameters + * @param cacheParams The size, lifetime, and controller of our cache + * @param documentdbInfoConfig Document database parameters + */ + public FastSearcher(Backend backend, Dispatcher dispatcher, SummaryParameters docSumParams, ClusterParams clusterParams, + CacheParams cacheParams, DocumentdbInfoConfig documentdbInfoConfig) { + init(docSumParams, clusterParams, cacheParams, documentdbInfoConfig); + this.backend = backend; + this.dispatcher = dispatcher; + } + + /** Clears the packet cache if the received timestamp is older than our timestamp */ + private void checkTimestamp(QueryResultPacket resultPacket) { + checkTimestamp(resultPacket.getDocstamp()); + } + + /** Clears the packet cache if the received timestamp is older than our timestamp */ + private void checkTimestamp(int newDocstamp) { + if (docstamp < newDocstamp) { + long currentTimeMillis = System.currentTimeMillis(); + + docstamp = newDocstamp; + 
setEditionTimeStamp(currentTimeMillis); + } + } + + private static SimpleDateFormat isoDateFormat; + + static { + isoDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss z"); + isoDateFormat.setTimeZone(TimeZone.getTimeZone("GMT")); + } + + private int countNumberOfFastHits(Result result) { + int numFastHits = 0; + + for (Iterator<com.yahoo.search.result.Hit> i = hitIterator(result); i.hasNext();) { + com.yahoo.search.result.Hit hit = i.next(); + + if (hit instanceof FastHit) { + numFastHits++; + } + } + return numFastHits; + } + + /** + * Pings the backend. Does not propagate to other searchers. + */ + @Override + public Pong ping(Ping ping, Execution execution) { + // If you want to change this code, you need to understand + // com.yahoo.prelude.cluster.ClusterSearcher.ping(Searcher) and + // com.yahoo.prelude.cluster.TrafficNodeMonitor.failed(ErrorMessage) + FS4Channel channel = backend.openPingChannel(); + + try { + PingPacket pingPacket = new PingPacket(); + pingPacket.enableActivedocsReporting(); + Pong pong = new Pong(); + + try { + boolean couldSend = channel.sendPacket(pingPacket); + if (!couldSend) { + pong.addError(ErrorMessage.createBackendCommunicationError("Could not ping in " + getName())); + return pong; + } + } catch (InvalidChannelException e) { + pong.addError(ErrorMessage.createBackendCommunicationError("Invalid channel " + getName())); + return pong; + } catch (IllegalStateException e) { + pong.addError( + ErrorMessage.createBackendCommunicationError("Illegal state in FS4: " + e.getMessage())); + return pong; + } catch (IOException e) { + pong.addError(ErrorMessage.createBackendCommunicationError("IO error while sending ping: " + e.getMessage())); + return pong; + } + // We should only get a single packet + BasicPacket[] packets; + + try { + packets = channel.receivePackets(ping.getTimeout(), 1); + } catch (ChannelTimeoutException e) { + pong.addError(ErrorMessage.createNoAnswerWhenPingingNode("timeout while waiting for fdispatch for " + 
getName())); + return pong; + } catch (InvalidChannelException e) { + pong.addError(ErrorMessage.createBackendCommunicationError("Invalid channel for " + getName())); + return pong; + + } + + if (packets.length == 0) { + pong.addError(ErrorMessage.createBackendCommunicationError(getName() + " got no packets back")); + return pong; + } + + if (isLoggingFine()) { + getLogger().finest("got packets " + packets.length + " packets"); + } + + try { + ensureInstanceOf(PongPacket.class, packets[0]); + } catch (TimeoutException e) { + pong.addError(ErrorMessage.createTimeout(e.getMessage())); + return pong; + } catch (IOException e) { + pong.addError(ErrorMessage.createBackendCommunicationError("Unexpected packet class returned after ping: " + e.getMessage())); + return pong; + } + pong.addPongPacket((PongPacket) packets[0]); + checkTimestamp(((PongPacket) packets[0]).getDocstamp()); + return pong; + } finally { + if (channel != null) { + channel.close(); + } + } + } + + protected void transformQuery(Query query) { + QueryRewrite.rewriteSddocname(query); + } + + @Override + public Result doSearch2(Query query, QueryPacket queryPacket, CacheKey cacheKey, Execution execution) { + FS4Channel channel = null; + try { + channel = backend.openChannel(); + channel.setQuery(query); + + // If not found, then fetch from the source. The call to + // insert into cache will be made from within searchTwoPhase + Result result = searchTwoPhase(channel, query, queryPacket, cacheKey); + + if (query.properties().getBoolean(Ranking.RANKFEATURES, false)) { + // There is currently no correct choice for which + // summary class we want to fetch at this point. If we + // fetch the one selected by the user it may not + // contain the data we need. If we fetch the default + // one we end up fetching docsums twice unless the + // user also requested the default one. 
+ fill(result, query.getPresentation().getSummary(), execution); // ARGH + } + return result; + } catch (TimeoutException e) { + return new Result(query,ErrorMessage.createTimeout(e.getMessage())); + } catch (IOException e) { + Result result = new Result(query); + if (query.getTraceLevel() >= 1) + query.trace(getName() + " error response: " + result, false, 1); + result.hits().addError(ErrorMessage.createBackendCommunicationError(getName() + " failed: "+ e.getMessage())); + return result; + } finally { + if (channel != null) { + channel.close(); + } + } + } + + /** + * Only used to fill the sddocname field when using direct dispatching as that is normally done in VespaBackEndSearcher.decodeSummary + * @param result The result + */ + private void fillSDDocName(Result result) { + DocumentDatabase db = getDocumentDatabase(result.getQuery()); + for (Iterator<Hit> i = hitIterator(result); i.hasNext();) { + Hit hit = i.next(); + if (hit instanceof FastHit) { + hit.setField(Hit.SDDOCNAME_FIELD, db.getName()); + } + } + } + /** + * Perform a partial docsum fill for a temporary result + * representing a partition of the complete fill request. 
+ * + * @param result result containing a partition of the unfilled hits + * @param summaryClass the summary class we want to fill with + **/ + protected void doPartialFill(Result result, String summaryClass) { + if (result.isFilled(summaryClass)) return; + + Query query = result.getQuery(); + traceQuery(getName(), "fill", query, query.getOffset(), query.getHits(), 2, quotedSummaryClass(summaryClass)); + + if (query.properties().getBoolean(dispatchSummaries)) { + CompressionType compression = + CompressionType.valueOf(query.properties().getString(dispatchCompression, "LZ4").toUpperCase()); + fillSDDocName(result); + dispatcher.fill(result, summaryClass, compression); + return; + } + + CacheKey cacheKey = null; + PacketWrapper packetWrapper = null; + if (getCacheControl().useCache(query)) { + cacheKey = fetchCacheKeyFromHits(result.hits(), summaryClass); + if (cacheKey == null) { + QueryPacket queryPacket = QueryPacket.create(query); + cacheKey = new CacheKey(queryPacket); + } + packetWrapper = cacheLookupTwoPhase(cacheKey, result,summaryClass); + } + + FS4Channel channel = backend.openChannel(); + channel.setQuery(query); + Packet[] receivedPackets; + try { + DocsumPacketKey[] packetKeys; + + if (countNumberOfFastHits(result) > 0) { + packetKeys = getPacketKeys(result, summaryClass, false); + if (packetKeys.length == 0) { + receivedPackets = new Packet[0]; + } else { + try { + receivedPackets = fetchSummaries(channel, result, summaryClass); + } catch (InvalidChannelException e) { + result.hits().addError(ErrorMessage.createBackendCommunicationError("Invalid channel " + getName() + " (summary fetch)")); + return; + } catch (ChannelTimeoutException e) { + result.hits().addError(ErrorMessage.createTimeout("timeout waiting for summaries from " + getName())); + return; + } catch (IOException e) { + result.hits().addError(ErrorMessage.createBackendCommunicationError( + "IO error while talking on channel " + getName() + " (summary fetch): " + e.getMessage())); + return; + 
} + if (receivedPackets.length == 0) { + result.hits().addError(ErrorMessage.createBackendCommunicationError(getName() + " got no packets back (summary fetch)")); + return; + } + } + } else { + packetKeys = new DocsumPacketKey[0]; + receivedPackets = new Packet[0]; + } + + int skippedHits; + try { + skippedHits = fillHits(result, 0, receivedPackets, summaryClass); + } catch (TimeoutException e) { + result.hits().addError(ErrorMessage.createTimeout(e.getMessage())); + return; + } catch (IOException e) { + result.hits().addError(ErrorMessage.createBackendCommunicationError("Error filling hits with summary fields, source: " + getName())); + return; + } + if (skippedHits==0 && packetWrapper != null) { + cacheControl.updateCacheEntry(cacheKey, query, packetKeys, receivedPackets); + } + + if ( skippedHits>0 ) { + getLogger().info("Could not fill summary '" + summaryClass + "' for " + skippedHits + " hits for query: " + result.getQuery()); + result.hits().addError(com.yahoo.search.result.ErrorMessage.createEmptyDocsums("Missing hit data for summary '" + summaryClass + "' for " + skippedHits + " hits")); + } + result.analyzeHits(); + + if (query.getTraceLevel() >= 3) { + int hitNumber = 0; + for (Iterator<com.yahoo.search.result.Hit> i = hitIterator(result); i.hasNext();) { + com.yahoo.search.result.Hit hit = i.next(); + if ( ! (hit instanceof FastHit)) continue; + FastHit fastHit = (FastHit) hit; + + String traceMsg = "Hit: " + (hitNumber++) + " from " + (fastHit.isCached() ? "cache" : "backend" ); + if ( ! fastHit.isFilled(summaryClass)) + traceMsg += ". Error, hit, not filled"; + query.trace(traceMsg, false, 3); + } + } + } finally { + channel.close(); + } + } + + private static @NonNull Optional<String> quotedSummaryClass(String summaryClass) { + return Optional.of(summaryClass == null ? 
"[null]" : quote(summaryClass)); + } + + private CacheKey fetchCacheKeyFromHits(HitGroup hits, String summaryClass) { + for (Iterator<Hit> i = hits.unorderedDeepIterator(); i.hasNext();) { + Hit h = i.next(); + if (h instanceof FastHit) { + FastHit hit = (FastHit) h; + if (hit.isFilled(summaryClass)) { + continue; + } + if (hit.getCacheKey() != null) { + return hit.getCacheKey(); + } + } + } + return null; + } + + private Result searchTwoPhase(FS4Channel channel, Query query, QueryPacket queryPacket, CacheKey cacheKey) throws IOException { + + if (isLoggingFine()) + getLogger().finest("sending query packet"); + + try { + boolean couldSend = channel.sendPacket(queryPacket); + if ( ! couldSend) + return new Result(query,ErrorMessage.createBackendCommunicationError("Could not reach '" + getName() + "'")); + } catch (InvalidChannelException e) { + return new Result(query,ErrorMessage.createBackendCommunicationError("Invalid channel " + getName())); + } catch (IllegalStateException e) { + return new Result(query, ErrorMessage.createBackendCommunicationError("Illegal state in FS4: " + e.getMessage())); + } + + BasicPacket[] basicPackets; + + try { + basicPackets = channel.receivePackets(Math.max(50, query.getTimeLeft()), 1); + } catch (ChannelTimeoutException e) { + return new Result(query,ErrorMessage.createTimeout("Timeout while waiting for " + getName())); + } catch (InvalidChannelException e) { + return new Result(query,ErrorMessage.createBackendCommunicationError("Invalid channel for " + getName())); + } + + if (basicPackets.length == 0) { + return new Result(query,ErrorMessage.createBackendCommunicationError(getName() + " got no packets back")); + } + + if (isLoggingFine()) + getLogger().finest("got packets " + basicPackets.length + " packets"); + + ensureInstanceOf(QueryResultPacket.class, basicPackets[0]); + QueryResultPacket resultPacket = (QueryResultPacket) basicPackets[0]; + + checkTimestamp(resultPacket); + + if (isLoggingFine()) + getLogger().finest("got 
query packet. " + "docsumClass=" + query.getPresentation().getSummary()); + + if (query.getPresentation().getSummary() == null) + query.getPresentation().setSummary(getDefaultDocsumClass()); + + Result result = new Result(query); + + addMetaInfo(query, queryPacket.getQueryPacketData(), resultPacket, result, false); + + addUnfilledHits(result, resultPacket.getDocuments(), false, queryPacket.getQueryPacketData(), cacheKey); + Packet[] packets; + PacketWrapper packetWrapper = cacheControl.lookup(cacheKey, query); + + if (packetWrapper != null) { + cacheControl.updateCacheEntry(cacheKey, query, resultPacket); + } + else { + if (resultPacket.getCoverageFeature() && ! resultPacket.getCoverageFull()) { + // Don't add error here, it was done in first phase + // No check if packetWrapper already exists, since incomplete + // first phase data won't be cached anyway. + } else { + packets = new Packet[1]; + packets[0] = resultPacket; + cacheControl.cache(cacheKey, query, new DocsumPacketKey[0], packets); + } + } + return result; + } + + private Packet[] convertBasicPackets(BasicPacket[] basicPackets) throws ClassCastException { + // trying to cast a BasicPacket[] to Packet[] will compile, + // but lead to a runtime error. At least that's what I got + // from testing and reading the specification. I'm just happy + // if someone tells me what's the proper Java way of doing + // this. -SK + Packet[] packets = new Packet[basicPackets.length]; + + for (int i = 0; i < basicPackets.length; i++) { + packets[i] = (Packet) basicPackets[i]; + } + return packets; + } + + private Packet[] fetchSummaries(FS4Channel channel, Result result, String summaryClass) + throws InvalidChannelException, ChannelTimeoutException, ClassCastException, IOException { + + BasicPacket[] receivedPackets; + boolean summaryNeedsQuery = summaryNeedsQuery(result.getQuery()); + if (result.getQuery().getTraceLevel() >=3) + result.getQuery().trace((summaryNeedsQuery ? 
"Resending " : "Not resending ") + "query during document summary fetching", 3); + + GetDocSumsPacket docsumsPacket = GetDocSumsPacket.create(result, summaryClass, summaryNeedsQuery); + int compressionLimit = result.getQuery().properties().getInteger(PACKET_COMPRESSION_LIMIT, 0); + docsumsPacket.setCompressionLimit(compressionLimit); + if (compressionLimit != 0) { + docsumsPacket.setCompressionType(result.getQuery().properties().getString(PACKET_COMPRESSION_TYPE, "lz4")); + } + + if (isLoggingFine()) + getLogger().finest("Sending " + docsumsPacket + " on " + channel); + + boolean couldSend = channel.sendPacket(docsumsPacket); + if ( ! couldSend) throw new IOException("Could not successfully send GetDocSumsPacket."); + receivedPackets = channel.receivePackets(Math.max(50, result.getQuery().getTimeLeft()), docsumsPacket.getNumDocsums() + 1); + + if (isLoggingFine()) + getLogger().finest("got " + receivedPackets.length + "docsumPackets"); + + return convertBasicPackets(receivedPackets); + } + + /** + * Returns whether we need to send the query when fetching summaries. + * This is necessary if the query requests summary features or dynamic snippeting + */ + private boolean summaryNeedsQuery(Query query) { + if (query.getRanking().getQueryCache()) return false; // Query is cached in backend + + DocumentDatabase documentDb = getDocumentDatabase(query); + + // Needed to generate a dynamic summary? + DocsumDefinition docsumDefinition = documentDb.getDocsumDefinitionSet().getDocsumDefinition(query.getPresentation().getSummary()); + if (docsumDefinition == null) return true; // stay safe + if (docsumDefinition.isDynamic()) return true; + + // Needed to generate ranking features? 
+ RankProfile rankProfile = documentDb.rankProfiles().get(query.getRanking().getProfile()); + if (rankProfile == null) return true; // stay safe + if (rankProfile.hasSummaryFeatures()) return true; + if (query.getRanking().getListFeatures()) return true; + + // (Don't just add other checks here as there is a return false above) + + return false; + } + + /** + * Whether to mask out the row id from the index uri. + * Masking out the row number is useful when it is necessary to deduplicate + * across rows. That is necessary with searchers which issues several queries + * to produce one result in the first phase, as the grouping searcher - when + * some of those searchers go to different rows, a mechanism is needed to detect + * duplicates returned from different rows before the summary is requested. + * Producing an index id which is the same across rows and using that as the + * hit uri provides this. Note that this only works if the document ids are the + * same for all the nodes (rows) in a column. This is usually the case for + * batch and incremental indexing, but not for realtime. 
+ */ + + public long getEditionTimeStamp() { + return editionTimeStamp; + } + + public void setEditionTimeStamp(long editionTime) { + this.editionTimeStamp = editionTime; + } + + public String toString() { + return "fast searcher (" + getName() + ") " + backend; + } + + /** + * Returns an array of the hits contained in this result + * + * @param filled true to return all hits, false to return only unfilled hits + * @return array of docids, empty array if no hits + */ + private DocsumPacketKey[] getPacketKeys(Result result, String summaryClass, boolean filled) { + DocsumPacketKey[] packetKeys = new DocsumPacketKey[result.getHitCount()]; + int x = 0; + + for (Iterator<com.yahoo.search.result.Hit> i = hitIterator(result); i.hasNext();) { + com.yahoo.search.result.Hit hit = i.next(); + if (hit instanceof FastHit) { + FastHit fastHit = (FastHit) hit; + if(filled || !fastHit.isFilled(summaryClass)) { + packetKeys[x] = new DocsumPacketKey(fastHit.getGlobalId(), fastHit.getPartId(), summaryClass); + x++; + } + } + } + if (x < packetKeys.length) { + DocsumPacketKey[] tmp = new DocsumPacketKey[x]; + + System.arraycopy(packetKeys, 0, tmp, 0, x); + return tmp; + } else { + return packetKeys; + } + } + + protected boolean isLoggingFine() { + return getLogger().isLoggable(Level.FINE); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FeatureDataField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FeatureDataField.java new file mode 100644 index 00000000000..b622f5c62c5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FeatureDataField.java @@ -0,0 +1,45 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.fastsearch; + +import com.yahoo.data.access.Inspector; +import com.yahoo.data.access.Type; +import com.yahoo.container.search.LegacyEmulationConfig; +import com.yahoo.search.result.FeatureData; + +/** + * Class representing a "feature data" field. This was historically + * just a string containing JSON; now it's a structure of + * data (that will be rendered as JSON by default). + */ +public class FeatureDataField extends LongstringField { + + public FeatureDataField (String name) { + super(name); + } + + @Override + public String toString() { + return "field " + getName() + " type FeatureDataField"; + } + + public Object convert(Inspector value) { + if (! value.valid()) { + if (getEmulConfig().stringBackedFeatureData()) { + return ""; + } else if (getEmulConfig().forceFillEmptyFields()) { + return new FeatureData(com.yahoo.data.access.simple.Value.empty()); + } else { + return null; + } + } + if (value.type() == Type.STRING) { + return value.asString(); + } + FeatureData obj = new FeatureData(value); + if (getEmulConfig().stringBackedFeatureData()) { + return obj.toJson(); + } else { + return obj; + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/FloatField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FloatField.java new file mode 100644 index 00000000000..ed5c7edd4da --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/FloatField.java @@ -0,0 +1,49 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.prelude.fastsearch;


import java.nio.ByteBuffer;

import com.yahoo.search.result.NanNumber;
import com.yahoo.data.access.Inspector;


/**
 * A docsum field holding a single-precision float. A NaN value is exposed as NanNumber.NaN.
 *
 * @author <a href="mailto:mathiasm@yahoo-inc.com">Mathias M\u00f8lster Lidal</a>
 */
public class FloatField extends DocsumField {
    /** The value standing in for "no value" when converting from an Inspector */
    static final double EMPTY_VALUE = Float.NaN;

    public FloatField(String name) {
        super(name);
    }

    /** Boxes the float, mapping NaN to the NanNumber marker */
    private Object convert(float value) {
        if (Float.isNaN(value)) {
            return NanNumber.NaN;
        }
        return Float.valueOf(value);
    }

    /** Reads one float from the buffer's current position and converts it */
    public Object decode(ByteBuffer b) {
        float raw = b.getFloat();
        return convert(raw);
    }

    /** Decodes one float from the buffer and stores it in the hit under this field's name */
    public Object decode(ByteBuffer b, FastHit hit) {
        Object decoded = decode(b);
        hit.setField(name, decoded);
        return decoded;
    }

    /** Advances the buffer past one float and returns the number of bytes skipped */
    public int getLength(ByteBuffer b) {
        final int width = Float.SIZE >> 3;
        b.position(b.position() + width);
        return width;
    }

    /** Converts an inspected value, treating a missing value as NaN */
    public Object convert(Inspector value) {
        float narrowed = (float) value.asDouble(EMPTY_VALUE);
        return convert(narrowed);
    }
}
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.fastsearch;

import java.util.List;

import com.yahoo.fs4.QueryPacketData;
import com.yahoo.search.result.Hit;
import com.yahoo.searchlib.aggregation.Grouping;

// TODO: Author!
/**
 * A meta hit with id "meta:grouping" carrying a list of grouping results, the
 * docsum definitions given at construction and, optionally, the query packet
 * data of the query which produced it.
 */
public class GroupingListHit extends Hit {
    private static final long serialVersionUID = -6645125887873082234L;

    private final List<Grouping> groupingList;
    private final DocsumDefinitionSet defs;
    private QueryPacketData queryPacketData;

    /** for unit tests only, may give problems if grouping contains docsums */
    public GroupingListHit(List<Grouping> groupingList) {
        this(groupingList, null);
    }

    public GroupingListHit(List<Grouping> groupingList, DocsumDefinitionSet defs) {
        super("meta:grouping", 0);
        this.groupingList = groupingList;
        this.defs = defs;
    }

    /** Always true: this hit is a meta hit */
    public boolean isMeta() {
        return true;
    }

    public List<Grouping> getGroupingList() {
        return groupingList;
    }

    /** Returns the docsum definitions given at construction, which may be null */
    public DocsumDefinitionSet getDocsumDefinitionSet() {
        return defs;
    }

    public void setQueryPacketData(QueryPacketData queryPacketData) {
        this.queryPacketData = queryPacketData;
    }

    /** Returns encoded query data from the query used to create this, or null if none present */
    public QueryPacketData getQueryPacketData() {
        return queryPacketData;
    }
}
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.fastsearch;


import java.nio.ByteBuffer;

import com.yahoo.search.result.NanNumber;
import com.yahoo.data.access.Inspector;


/**
 * Class representing a 64-bit integer field in the result set.
 * The value Long.MIN_VALUE is reserved to mean "no value" and is exposed as NanNumber.NaN.
 *
 * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a>
 */
public class Int64Field extends DocsumField {
    /** The value standing in for "no value" */
    static final long EMPTY_VALUE = Long.MIN_VALUE;

    public Int64Field(String name) {
        super(name);
    }

    /** Boxes the long, mapping the empty marker to NanNumber.NaN */
    private Object convert(long value) {
        if (value != EMPTY_VALUE) {
            return Long.valueOf(value);
        }
        return NanNumber.NaN;
    }

    /** Reads one long from the buffer's current position and converts it */
    public Object decode(ByteBuffer b) {
        long raw = b.getLong();
        return convert(raw);
    }

    /** Decodes one long from the buffer and stores it in the hit under this field's name */
    public Object decode(ByteBuffer b, FastHit hit) {
        Object decoded = decode(b);
        hit.setField(name, decoded);
        return decoded;
    }

    public String toString() {
        return "field " + getName() + " type int64";
    }

    /** Advances the buffer past one long and returns the number of bytes skipped */
    public int getLength(ByteBuffer b) {
        final int width = Long.SIZE >> 3;
        b.position(b.position() + width);
        return width;
    }

    /** Converts an inspected value, treating a missing value as the empty marker */
    public Object convert(Inspector value) {
        long raw = value.asLong(EMPTY_VALUE);
        return convert(raw);
    }
}
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.fastsearch;


import java.nio.ByteBuffer;

import com.yahoo.search.result.NanNumber;
import com.yahoo.data.access.Inspector;

/**
 * Class representing an integer field in the result set.
 * The value Integer.MIN_VALUE is reserved to mean "no value" and is exposed as NanNumber.NaN.
 *
 * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a>
 */
public class IntegerField extends DocsumField {
    /** The value standing in for "no value" */
    static final int EMPTY_VALUE = Integer.MIN_VALUE;

    public IntegerField(String name) {
        super(name);
    }

    /** Boxes the int, mapping the empty marker to NanNumber.NaN */
    private Object convert(int value) {
        if (value != EMPTY_VALUE) {
            return Integer.valueOf(value);
        }
        return NanNumber.NaN;
    }

    /** Reads one int from the buffer's current position and converts it */
    public Object decode(ByteBuffer b) {
        int raw = b.getInt();
        return convert(raw);
    }

    /** Decodes one int from the buffer and stores it in the hit under this field's name */
    public Object decode(ByteBuffer b, FastHit hit) {
        Object decoded = decode(b);
        hit.setField(name, decoded);
        return decoded;
    }

    public String toString() {
        return "field " + getName() + " type int";
    }

    /** Advances the buffer past one int and returns the number of bytes skipped */
    public int getLength(ByteBuffer b) {
        final int width = Integer.SIZE >> 3;
        b.position(b.position() + width);
        return width;
    }

    /**
     * Converts an inspected value, treating a missing value as the empty marker.
     * The inspected long is narrowed to int with a plain cast.
     */
    public Object convert(Inspector value) {
        int narrowed = (int) value.asLong(EMPTY_VALUE);
        return convert(narrowed);
    }
}
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.fastsearch;


import java.nio.ByteBuffer;

import com.yahoo.io.SlowInflate;
import com.yahoo.prelude.hitfield.JSONString;
import com.yahoo.text.Utf8;
import com.yahoo.data.access.*;
import com.yahoo.data.access.simple.Value;


/**
 * Class representing a JSON string field in the result set
 *
 * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
 */
public class JSONField extends DocsumField implements VariableLengthField {
    public JSONField(String name) {
        super(name);
    }

    /**
     * Decodes a length-prefixed, possibly compressed, UTF-8 payload into a JSONString.
     * The most significant bit of the 32-bit length word flags compression; when set,
     * the next int is the uncompressed size and is counted as part of the length.
     */
    @Override
    public Object decode(ByteBuffer b) {
        int header = b.getInt();
        boolean compressed = (header & 0x80000000) != 0;
        int payloadLength = header & 0x7fffffff;
        int inflatedLength = 0;
        if (compressed) {
            inflatedLength = b.getInt();
            payloadLength -= 4;
        }

        byte[] bytes = new byte[payloadLength];
        b.get(bytes);
        if (compressed) {
            bytes = new SlowInflate().unpack(bytes, inflatedLength);
        }
        return new JSONString(Utf8.toString(bytes));
    }

    @Override
    public Object decode(ByteBuffer b, FastHit hit) {
        Object decoded = decode(b);
        hit.setField(name, decoded);
        return decoded;
    }

    @Override
    public String toString() {
        return "field " + getName() + " type JSONString";
    }

    /** Skips this field in the buffer and returns its total size, length word included */
    @Override
    public int getLength(ByteBuffer b) {
        int start = b.position();
        // MSB = compression flag, re decode
        int total = (b.getInt() & 0x7fffffff) + (Integer.SIZE >> 3);
        b.position(start + total);
        return total;
    }

    /** Peeks at the length word, without moving the buffer position, to read the compression flag */
    @Override
    public boolean isCompressed(ByteBuffer b) {
        int start = b.position();
        // MSB = compression flag, re decode
        int flag = b.getInt() & 0x80000000;
        b.position(start);
        return flag != 0;
    }

    @Override
    public int sizeOfLength() {
        return Integer.SIZE >> 3;
    }

    /** Base for the array traversers below: collects converted entries in target */
    private static class CompatibilityConverter {
        Value.ArrayValue target = new Value.ArrayValue();

        /** Returns strings as-is and everything else as a string value */
        Inspector stringify(Inspector value) {
            Type type = value.type();
            if (type == Type.STRING) {
                return value;
            }
            final String text;
            if (type == Type.LONG) {
                text = String.valueOf(value.asLong());
            } else if (type == Type.DOUBLE) {
                text = String.valueOf(value.asDouble());
            } else {
                text = value.toString();
            }
            return new Value.StringValue(text);
        }
    }

    /** Converts each entry of a plain array to a string */
    private static class ArrConv extends CompatibilityConverter implements ArrayTraverser {
        @Override
        public void entry(int idx, Inspector value) {
            target.add(stringify(value));
        }
    }

    /** Converts [item, weight] pairs, stringifying the item */
    private static class WsConv1 extends CompatibilityConverter implements ArrayTraverser {
        @Override
        public void entry(int idx, Inspector value) {
            Value.ArrayValue pair = new Value.ArrayValue();
            pair.add(stringify(value.entry(0)));
            pair.add(value.entry(1));
            target.add(pair);
        }
    }

    /** Converts {item, weight} objects to [item, weight] pairs, stringifying the item */
    private static class WsConv2 extends CompatibilityConverter implements ArrayTraverser {
        @Override
        public void entry(int idx, Inspector value) {
            Value.ArrayValue pair = new Value.ArrayValue();
            pair.add(stringify(value.field("item")));
            pair.add(value.field("weight"));
            target.add(pair);
        }
    }

    /**
     * Rewrites a non-empty top-level array into the backwards compatible shape,
     * chosen by looking at its first entry only. Anything else is returned unchanged.
     */
    static Inspector convertTop(Inspector value) {
        if (value.type() != Type.ARRAY || value.entryCount() <= 0) {
            return value;
        }

        Inspector first = value.entry(0);
        CompatibilityConverter converter = null;
        if (first.type() == Type.ARRAY && first.entryCount() == 2) {
            // old style weighted set
            WsConv1 pairs = new WsConv1();
            value.traverse(pairs);
            converter = pairs;
        } else if (first.type() == Type.OBJECT
                   && first.fieldCount() == 2
                   && first.field("item").valid()
                   && first.field("weight").valid()) {
            // new style weighted set
            WsConv2 objects = new WsConv2();
            value.traverse(objects);
            converter = objects;
        } else if (first.type() == Type.LONG || first.type() == Type.DOUBLE) {
            // array of numbers
            ArrConv numbers = new ArrConv();
            value.traverse(numbers);
            converter = numbers;
        }
        return converter == null ? value : converter.target;
    }

    public Object convert(Inspector value) {
        if ( ! value.valid()) {
            return new JSONString("");
        }
        return new JSONString(convertTop(value));
    }
}
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.fastsearch;


import java.nio.ByteBuffer;

import com.yahoo.io.SlowInflate;
import com.yahoo.prelude.hitfield.RawData;
import com.yahoo.data.access.simple.Value;
import com.yahoo.data.access.Inspector;


/**
 * Class representing a long data field in the result set.
 *
 * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a>
 */
public class LongdataField extends DocsumField implements VariableLengthField {
    public LongdataField(String name) {
        super(name);
    }

    /** Wraps the bytes in a RawData */
    private Object convert(byte[] value) {
        return new RawData(value);
    }

    /**
     * Decodes a length-prefixed, possibly compressed, payload into a RawData.
     * The most significant bit of the 32-bit length word flags compression; when set,
     * the next int is the uncompressed size and is counted as part of the length.
     */
    @Override
    public Object decode(ByteBuffer b) {
        int header = b.getInt();
        boolean compressed = (header & 0x80000000) != 0;
        int payloadLength = header & 0x7fffffff;
        int inflatedLength = 0;
        if (compressed) {
            inflatedLength = b.getInt();
            payloadLength -= 4;
        }

        byte[] bytes = new byte[payloadLength];
        b.get(bytes);
        if (compressed) {
            bytes = new SlowInflate().unpack(bytes, inflatedLength);
        }
        return convert(bytes);
    }

    @Override
    public Object decode(ByteBuffer b, FastHit hit) {
        Object decoded = decode(b);
        hit.setField(name, decoded);
        return decoded;
    }

    /** Skips this field in the buffer and returns its total size, length word included */
    @Override
    public int getLength(ByteBuffer b) {
        int start = b.position();
        // MSB = compression flag, re decode
        int total = (b.getInt() & 0x7fffffff) + (Integer.SIZE >> 3);
        b.position(start + total);
        return total;
    }

    /** Peeks at the length word, without moving the buffer position, to read the compression flag */
    @Override
    public boolean isCompressed(ByteBuffer b) {
        int start = b.position();
        // MSB = compression flag, re decode
        int flag = b.getInt() & 0x80000000;
        b.position(start);
        return flag != 0;
    }

    @Override
    public int sizeOfLength() {
        return Integer.SIZE >> 3;
    }

    /** Converts an inspected value, using the data of an empty value when it has none */
    @Override
    public Object convert(Inspector value) {
        byte[] fallback = Value.empty().asData();
        return convert(value.asData(fallback));
    }
}
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.fastsearch;


import java.nio.ByteBuffer;

import com.yahoo.io.SlowInflate;
import com.yahoo.text.Utf8;
import com.yahoo.data.access.Inspector;


/**
 * Class representing a long string field in the result set.
 *
 * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a>
 */
public class LongstringField extends DocsumField implements VariableLengthField {
    public LongstringField(String name) {
        super(name);
    }

    /**
     * Decodes a length-prefixed, possibly compressed, UTF-8 payload into a String.
     * The most significant bit of the 32-bit length word flags compression; when set,
     * the next int is the uncompressed size and is counted as part of the length.
     */
    @Override
    public Object decode(ByteBuffer b) {
        int header = b.getInt();
        boolean compressed = (header & 0x80000000) != 0;
        int payloadLength = header & 0x7fffffff;
        int inflatedLength = 0;
        if (compressed) {
            inflatedLength = b.getInt();
            payloadLength -= 4;
        }

        byte[] bytes = new byte[payloadLength];
        b.get(bytes);
        if (compressed) {
            bytes = new SlowInflate().unpack(bytes, inflatedLength);
        }
        return Utf8.toString(bytes);
    }

    @Override
    public Object decode(ByteBuffer b, FastHit hit) {
        Object decoded = decode(b);
        hit.setField(name, decoded);
        return decoded;
    }

    /** Skips this field in the buffer and returns its total size, length word included */
    @Override
    public int getLength(ByteBuffer b) {
        int start = b.position();
        // MSB = compression flag, re decode
        int total = (b.getInt() & 0x7fffffff) + (Integer.SIZE >> 3);
        b.position(start + total);
        return total;
    }

    /** Peeks at the length word, without moving the buffer position, to read the compression flag */
    @Override
    public boolean isCompressed(ByteBuffer b) {
        int start = b.position();
        // MSB = compression flag, re decode
        int flag = b.getInt() & 0x80000000;
        b.position(start);
        return flag != 0;
    }

    @Override
    public int sizeOfLength() {
        return Integer.SIZE >> 3;
    }

    /** Converts an inspected value to a string, using "" when it has none */
    @Override
    public Object convert(Inspector value) {
        return value.asString("");
    }
}
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.fastsearch;


import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.logging.Logger;

import com.yahoo.log.LogLevel;


/**
 * An LRU cache using the packet size reported by the cached entries as
 * size limiting factor. Directly modelled after com.yahoo.collections.Cache.
 * <p>
 * Note that get(CacheKey) and remove(CacheKey) are overloads of, not overrides of,
 * the Map methods taking Object: only calls made with a CacheKey-typed argument
 * get the expiry check and the size bookkeeping.
 *
 * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a>
 */
public class PacketCache extends LinkedHashMap<CacheKey, PacketWrapper> {

    private static final long serialVersionUID = -7403077211906108356L;

    /** The <i>current</i> number of bytes of packets in this cache */
    private int totalSize;

    /** The maximum number of bytes of packets in this cache */
    private final int capacity;

    /** The max size of a cached item compared to the total size */
    private int maxCacheItemPercentage = 1;

    /** The max age for a valid cache entry, in milliseconds. Always positive, see the constructor. */
    private final long maxAge;

    private static final Logger log = Logger.getLogger(PacketCache.class.getName());

    @Override
    public void clear() {
        super.clear();
        totalSize = 0;
    }

    /**
     * Sets the max size of a cached item compared to the total size
     * Cache requests for larger objects will be ignored
     */
    public void setMaxCacheItemPercentage(int maxCapacityPercentage) {
        maxCacheItemPercentage = maxCapacityPercentage;
    }

    /**
     * Creates a cache with a size given by
     * cachesizemegabytes*2^20+cachesizebytes, capped at Integer.MAX_VALUE bytes
     *
     * @param capacityMegaBytes the cache size, measured in megabytes
     * @param capacityBytes additional number of bytes to add to the cache size
     * @param maxAge seconds a cache entry is valid, 0 or less are illegal arguments
     * @throws IllegalArgumentException if maxAge or the total size is 0 or less
     */
    public PacketCache(int capacityMegaBytes,int capacityBytes,double maxAge) {
        // hardcoded inital entry capacity, won't matter much anyway
        // after a while
        super(12500, 1.0f, true);
        if (maxAge <= 0.0d) {
            throw new IllegalArgumentException("maxAge <= 0 not legal on 5.1, use some very large number for no timeout.");
        }
        // Sum in long: the int expression overflows when the two parts add up to more than 2 GB
        long requestedBytes = ((long) capacityMegaBytes << 20) + capacityBytes;
        if (requestedBytes <= 0) {
            throw new IllegalArgumentException("Total cache size set to 0 or less bytes. If no caching is desired, avoid creating this object instead.");
        }
        if (requestedBytes > Integer.MAX_VALUE) {
            log.log(LogLevel.INFO, "Packet cache of more than 2 GB requested. Reverting to 2 GB packet cache.");
            this.capacity = Integer.MAX_VALUE;
        } else {
            this.capacity = (int) requestedBytes;
        }
        this.maxAge = (long) (maxAge * 1000.0d);
    }

    /**
     * Overrides LinkedHashMap.removeEldestEntry as suggested to implement LRU cache.
     */
    @Override
    protected boolean removeEldestEntry(Map.Entry<CacheKey, PacketWrapper> eldest)
    {
        if (totalSize > capacity) {
            totalSize -= eldest.getValue().getPacketsSize();
            return true;
        }
        return false;
    }

    /** Removes entries in iteration order until the total size is below the capacity */
    private void removeOverflow() {
        if (totalSize < capacity) return;

        for (Iterator<PacketWrapper> i = values().iterator(); i.hasNext();) {
            PacketWrapper eldestEntry = i.next();
            totalSize -= eldestEntry.getPacketsSize();

            i.remove();
            if (totalSize < capacity) {
                break;
            }
        }
    }

    /** Returns the capacity of this cache in whole megabytes */
    public int getCapacity() {
        return capacity >> 20;
    }

    /** Returns the capacity of this cache in bytes */
    public int getByteCapacity() {
        return capacity;
    }

    /**
     * Adds a PacketWrapper object to this cache,
     * unless the size is more than maxCacheItemPercentage of the total size
     */
    @Override
    public PacketWrapper put(CacheKey key, PacketWrapper value) {
        return put(key, value, System.currentTimeMillis());
    }

    /**
     * Adds a PacketWrapper to this cache,
     * unless the size is more than maxCacheItemPercentage of the total size
     *
     * @param timestamp the timestamp for the first packet in the array,
     *        unit milliseconds
     * @return the entry previously stored under the key, or null if there was none
     */
    public PacketWrapper put(CacheKey key, PacketWrapper result, long timestamp) {
        int size = result.getPacketsSize();

        if (size > 0) {
            result.setTimestamp(timestamp);
        }

        // don't insert if it is too big
        // (compared in long: both int products overflow for realistic sizes)
        if ((long) size * 100L > (long) capacity * maxCacheItemPercentage) {
            // remove the old one since that is now stale.
            return remove(key);
        }

        totalSize += size;
        PacketWrapper previous = super.put(key, result);
        if (previous != null) {
            totalSize -= previous.getPacketsSize();
        }
        if (totalSize > (capacity * 1.1)) {
            removeOverflow();
        }

        return previous;
    }

    /** Returns the entry for the key if present and not older than the max age, otherwise null */
    public PacketWrapper get(CacheKey key) {
        return get(key, System.currentTimeMillis());
    }

    /**
     * Returns the entry for the key, or null if there is none or it has expired.
     * An expired entry is removed as a side effect.
     *
     * @param now the time to measure entry age against, unit milliseconds
     */
    public PacketWrapper get(CacheKey key, long now) {
        PacketWrapper result = super.get(key);

        if (result == null) {
            return result;
        }

        long timestamp = result.getTimestamp();

        if ((now - timestamp) > maxAge) {
            remove(key);
            return null;
        } else {
            return result;
        }
    }

    /** Removes the entry for the key, subtracting its packet size from the total, and returns it */
    public PacketWrapper remove(CacheKey key) {
        PacketWrapper removed = super.remove(key);

        if (removed != null) {
            totalSize -= removed.getPacketsSize();
        }
        return removed;
    }

    /** Returns the current bookkept number of bytes of packets in this cache */
    public int totalPacketSize() {
        return totalSize;
    }

}
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.fastsearch; + + +import java.util.*; +import java.util.logging.Logger; + +import com.yahoo.fs4.BasicPacket; +import com.yahoo.fs4.DocsumPacket; +import com.yahoo.fs4.DocumentInfo; +import com.yahoo.fs4.Packet; +import com.yahoo.fs4.QueryResultPacket; +import com.yahoo.document.GlobalId; +import com.yahoo.document.DocumentId; + + +/** + * A wrapper for cache entries to make it possible to check whether the + * hits are truly correct. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author <a href="mailto:mathiasm@yahoo-inc.com">Mathias Lidal</a> + */ +public class PacketWrapper implements Cloneable { + private static Logger log = Logger.getLogger(PacketWrapper.class.getName()); + + final int keySize; + // associated result packets, sorted in regard to offset + private ArrayList<BasicPacket> resultPackets = new ArrayList<>(3); // length = "some small number" + LinkedHashMap<DocsumPacketKey, BasicPacket> packets; + + private static class ResultPacketComparator<T extends BasicPacket> implements Comparator<T> { + @Override + public int compare(T o1, T o2) { + QueryResultPacket r1 = (QueryResultPacket) o1; + QueryResultPacket r2 = (QueryResultPacket) o2; + return r1.getOffset() - r2.getOffset(); + } + } + + private static ResultPacketComparator<BasicPacket> resultPacketComparator = new ResultPacketComparator<>(); + + public PacketWrapper(CacheKey key, DocsumPacketKey[] packetKeys, BasicPacket[] bpackets) { + // Should not support key == null + this.keySize = key.byteSize(); + resultPackets.add(bpackets[0]); + this.packets = new LinkedHashMap<>(); + Packet[] ppackets = new Packet[packetKeys.length]; + + for (int i = 0; i < packetKeys.length; i++) { + ppackets[i] = (Packet) bpackets[i + 1]; + } + addDocsums(packetKeys, ppackets); + } + + /** + * Only used by PacketCacheTestCase, should not be used otherwise + */ + public PacketWrapper(CacheKey key, BasicPacket[] packets) { + // Should support key == null as this is 
for testing + if (key == null) { + keySize = 0; + } else { + this.keySize = key.byteSize(); + } + resultPackets.add(packets[0]); + this.packets = new LinkedHashMap<>(); + for (int i = 0; i < packets.length - 1; i++) { + this.packets.put(new DocsumPacketKey(new GlobalId(new DocumentId("doc:test:" + i).getGlobalId()), i, null), packets[i + 1]); + } + + } + + public QueryResultPacket getFirstResultPacket() { + if (resultPackets.size() > 0) { + return (QueryResultPacket) resultPackets.get(0); + } else { + return null; + } + } + + /** + * @return list of documents, null if not all are available + */ + public List<DocumentInfo> getDocuments(int offset, int hits) { + // speculatively allocate list for the hits + List<DocumentInfo> docs = new ArrayList<>(hits); + int currentOffset = 0; + QueryResultPacket r = getFirstResultPacket(); + if (offset >= r.getTotalDocumentCount()) { + // shortcut especially for results with 0 hits + // >= both necessary for end of result sets and + // offset == 0 && totalDocumentCount == 0 + return docs; + } + for (Iterator<BasicPacket> i = resultPackets.iterator(); i.hasNext();) { + QueryResultPacket result = (QueryResultPacket) i.next(); + if (result.getOffset() > offset + currentOffset) { + // we haven't got all the requested document info objects + return null; + } + if (result.getOffset() + result.getDocumentCount() + <= currentOffset + offset) { + // no new hits available + continue; + } + List<DocumentInfo> documents = result.getDocuments(); + int packetOffset = (offset + currentOffset) - result.getOffset(); + int afterLastDoc = Math.min(documents.size(), packetOffset + hits); + for (Iterator<DocumentInfo> j = documents.subList(packetOffset, afterLastDoc).iterator(); + docs.size() < hits && j.hasNext(); + ++currentOffset) { + docs.add(j.next()); + } + if (hits == docs.size() + || offset + docs.size() >= result.getTotalDocumentCount()) { + // We have the hits we need, or there are no more hits available + return docs; + } + } + return 
null; + } + + public void addResultPacket(QueryResultPacket resultPacket) { + // This function only keeps the internal list sorted according + // to offset + int insertionPoint; + QueryResultPacket r; + + if (resultPacket.getDocumentCount() == 0) { + return; // do not add a packet which does not contain new info + } + + insertionPoint = Collections.binarySearch(resultPackets, + resultPacket, + resultPacketComparator); + if (insertionPoint < 0) { + // new offset + insertionPoint = ~insertionPoint; // (insertionPoint + 1) * -1; + resultPackets.add(insertionPoint, resultPacket); + cleanResultPackets(); + } else { + // there exists a packet with same offset + r = (QueryResultPacket) resultPackets.get(insertionPoint); + if (resultPacket.getDocumentCount() > r.getDocumentCount()) { + resultPackets.set(insertionPoint, resultPacket); + cleanResultPackets(); + } + } + } + + private void cleanResultPackets() { + int marker; + QueryResultPacket previous; + if (resultPackets.size() == 1) { + return; + } + + // we know the list is sorted with regard to offset + // First ensure the list grows in regards to lastOffset as well. + // Could have done this addResultPacket, but this makes the code + // simpler. 
+ previous = (QueryResultPacket) resultPackets.get(0); + for (int i = 1; i < resultPackets.size(); ++i) { + QueryResultPacket r = (QueryResultPacket) resultPackets.get(i); + if (r.getOffset() + r.getDocumentCount() + <= previous.getOffset() + previous.getDocumentCount()) { + resultPackets.remove(i--); + } else { + previous = r; + } + } + + marker = 0; + while (marker < (resultPackets.size() - 2)) { + QueryResultPacket r0 = (QueryResultPacket) resultPackets.get(marker); + QueryResultPacket r1 = (QueryResultPacket) resultPackets.get(marker + 1); + QueryResultPacket r2 = (QueryResultPacket) resultPackets.get(marker + 2); + int nextOffset = r0.getOffset() + r0.getDocumentCount(); + + if (r1.getOffset() < nextOffset + && r2.getOffset() <= nextOffset) { + resultPackets.remove(marker + 1); + } + ++marker; + } + } + + /** Only for testing. */ + public List<BasicPacket> getResultPackets() { + return resultPackets; + } + + public void addDocsums(DocsumPacketKey[] packetKeys, BasicPacket[] bpackets, + int offset) { + Packet[] ppackets = new Packet[packetKeys.length]; + + for (int i = 0; i < packetKeys.length; i++) { + ppackets[i] = (Packet) bpackets[i + offset]; + } + addDocsums(packetKeys, ppackets); + } + + public void addDocsums(DocsumPacketKey[] packetKeys, Packet[] packets) { + if (packetKeys == null || packets == null) { + log.warning( + "addDocsums called with " + + (packetKeys == null ? "packetKeys == null " : "") + + (packets == null ? 
"packets == null" : "")); + return; + } + for (int i = 0; i < packetKeys.length && i < packets.length; i++) { + if (packetKeys[i] == null) { + log.warning( + "addDocsums called, but packetsKeys[" + i + "] is null"); + } else if (packets[i] instanceof DocsumPacket) { + DocsumPacket dp = (DocsumPacket) packets[i]; + + if (packetKeys[i].getGlobalId().equals(dp.getGlobalId()) + && dp.getData().length > 0) + { + this.packets.put(packetKeys[i], packets[i]); + log.fine("addDocsums " + i + " globalId: " + dp.getGlobalId()); + } else { + log.warning("not caching bad Docsum for globalId " + packetKeys[i].getGlobalId() + ": " + dp); + } + } else { + log.warning( + "addDocsums called, but packets[" + i + + "] is not a DocsumPacket instance"); + } + } + } + + public int getNumPackets() { + return packets.size(); + } + + BasicPacket getPacket(GlobalId globalId, int partid, String summaryClass) { + return getPacket( + new DocsumPacketKey(globalId, partid, summaryClass)); + } + + BasicPacket getPacket(DocsumPacketKey packetKey) { + return packets.get(packetKey); + } + + long getTimestamp() { + return getFirstResultPacket().getTimestamp(); + } + + public void setTimestamp(long timestamp) { + getFirstResultPacket().setTimestamp(timestamp); + } + + public int getPacketsSize() { + int size = 0; + + for (Iterator<BasicPacket> i = resultPackets.iterator(); i.hasNext();) { + QueryResultPacket r = (QueryResultPacket) i.next(); + int l = r.getLength(); + + if (l < 0) { + log.warning("resultpacket length " + l); + l = 10240; + } + size += l; + } + for (Iterator<BasicPacket> i = packets.values().iterator(); i.hasNext();) { + BasicPacket packet = i.next(); + int l = packet.getLength(); + + if (l < 0) { + log.warning("BasicPacket length " + l); + l = 10240; + } + size += l; + } + size += keySize; + return size; + } + + /** + * Straightforward shallow copy. 
+ */ + @SuppressWarnings("unchecked") + public Object clone() { + try { + PacketWrapper other = (PacketWrapper) super.clone(); + other.resultPackets = (ArrayList<BasicPacket>) resultPackets.clone(); + if (packets != null) { + other.packets = (LinkedHashMap<DocsumPacketKey, BasicPacket>) packets.clone(); + } + return other; + } catch (CloneNotSupportedException e) { + throw new RuntimeException("A non-cloneable superclass has been inserted.", + e); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/RankProfile.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/RankProfile.java new file mode 100644 index 00000000000..66931f37369 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/RankProfile.java @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + +/** + * Information about a rank profile + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +class RankProfile { + + private final String name; + + private final boolean hasSummaryFeatures; + + private final boolean hasRankFeatures; + + public RankProfile(String name, boolean hasSummaryFeatures, boolean hasRankFeatures) { + this.name = name; + this.hasSummaryFeatures = hasSummaryFeatures; + this.hasRankFeatures = hasRankFeatures; + } + + public String getName() { return name; } + + /** Returns true if this rank profile has summary features */ + public boolean hasSummaryFeatures() { return hasSummaryFeatures; } + + /** Returns true if this rank profile has rank features */ + public boolean hasRankFeatures() { return hasRankFeatures; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/ShortField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/ShortField.java new file mode 100644 index 00000000000..e9c19590102 --- /dev/null +++ 
b/container-search/src/main/java/com/yahoo/prelude/fastsearch/ShortField.java @@ -0,0 +1,53 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Class representing a short field in the result set + * + */ +package com.yahoo.prelude.fastsearch; + + +import java.nio.ByteBuffer; + +import com.yahoo.search.result.NanNumber; +import com.yahoo.data.access.Inspector; + +/** + * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a> + */ + +public class ShortField extends DocsumField { + static final short EMPTY_VALUE = Short.MIN_VALUE; + + public ShortField(String name) { + super(name); + } + + private Object convert(short value) { + if (value == EMPTY_VALUE) { + return NanNumber.NaN; + } else { + return Short.valueOf(value); + } + } + + public Object decode(ByteBuffer b) { + return convert(b.getShort()); + } + + public Object decode(ByteBuffer b, FastHit hit) { + Object field = decode(b); + hit.setField(name, field); + return field; + } + + public int getLength(ByteBuffer b) { + int offset = b.position(); + final int bytelength = Short.SIZE >> 3; + b.position(offset + bytelength); + return bytelength; + } + + public Object convert(Inspector value) { + return convert((short)value.asLong(EMPTY_VALUE)); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/StringField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/StringField.java new file mode 100644 index 00000000000..671188e4cae --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/StringField.java @@ -0,0 +1,62 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+/** + * Class representing a string field in the result set + * + */ +package com.yahoo.prelude.fastsearch; + + +import java.nio.ByteBuffer; + +import com.yahoo.text.Utf8; +import com.yahoo.data.access.Inspector; + + +/** + * @author <a href="mailto:borud@yahoo-inc.com">Bj\u00f8rn Borud</a> + */ +public class StringField extends DocsumField implements VariableLengthField { + public StringField(String name) { + super(name); + } + + @Override + public Object decode(ByteBuffer b) { + int length = ((int) b.getShort()) & 0xffff; + Object field; + + field = Utf8.toString(b.array(), b.arrayOffset() + b.position(), length); + b.position(b.position() + length); + return field; + } + + @Override + public Object decode(ByteBuffer b, FastHit hit) { + Object field = decode(b); + hit.setField(name, field); + return field; + } + + @Override + public String toString() { + return "field " + getName() + " type string"; + } + + @Override + public int getLength(ByteBuffer b) { + int offset = b.position(); + int len = ((int) b.getShort()) & 0xffff; + b.position(offset + len + (Short.SIZE >> 3)); + return len + (Short.SIZE >> 3); + } + + @Override + public int sizeOfLength() { + return Short.SIZE >> 3; + } + + @Override + public Object convert(Inspector value) { + return value.asString(""); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/StructDataField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/StructDataField.java new file mode 100644 index 00000000000..f0f4b82c22a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/StructDataField.java @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.fastsearch; + +import com.yahoo.search.result.StructuredData; +import com.yahoo.data.access.Inspector; +import com.yahoo.data.access.Type; +import com.yahoo.container.search.LegacyEmulationConfig; +import com.yahoo.prelude.hitfield.JSONString; + +/** + * Class representing a XML rendered structured data field in the result set + */ +public class StructDataField extends JSONField { + + public StructDataField(String name) { + super(name); + } + + @Override + public String toString() { + return "field " + getName() + " type StructDataField"; + } + + public Object convert(Inspector value) { + if (getEmulConfig().stringBackedStructuredData() || + value.type() == Type.STRING) + { + return super.convert(value); + } + return new StructuredData(value); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/SummaryParameters.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/SummaryParameters.java new file mode 100644 index 00000000000..97a711d8590 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/SummaryParameters.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + + +/** + * Wrapper for document summary parameters and configuration. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class SummaryParameters { + + public final String defaultClass; + + public SummaryParameters(String defaultClass) { + if (defaultClass != null && defaultClass.isEmpty()) + this.defaultClass = null; + else + this.defaultClass = defaultClass; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/TimeoutException.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/TimeoutException.java new file mode 100644 index 00000000000..8c3d587a059 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/TimeoutException.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + +import java.io.IOException; + +/** + * Thrown on communication timeouts + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +@SuppressWarnings("serial") +public class TimeoutException extends IOException { + + public TimeoutException(String message) { + super(message); + } +} + diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/VariableLengthField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/VariableLengthField.java new file mode 100644 index 00000000000..f169533f8db --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/VariableLengthField.java @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + +/** + * Interface to easier find the start of the actual data for variable length + * fields. 
+ * + * @author <a href="mailt:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public interface VariableLengthField { + public int sizeOfLength(); +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackEndSearcher.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackEndSearcher.java new file mode 100644 index 00000000000..820c764de06 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/VespaBackEndSearcher.java @@ -0,0 +1,653 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.fastsearch; + +import java.util.Optional; +import com.yahoo.collections.TinyIdentitySet; +import com.yahoo.fs4.BasicPacket; +import com.yahoo.fs4.DocsumPacket; +import com.yahoo.fs4.DocumentInfo; +import com.yahoo.fs4.ErrorPacket; +import com.yahoo.fs4.QueryPacketData; +import com.yahoo.fs4.Packet; +import com.yahoo.fs4.QueryPacket; +import com.yahoo.fs4.QueryResultPacket; +import com.yahoo.io.GrowableByteBuffer; +import com.yahoo.io.HexDump; +import com.yahoo.log.LogLevel; +import com.yahoo.prelude.ConfigurationException; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.NullItem; +import com.yahoo.prelude.query.textualrepresentation.TextualQueryRepresentation; +import com.yahoo.prelude.querytransform.QueryRewrite; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.protect.Validator; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.cluster.PingableSearcher; +import com.yahoo.search.grouping.vespa.GroupingExecutor; +import com.yahoo.search.result.Coverage; +import com.yahoo.search.result.ErrorHit; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.Relevance; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.searchlib.aggregation.Grouping; +import 
com.yahoo.vespa.objects.BufferSerializer; + +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Level; + + +/** + * Superclass for backend searchers. + * + * @author <a href="mailto:balder@yahoo-inc.com">Henning Baldersheim</a> + */ +@SuppressWarnings("deprecation") +public abstract class VespaBackEndSearcher extends PingableSearcher { + + private static final CompoundName grouping=new CompoundName("grouping"); + private static final CompoundName combinerows=new CompoundName("combinerows"); + protected static final CompoundName PACKET_COMPRESSION_LIMIT = new CompoundName("packetcompressionlimit"); + protected static final CompoundName PACKET_COMPRESSION_TYPE = new CompoundName("packetcompressiontype"); + protected static final CompoundName TRACE_DISABLE = new CompoundName("trace.disable"); + + /** The set of all document databases available in the backend handled by this searcher */ + private Map<String, DocumentDatabase> documentDbs = new LinkedHashMap<>(); + private DocumentDatabase defaultDocumentDb = null; + + /** Default docsum class. null means "unset" and is the default value */ + private String defaultDocsumClass = null; + + /** Returns an iterator which returns all hits below this result **/ + protected Iterator<Hit> hitIterator(Result result) { + return result.hits().unorderedDeepIterator(); + } + + private boolean localDispatching = true; + + /** The name of this source */ + private String name; + + /** Cache wrapper */ + protected CacheControl cacheControl = null; + /** + * The number of last significant bits in the partId which specifies the + * row number in this backend, + * the rest specifies the column. 0 if not known. 
+ */ + private int rowBits = 0; + /** Searchcluster number */ + private int sourceNumber; + + protected final String getName() { return name; } + protected final String getDefaultDocsumClass() { return defaultDocsumClass; } + + /** Sets default document summary class. Default is null */ + private void setDefaultDocsumClass(String docsumClass) { defaultDocsumClass = docsumClass; } + + /** Returns the packet cache controller of this */ + public final CacheControl getCacheControl() { return cacheControl; } + + /** + * Searches a search cluster + * This is an endpoint - searchers will never propagate the search to any nested searcher. + * + * @param query the query to search + * @param queryPacket the serialized query representation to pass to the search cluster + * @param cacheKey the cache key created from the query packet, or null if caching is not used + * @param execution the query execution context + */ + protected abstract Result doSearch2(Query query, QueryPacket queryPacket, CacheKey cacheKey, Execution execution); + + protected abstract void doPartialFill(Result result, String summaryClass); + + private Result cacheLookupFirstPhase(CacheKey key, QueryPacketData queryPacketData, Query query, int offset, int hits, String summaryClass) throws IOException { + PacketWrapper packetWrapper = cacheControl.lookup(key, query); + + if (packetWrapper == null) return null; + + // Check if the cache entry contains the requested hits + List<DocumentInfo> documents = packetWrapper.getDocuments(offset, hits); + if (documents == null) return null; + + if (query.getPresentation().getSummary() == null) + query.getPresentation().setSummary(getDefaultDocsumClass()); + Result result = new Result(query); + QueryResultPacket resultPacket = packetWrapper.getFirstResultPacket(); + + addMetaInfo(query, queryPacketData, resultPacket, result, true); + if (packetWrapper.getNumPackets() == 0) + addUnfilledHits(result, documents, true, queryPacketData, key); + else + addCachedHits(result, 
packetWrapper, summaryClass, documents); + return result; + } + + + protected DocumentDatabase getDocumentDatabase(Query query) { + if (query.getModel().getRestrict().size() == 1) { + String docTypeName = (String)query.getModel().getRestrict().toArray()[0]; + DocumentDatabase db = documentDbs.get(docTypeName); + if (db != null) { + return db; + } + } + return defaultDocumentDb; + } + + private void resolveDocumentDatabase(Query query) { + DocumentDatabase docDb = getDocumentDatabase(query); + if (docDb != null) { + query.getModel().setDocumentDb(docDb.getName()); + } + } + + public final void init(SummaryParameters docSumParams, ClusterParams clusterParams, CacheParams cacheParams, + DocumentdbInfoConfig documentdbInfoConfig) { + this.name = clusterParams.searcherName; + this.sourceNumber = clusterParams.clusterNumber; + this.rowBits = clusterParams.rowBits; + + Validator.ensureNotNull("Name of Vespa backend integration", getName()); + + setDefaultDocsumClass(docSumParams.defaultClass); + + if (documentdbInfoConfig != null) { + for (DocumentdbInfoConfig.Documentdb docDb : documentdbInfoConfig.documentdb()) { + DocumentDatabase db = new DocumentDatabase(docDb, clusterParams.emulation); + if (documentDbs.isEmpty()) { + defaultDocumentDb = db; + } + documentDbs.put(docDb.name(), db); + } + } + + if (cacheParams.cacheControl == null) { + this.cacheControl = new CacheControl(cacheParams.cacheMegaBytes, cacheParams.cacheTimeOutSeconds); + } else { + this.cacheControl = cacheParams.cacheControl; + } + } + + protected void transformQuery(Query query) { } + + public Result search(Query query, Execution execution) { + // query root should not be null here + Item root = query.getModel().getQueryTree().getRoot(); + if (root == null || root instanceof NullItem) { + return new Result(query, ErrorMessage.createNullQuery(query.getHttpRequest().getUri().toString())); + } + + QueryRewrite.optimizeByRestrict(query); + QueryRewrite.optimizeAndNot(query); + 
QueryRewrite.collapseSingleComposites(query); + + root = query.getModel().getQueryTree().getRoot(); + if (root == null || root instanceof NullItem) // root can become null after optimization + return new Result(query); + + resolveDocumentDatabase(query); + transformQuery(query); + traceQuery(name, "search", query, query.getOffset(), query.getHits(), 1, Optional.<String>empty()); + + root = query.getModel().getQueryTree().getRoot(); + if (root == null || root instanceof NullItem) // root can become null after resolving and transformation? + return new Result(query); + + QueryPacket queryPacket = QueryPacket.create(query); + int compressionLimit = query.properties().getInteger(PACKET_COMPRESSION_LIMIT, 0); + queryPacket.setCompressionLimit(compressionLimit); + if (compressionLimit != 0) { + queryPacket.setCompressionType(query.properties().getString(PACKET_COMPRESSION_TYPE, "lz4")); + } + + if (isLoggingFine()) + getLogger().fine("made QueryPacket: " + queryPacket); + + Result result = null; + CacheKey cacheKey = null; + if (cacheControl.useCache(query)) { + cacheKey = new CacheKey(queryPacket); + result = getCached(cacheKey, queryPacket.getQueryPacketData(), query); + } + + if (result == null) { + String next = null; + result = doSearch2(query, queryPacket, cacheKey, execution); + if (isLoggingFine()) { + getLogger().fine("Result NOT retrieved from cache"); + } + + if (query.getTraceLevel() >= 1) { + query.trace(getName() + " dispatch response: " + result, false, 1); + } + result.trace(getName()); + } + return result; + } + + /** + * Returns a cached result, or null if no result was cached for this key + * + * @param cacheKey the cache key created from the query packet + * @param queryPacketData a serialization of the query, to avoid having to recompute this, or null if not available + * @param query the query, used for tracing, lookup of result window and result creation + */ + private Result getCached(CacheKey cacheKey, QueryPacketData queryPacketData, Query 
query) { + if (query.getTraceLevel() >= 6) { + query.trace("Cache key hash: " + cacheKey.hashCode(), 6); + if (query.getTraceLevel() >= 8) { + query.trace("Cache key: " + HexDump.toHexString(cacheKey.getCopyOfFullKey()), 8); + } + } + + try { + Result result = cacheLookupFirstPhase(cacheKey, queryPacketData, query, query.getOffset(), query.getHits(), query.getPresentation().getSummary()); + if (result == null) return null; + + if (isLoggingFine()) { + getLogger().fine("Result retrieved from cache: " + result); + } + if (query.getTraceLevel() >= 1) { + query.trace(getName() + " cached response: " + result, false, 1); + } + result.trace(getName()); + return result; + } + catch (IOException e) { + Result result = new Result(query); + + if (result.hits().getErrorHit() == null) { + result.hits().setError(ErrorMessage.createBackendCommunicationError( + "Fast Search (" + getName() + ") failed: " + e.getMessage())); + } + if (query.getTraceLevel() >= 1) { + query.trace(getName() + " error response: " + result, false, 1); + } + return result; + } + } + + private List<Result> partitionHits(Result result, String summaryClass) { + List<Result> parts = new ArrayList<>(); + TinyIdentitySet<Query> queryMap = new TinyIdentitySet<>(4); + + for (Iterator<Hit> itr = hitIterator(result); itr.hasNext(); ) { + Hit hit = itr.next(); + if (hit instanceof FastHit) { + FastHit fastHit = (FastHit) hit; + if (!fastHit.isFilled(summaryClass)) { + Query q = fastHit.getQuery(); + if (q == null) { + q = result.hits().getQuery(); // fallback for untagged hits + } + int idx = queryMap.indexOf(q); + if (idx < 0) { + idx = queryMap.size(); + Result r = new Result(q); + parts.add(r); + queryMap.add(q); + } + parts.get(idx).hits().add(fastHit); + } + } + } + return parts; + } + + @Override + public void fill(Result result, String summaryClass, Execution execution) { + if (result.isFilled(summaryClass)) return; // TODO: Checked in the superclass - remove + + List<Result> parts= partitionHits(result, 
summaryClass); + if (parts.size() > 0) { // anything to fill at all? + for (Result r : parts) { + doPartialFill(r, summaryClass); + mergeErrorsInto(result, r); + } + result.hits().setSorted(false); + result.analyzeHits(); + } + } + + private void mergeErrorsInto(Result destination, Result source) { + ErrorHit eh = source.hits().getErrorHit(); + if (eh != null) { + for (ErrorMessage error : eh.errors()) + destination.hits().addError(error); + } + } + + static void traceQuery(String sourceName, String type, Query query, int offset, int hits, int level, Optional<String> quotedSummaryClass) { + if ((query.getTraceLevel()<level) || query.properties().getBoolean(TRACE_DISABLE)) return; + + StringBuilder s = new StringBuilder(); + s.append(sourceName).append(" " + type + " to dispatch: ") + .append("query=[") + .append(query.getModel().getQueryTree().getRoot().toString()) + .append("]"); + + s.append(" timeout=").append(query.getTimeout()).append("ms"); + + s.append(" offset=") + .append(offset) + .append(" hits=") + .append(hits); + + if (query.getRanking().hasRankProfile()) { + s.append(" rankprofile[") + .append(query.getRanking().getProfile()) + .append("]"); + } + + if (query.getRanking().getFreshness() != null) { + s.append(" freshness=") + .append(query.getRanking().getFreshness().getRefTime()); + } + + if (query.getRanking().getSorting() != null) { + s.append(" sortspec=") + .append(query.getRanking().getSorting().fieldOrders().toString()); + } + + if (query.getRanking().getLocation() != null) { + s.append(" location=") + .append(query.getRanking().getLocation().toString()); + } + + List<Grouping> grouping = GroupingExecutor.getGroupingList(query); + s.append(" grouping=").append(grouping.size()).append(" : "); + for(Grouping g : grouping) { + s.append(g.toString()); + } + + if ( ! query.getRanking().getProperties().isEmpty()) { + s.append(" rankproperties=") + .append(query.getRanking().getProperties().toString()); + } + + if ( ! 
query.getRanking().getFeatures().isEmpty()) { + s.append(" rankfeatures=") + .append(query.getRanking().getFeatures().toString()); + } + + if (query.getModel().getRestrict() != null) { + s.append(" restrict=").append(query.getModel().getRestrict().toString()); + } + + if (quotedSummaryClass.isPresent()) { + s.append(" summary=").append(quotedSummaryClass.get()); + } + + query.trace(s.toString(), false, level); + if (query.isTraceable(level + 1)) { + query.trace("Current state of query tree: " + + new TextualQueryRepresentation(query.getModel().getQueryTree().getRoot()), + false, level+1); + } + if (query.isTraceable(level + 2)) { + query.trace("YQL+ representation: " + query.yqlRepresentation(), level+2); + } + } + + protected void addMetaInfo(Query query, QueryPacketData queryPacketData, QueryResultPacket resultPacket, Result result, boolean fromCache) { + result.setTotalHitCount(resultPacket.getTotalDocumentCount()); + + // Grouping + if (resultPacket.getGroupData() != null) { + byte[] data = resultPacket.getGroupData(); + ArrayList<Grouping> list = new ArrayList<>(); + BufferSerializer buf = new BufferSerializer(new GrowableByteBuffer(ByteBuffer.wrap(data))); + int cnt = buf.getInt(null); + for (int i = 0; i < cnt; i++) { + Grouping g = new Grouping(); + g.deserialize(buf); + list.add(g); + } + GroupingListHit hit = new GroupingListHit(list, getDocsumDefinitionSet(query)); + hit.setQuery(result.getQuery()); + hit.setSource(getName()); + hit.setSourceNumber(sourceNumber); + hit.setQueryPacketData(queryPacketData); + result.hits().add(hit); + } + + if (resultPacket.getCoverageFeature()) { + result.setCoverage(new Coverage(resultPacket.getCoverageDocs(), resultPacket.getActiveDocs())); + } + } + + private boolean fillHit(FastHit hit, DocsumPacket packet, String summaryClass) { + if (packet != null) { + byte[] docsumdata = packet.getData(); + if (docsumdata.length > 0) { + decodeSummary(summaryClass, hit, docsumdata); + return true; + } + } + return false; + } + + 
/** + * Fills the hits. + * + * @return the number of hits that we did not return data for, i.e + * when things are working normally we return 0. + */ + protected int fillHits(Result result, int packetIndex, Packet[] packets, String summaryClass) throws IOException { + int skippedHits=0; + for (Iterator<Hit> i = hitIterator(result); i.hasNext();) { + Hit hit = i.next(); + + if (hit instanceof FastHit && !hit.isFilled(summaryClass)) { + FastHit fastHit = (FastHit) hit; + + ensureInstanceOf(DocsumPacket.class, packets[packetIndex]); + DocsumPacket docsum = (DocsumPacket) packets[packetIndex]; + + packetIndex++; + if ( ! fillHit(fastHit, docsum, summaryClass)) + skippedHits++; + } + } + result.hits().setSorted(false); + return skippedHits; + } + + /** + * Throws an IOException if the packet is not of the expected type + */ + protected final void ensureInstanceOf(Class<? extends BasicPacket> type, BasicPacket packet) throws IOException { + if ((type.isAssignableFrom(packet.getClass()))) return; + + if (packet instanceof ErrorPacket) { + ErrorPacket errorPacket=(ErrorPacket)packet; + if (errorPacket.getErrorCode() == 8) + throw new TimeoutException("Query timed out in " + getName()); + else + throw new IOException("Received error from backend in " + getName() + ": " + packet); + } else { + throw new IOException("Received " + packet + " when expecting " + type); + } + } + + private boolean addCachedHits(Result result, + PacketWrapper packetWrapper, + String summaryClass, + List<DocumentInfo> documents) { + boolean filledAllOfEm = true; + Query myQuery = result.getQuery(); + + for (DocumentInfo document : documents) { + FastHit hit = new FastHit(); + hit.setQuery(myQuery); + + hit.setUseRowInIndexUri(useRowInIndexUri(result)); + hit.setFillable(); + hit.setCached(true); + + extractDocumentInfo(hit, document); + + DocsumPacket docsum = (DocsumPacket) packetWrapper.getPacket(document.getGlobalId(), document.getPartId(), summaryClass); + + if (docsum != null) { + byte[] 
docsumdata = docsum.getData(); + + if (docsumdata.length > 0) { + decodeSummary(summaryClass, hit, docsumdata); + } else { + filledAllOfEm = false; + } + } else { + filledAllOfEm = false; + } + + result.hits().add(hit); + + } + + return filledAllOfEm; + } + + private boolean useRowInIndexUri(Result result) { + return ! ((result.getQuery().properties().getString(grouping) != null) || result.getQuery().properties().getBoolean(combinerows)); + } + + private void extractDocumentInfo(FastHit hit, DocumentInfo document) { + hit.setSourceNumber(sourceNumber); + hit.setSource(getName()); + + Number rank = document.getMetric(); + + hit.setRelevance(new Relevance(rank.doubleValue())); + + hit.setDistributionKey(document.getDistributionKey()); + hit.setGlobalId(document.getGlobalId()); + hit.setPartId(document.getPartId(), rowBits); + } + + protected PacketWrapper cacheLookupTwoPhase(CacheKey cacheKey, Result result, String summaryClass) { + Query query = result.getQuery(); + PacketWrapper packetWrapper = cacheControl.lookup(cacheKey, query); + + if (packetWrapper == null) { + return null; + } + if (packetWrapper.getNumPackets() != 0) { + for (Iterator<Hit> i = hitIterator(result); i.hasNext();) { + Hit hit = i.next(); + + if (hit instanceof FastHit) { + FastHit fastHit = (FastHit) hit; + DocsumPacketKey key = new DocsumPacketKey(fastHit.getGlobalId(), fastHit.getPartId(), summaryClass); + + if (fillHit(fastHit, + (DocsumPacket) packetWrapper.getPacket(key), + summaryClass)) { + fastHit.setCached(true); + } + + } + } + result.hits().setSorted(false); + result.analyzeHits(); + } + + return packetWrapper; + } + + protected DocsumDefinitionSet getDocsumDefinitionSet(Query query) { + DocumentDatabase db = getDocumentDatabase(query); + return db.getDocsumDefinitionSet(); + } + + private void decodeSummary(String summaryClass, FastHit hit, byte[] docsumdata) { + DocumentDatabase db = getDocumentDatabase(hit.getQuery()); + hit.setField(Hit.SDDOCNAME_FIELD, db.getName()); + 
decodeSummary(summaryClass, hit, docsumdata, db.getDocsumDefinitionSet()); + } + + private void decodeSummary(String summaryClass, FastHit hit, byte[] docsumdata, DocsumDefinitionSet docsumSet) { + docsumSet.lazyDecode(summaryClass, docsumdata, hit); + hit.setFilled(summaryClass); + } + + /** + * Creates unfilled hits from a List of DocumentInfo instances. Do note + * cacheKey should be available if a cache is active, even if the hit is not + * created from a cache in the current call path. + * + * @param queryPacketData binary data from first phase of search, or null + * @param cacheKey the key this hit should match in the packet cache, or null + */ + protected boolean addUnfilledHits(Result result, List<DocumentInfo> documents, boolean fromCache, QueryPacketData queryPacketData, CacheKey cacheKey) { + boolean allHitsOK = true; + Query myQuery = result.getQuery(); + + for (DocumentInfo document : documents) { + + try { + FastHit hit = new FastHit(); + hit.setQuery(myQuery); + if (queryPacketData != null) + hit.setQueryPacketData(queryPacketData); + hit.setCacheKey(cacheKey); + + hit.setUseRowInIndexUri(useRowInIndexUri(result)); + hit.setFillable(); + hit.setCached(fromCache); + + extractDocumentInfo(hit, document); + + result.hits().add(hit); + } catch (ConfigurationException e) { + allHitsOK = false; + getLogger().log(LogLevel.WARNING, "Skipping hit", e); + } catch (Exception e) { + allHitsOK = false; + getLogger().log(LogLevel.ERROR, "Skipping malformed hit", e); + } + } + return allHitsOK; + } + + @SuppressWarnings("rawtypes") + public static VespaBackEndSearcher getSearcher(String s) { + try { + Class c = Class.forName(s); + if (VespaBackEndSearcher.class.isAssignableFrom(c)) { + Constructor[] constructors = c.getConstructors(); + for (Constructor constructor : constructors) { + Class[] parameters = constructor.getParameterTypes(); + if (parameters.length == 0) { + return (VespaBackEndSearcher) constructor.newInstance(); + } + } + throw new 
RuntimeException("Failed initializing " + s); + + } else { + throw new RuntimeException(s + " is not com.yahoo.prelude.fastsearch.VespaBackEndSearcher"); + } + } catch (Exception e) { + throw new RuntimeException("Failure loading class " + s + ", exception :" + e); + } + } + + protected boolean isLoggingFine() { + return getLogger().isLoggable(Level.FINE); + } + public boolean isLocalDispatching() { + return localDispatching; + } + public void setLocalDispatching(boolean localDispatching) { + this.localDispatching = localDispatching; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/XMLField.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/XMLField.java new file mode 100644 index 00000000000..0ccc8b03e3b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/XMLField.java @@ -0,0 +1,95 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Class representing a string field in the result set + * + */ +package com.yahoo.prelude.fastsearch; + + +import java.nio.ByteBuffer; + +import com.yahoo.io.SlowInflate; +import com.yahoo.prelude.hitfield.XMLString; +import com.yahoo.text.Utf8; +import com.yahoo.data.access.Inspector; + + +/** + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class XMLField extends DocsumField implements VariableLengthField { + public XMLField(String name) { + super(name); + } + + private Object convert(String value) { + return new XMLString(value); + } + + @Override + public Object decode(ByteBuffer b) { + long dataLen = 0; + long len = ((long) b.getInt()) & 0xffffffffL; + boolean compressed; + + // if MSB is set this is a compressed field. 
set the compressed + // flag accordingly and decompress the data + compressed = ((len & 0x80000000) != 0); + if (compressed) { + len &= 0x7fffffff; + dataLen = b.getInt(); + len -= 4; + } + + byte[] tmp = new byte[(int) len]; + + b.get(tmp); + + if (compressed) { + SlowInflate inf = new SlowInflate(); + + tmp = inf.unpack(tmp, (int) dataLen); + } + return convert(Utf8.toString(tmp)); + } + + @Override + public Object decode(ByteBuffer b, FastHit hit) { + Object field = decode(b); + hit.setField(name, field); + return field; + } + + @Override + public String toString() { + return "field " + getName() + " type XMLString"; + } + + @Override + public int getLength(ByteBuffer b) { + int offset = b.position(); + // MSB = compression flag, re decode + int len = b.getInt() & 0x7fffffff; + b.position(offset + len + (Integer.SIZE >> 3)); + return len + (Integer.SIZE >> 3); + } + + @Override + public boolean isCompressed(ByteBuffer b) { + int offset = b.position(); + // MSB = compression flag, re decode + int compressed = b.getInt() & 0x80000000; + b.position(offset); + return compressed != 0; + } + + @Override + public int sizeOfLength() { + return Integer.SIZE >> 3; + } + + public Object convert(Inspector value) { + return convert(value.asString("")); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/package-info.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/package-info.java new file mode 100644 index 00000000000..b34b74ccae3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+@ExportPackage +package com.yahoo.prelude.fastsearch; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/grouping/legacy/.gitignore b/container-search/src/main/java/com/yahoo/prelude/grouping/legacy/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/grouping/legacy/.gitignore diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/AnnotateStringFieldPart.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/AnnotateStringFieldPart.java new file mode 100644 index 00000000000..8361cb722e9 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/AnnotateStringFieldPart.java @@ -0,0 +1,45 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +/** TODO: Class header! */ +public class AnnotateStringFieldPart implements FieldPart { + + public static final char RAW_ANNOTATE_BEGIN_CHAR = '\uFFF9'; + public static final char RAW_ANNOTATE_SEPARATOR_CHAR = '\uFFFA'; + public static final char RAW_ANNOTATE_END_CHAR = '\uFFFB'; + + private String content; + private String rawContent; + + public AnnotateStringFieldPart(String source, int index) { + content = ""; + rawContent = ""; + if (source.charAt(index) == RAW_ANNOTATE_BEGIN_CHAR) { + int sep = source.indexOf(RAW_ANNOTATE_SEPARATOR_CHAR, index); + int end = source.indexOf(RAW_ANNOTATE_END_CHAR, index); + + if (sep != -1) { + rawContent = source.substring(index + 1, sep); + if (end != -1 && end > sep) { + content = source.substring(sep + 1, end); + } + else { + content = rawContent; + } + } + } + } + + public boolean isFinal() { return false; } + + public boolean isToken() { return true; } + + public String getContent() { return rawContent; } + + public void setContent(String content) { + this.content = content; + } + + public String 
toString() { return content; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/BoldCloseFieldPart.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/BoldCloseFieldPart.java new file mode 100644 index 00000000000..1b306c26f3e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/BoldCloseFieldPart.java @@ -0,0 +1,14 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +/** + * Represents an element of a hit property which is markup, representing + * end of a bolded area. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class BoldCloseFieldPart extends MarkupFieldPart { + public BoldCloseFieldPart(String content) { + super(content); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/BoldOpenFieldPart.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/BoldOpenFieldPart.java new file mode 100644 index 00000000000..b4e8d1cfbf3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/BoldOpenFieldPart.java @@ -0,0 +1,14 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +/** + * Represents an element of a hit property which is markup representing + * the start of a bolded area. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class BoldOpenFieldPart extends MarkupFieldPart { + public BoldOpenFieldPart(String content) { + super(content); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/FieldIterator.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/FieldIterator.java new file mode 100644 index 00000000000..b1d3abb73a7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/FieldIterator.java @@ -0,0 +1,61 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +import java.util.List; +import java.util.ListIterator; + +/** + * A specialized list iterator to manipulate FieldParts in HitField objects. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class FieldIterator implements ListIterator<FieldPart> { + + private final ListIterator<FieldPart> realIterator; + private final HitField hitField; + + public FieldIterator(List<FieldPart> fieldList, HitField hitField) { + this.hitField = hitField; + realIterator = fieldList.listIterator(); + } + + public void add(FieldPart o) { + realIterator.add(o); + hitField.markDirty(); + } + + public boolean hasNext() { + return realIterator.hasNext(); + } + + public boolean hasPrevious() { + return realIterator.hasPrevious(); + } + + public FieldPart next() { + return realIterator.next(); + } + + public int nextIndex() { + return realIterator.nextIndex(); + } + + public FieldPart previous() { + return realIterator.previous(); + } + + public int previousIndex() { + return realIterator.previousIndex(); + } + + public void remove() { + realIterator.remove(); + hitField.markDirty(); + } + + public void set(FieldPart o) { + realIterator.set(o); + hitField.markDirty(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/FieldPart.java 
/**
 * Represents an element of a hit property
 *
 * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
 */
public interface FieldPart {

    /** Returns whether the content of this part is written as-is, without XML escaping, when a field is rendered as XML */
    boolean isFinal();

    /** Returns whether this part represents (a part of) a token, as opposed to a delimiter string */
    boolean isToken();

    /** Returns the content of this part */
    String getContent();

    /** Returns the string form of this part */
    String toString();

}
+ * + * @author <a href="mailto:larschr@yahoo-inc.com">Lars Christian Jensen</a> + */ +public class HitField { + + private final String name; + private final String rawContent; + private final boolean isCJK; + + private boolean xmlProperty; + + private List<FieldPart> tokenizedContent = null; + private String content = null; + + + private Object original; + + /** + * @param f The field name + * @param c The field content + */ + public HitField(String f, String c) { + this(f, c, c.indexOf(JuniperSearcher.RAW_HIGHLIGHT_CHAR) > -1); + } + + /** + * @param f The field name + * @param c The field content + */ + public HitField(String f, XMLString c) { + this(f, c, c.toString().indexOf(JuniperSearcher.RAW_HIGHLIGHT_CHAR) > -1); + } + + /** + * @param f The field name + * @param c The field content + * @param cjk true if this is a cjk-document + */ + public HitField(String f, String c, boolean cjk) { + this(f, c, cjk, false); + } + + /** + * @param f The field name + * @param c The field content + * @param cjk true if this is a cjk-document + */ + public HitField(String f, XMLString c, boolean cjk) { + this(f, c.toString(), cjk, true); + } + + /** + * @param f The field name + * @param c The field content + * @param cjk true if this is a cjk-document + * @param xmlProperty true if this should not quote XML syntax + */ + public HitField(String f, String c, boolean cjk, boolean xmlProperty) { + name = f; + rawContent = c; + content = null; + isCJK = cjk; + this.xmlProperty = xmlProperty; + } + + + /** + * @return the name of this field + */ + public String getName() { + return name; + } + + /** + * @return the raw/original content of this field + */ + public String getRawContent() { + return rawContent; + } + + private List<FieldPart> tokenizeUnknown() { + List<FieldPart> pre = new ArrayList<>(); + if (rawContent.length() == 0) + return pre; + int i = 0; + int j = 0; + i = rawContent.indexOf('\u001E'); + if (i == 0) { + pre.add(new 
SeparatorFieldPart(rawContent.substring(0,1))); + j = 1; + i = rawContent.indexOf('\u001E', j); + } + while(i != -1) { + tokenizeSnippet(pre, rawContent.substring(j, i)); + pre.add(new SeparatorFieldPart(rawContent.substring(i,i+1))); + i++; + j = i; + i = rawContent.indexOf('\u001E', j); + } + if (j < rawContent.length()) { + tokenizeSnippet(pre, rawContent.substring(j)); + } + return pre; + } + + private boolean isAnnotationChar(char c) { + return c == AnnotateStringFieldPart.RAW_ANNOTATE_BEGIN_CHAR || + c == AnnotateStringFieldPart.RAW_ANNOTATE_SEPARATOR_CHAR || + c == AnnotateStringFieldPart.RAW_ANNOTATE_END_CHAR; + } + + private void tokenizeSnippet(List<FieldPart> resultParts, String content) { + int head = 0; + int tail = 0; + boolean justFinishedIncompleteAnnotation = false; + int numRawHighLightChars = 0; + List<FieldPart> localParts = new ArrayList<>(); + if (content.length() == 0) { + return; + } + + boolean prevHeadLetterOrDigital = Character.isLetterOrDigit(content.charAt(0)); + + for ( ;head < content.length(); head++) { + char headChar = content.charAt(head); + if (isAnnotationChar(headChar)) { + if (headChar == AnnotateStringFieldPart.RAW_ANNOTATE_BEGIN_CHAR) { + int nextHead = content.indexOf(AnnotateStringFieldPart.RAW_ANNOTATE_END_CHAR, head); + boolean incompleteAnnotation = (nextHead == -1); + boolean skippedInvalidHighlightChar = false; + if (head > tail) { + int currHead = head; + if (incompleteAnnotation && + content.charAt(head-1) == JuniperSearcher.RAW_HIGHLIGHT_CHAR && + numRawHighLightChars % 2 == 1) + { + currHead--; // skip invalid highlight char + skippedInvalidHighlightChar = true; + } + localParts.add(createToken(content.substring(tail, currHead), prevHeadLetterOrDigital)); + } + if (!skippedInvalidHighlightChar) { + localParts.add(new AnnotateStringFieldPart(content, head)); + } + head = nextHead; + } else if (headChar == AnnotateStringFieldPart.RAW_ANNOTATE_SEPARATOR_CHAR) { + localParts.clear(); + head = 
content.indexOf(AnnotateStringFieldPart.RAW_ANNOTATE_END_CHAR, head); + justFinishedIncompleteAnnotation = true; + } else if (headChar == AnnotateStringFieldPart.RAW_ANNOTATE_END_CHAR) { + localParts.clear(); + justFinishedIncompleteAnnotation = true; + } + if (head == -1) { + head = content.length(); + } else { + if (head + 1 < content.length()) { + prevHeadLetterOrDigital = Character.isLetterOrDigit(content.charAt(head + 1)); + } + } + tail = head + 1; + } else { + if (headChar == JuniperSearcher.RAW_HIGHLIGHT_CHAR) { + if (justFinishedIncompleteAnnotation) { + tail = head + 1; // skip invalid highlight char + } else { + ++numRawHighLightChars; + } + } + boolean currHeadLetterOrDigital = Character.isLetterOrDigit(headChar); + if (currHeadLetterOrDigital != prevHeadLetterOrDigital & head > tail) { + localParts.add(createToken(content.substring(tail, head), prevHeadLetterOrDigital)); + tail = head; + prevHeadLetterOrDigital = currHeadLetterOrDigital; + } + justFinishedIncompleteAnnotation = false; + } + } + if (head > tail) { + localParts.add(createToken(content.substring(tail), prevHeadLetterOrDigital)); + } + resultParts.addAll(localParts); + } + + private FieldPart createToken(String substring, boolean isToken) { + if (xmlProperty) { + // TODO: Model this with something better than ImmutableFieldPart + return new ImmutableFieldPart(substring, isToken); + } else { + return new StringFieldPart(substring, isToken); + } + } + + private List<FieldPart> tokenizePretokenized() { + String[] pre = rawContent.split("\u001F+"); + List<FieldPart> tokenized = new ArrayList<>(pre.length); + for (int i = 0; i < pre.length; i++) { + tokenized.add(createToken(pre[i], true)); + } + return tokenized; + } + + private void tokenizeContent() { + List<FieldPart> pre; + if (isCJK) { + pre = tokenizePretokenized(); + } else { + pre = tokenizeUnknown(); + } + setTokenizedContentUnchecked(pre); + } + /** + * Get a list representation of the tokens in the content. 
This is + * only a copy, changes here will not affect the HitField. + * + * @return a list containing the content in tokenized form. + */ + public List<FieldPart> getTokenizedContent() { + List<FieldPart> l = new ArrayList<>(); + for (ListIterator<FieldPart> i = tokenIterator(); i.hasNext(); ) { + l.add(i.next()); + } + return l; + } + + private List<FieldPart> ensureTokenized() { + if (tokenizedContent == null) { + tokenizeContent(); + } + return tokenizedContent; + } + /** + * Return an iterator for the tokens, delimiters and markup elements + * of the field. + */ + public ListIterator<FieldPart> listIterator() { + return new FieldIterator(ensureTokenized(), + this); + } + + /** + * Return an iterator for the tokens in the field + */ + public ListIterator<FieldPart> tokenIterator() { + return new TokenFieldIterator(ensureTokenized(), + this); + } + + /** + * Only FieldPart objects must be present in the list. + * + * @param list contains the new content of this HitField in tokenized form. 
+ */ + public void setTokenizedContent(List<FieldPart> list) { + tokenizedContent = new ArrayList<>(list.size()); + for (Iterator<FieldPart> i = list.iterator(); i.hasNext(); ) { + tokenizedContent.add(i.next()); + } + // Must null content reference _before_ calling getContent() + content = null; + } + + private void setTokenizedContentUnchecked(List<FieldPart> list) { + tokenizedContent = list; + // Must null content reference _before_ calling getContent() + content = null; + } + /** + * @return the content of this field + */ + public String getContent() { + if (content == null) { + StringBuilder buf = new StringBuilder(); + Iterator<FieldPart> iter = ensureTokenized().iterator(); + while(iter.hasNext()) { + buf.append(iter.next().getContent()); + } + content = buf.toString(); + } + return content; + } + + /** + * @return the content of this field, using the arguments as bolding + * tags + */ + public String getContent(String boldOpenTag, + String boldCloseTag, + String separatorTag) { + StringBuilder buf = new StringBuilder(); + Iterator<FieldPart> iter = ensureTokenized().iterator(); + while(iter.hasNext()) { + FieldPart f = iter.next(); + if (f instanceof BoldOpenFieldPart + && boldOpenTag != null + && boldOpenTag.length() > 0) + buf.append(boldOpenTag); + else if (f instanceof BoldCloseFieldPart + && boldCloseTag != null + && boldCloseTag.length() > 0) + buf.append(boldCloseTag); + else if (f instanceof SeparatorFieldPart + && separatorTag != null + && separatorTag.length() > 0) + buf.append(separatorTag); + else + buf.append(f.getContent()); + } + return buf.toString(); + } + + public void markDirty() { + content = null; + } + + /** + * @param inAttribute whether to quote quotation marks + * @return the content of this field as an XML string + */ + public String quotedContent(boolean inAttribute) { + StringBuilder xml = new StringBuilder(); + Iterator<FieldPart> iter = ensureTokenized().iterator(); + while(iter.hasNext()) { + FieldPart f = iter.next(); + if 
(f.isFinal()) + xml.append(f.getContent()); + else + xml.append(XML.xmlEscape(f.getContent(), inAttribute)); + } + return xml.toString(); + } + + /** + * @return the content of this field, using the arguments as bolding + * tags, as an XML string + */ + public String quotedContent(String boldOpenTag, + String boldCloseTag, + String separatorTag, + boolean inAttribute) { + StringBuilder xml = new StringBuilder(); + Iterator<FieldPart> iter = ensureTokenized().iterator(); + while(iter.hasNext()) { + FieldPart f = iter.next(); + if (f instanceof BoldOpenFieldPart + && boldOpenTag != null + && boldOpenTag.length() > 0) + xml.append(boldOpenTag); + else if (f instanceof BoldCloseFieldPart + && boldCloseTag != null + && boldCloseTag.length() > 0) + xml.append(boldCloseTag); + else if (f instanceof SeparatorFieldPart + && separatorTag != null + && separatorTag.length() > 0) + xml.append(separatorTag); + else if (f.isFinal()) + xml.append(f.getContent()); + else + xml.append(XML.xmlEscape(f.getContent(), inAttribute)); + } + return xml.toString(); + } + /** + * @return the content of the field, stripped of markup + */ + public String bareContent(boolean XMLQuote, boolean inAttribute) { + StringBuilder bareContent = new StringBuilder(); + Iterator<FieldPart> iter = ensureTokenized().iterator(); + while(iter.hasNext()) { + FieldPart f = iter.next(); + if (f instanceof MarkupFieldPart) + continue; + + if (XMLQuote) + bareContent.append(XML.xmlEscape(f.getContent(), inAttribute)); + else + bareContent.append(f.getContent()); + } + return bareContent.toString(); + } + + public String toString() { + return getContent(); + } + + /** + * Fetch the object which (the String representation of) this HitField was + * built from. This may be null as setting the original is optional. + */ + public Object getOriginal() { + return original; + } + + /** + * Optionally set the object which this HitField should represent. 
+     */
+    public void setOriginal(Object original) {
+        this.original = original;
+    }
+
+}
diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/ImmutableFieldPart.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/ImmutableFieldPart.java
new file mode 100644
index 00000000000..d7bfe0e287d
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/ImmutableFieldPart.java
@@ -0,0 +1,60 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.hitfield;
+
+/**
+ * Represents an element of a hit property which is an immutable
+ * string element: neither its content nor its token state can be
+ * changed after construction.
+ *
+ * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
+ */
+public class ImmutableFieldPart implements FieldPart {
+    private final String content;
+    private final String initContent;
+    // Whether this element represents a (part of) a token or a
+    // delimiter string. When splitting existing parts, the new
+    // parts should inherit this state from the object they were
+    // split from.
+    private final boolean tokenOrDelimiter;
+
+    /**
+     * Creates a part whose content is the same as its initial content.
+     *
+     * @param initContent the initial and current content of this part
+     * @param tokenOrDelimiter the value returned by isToken()
+     */
+    public ImmutableFieldPart(String initContent,
+                              boolean tokenOrDelimiter) {
+        this(initContent, initContent, tokenOrDelimiter);
+    }
+
+    /**
+     * Creates a part with separately given initial and current content.
+     *
+     * @param initContent the value returned by getInitContent()
+     * @param content the value returned by getContent() and toString()
+     * @param tokenOrDelimiter the value returned by isToken()
+     */
+    public ImmutableFieldPart(String initContent,
+                              String content,
+                              boolean tokenOrDelimiter) {
+
+        this.initContent = initContent;
+        this.content = content;
+        this.tokenOrDelimiter = tokenOrDelimiter;
+    }
+
+    @Override
+    public boolean isFinal() { return true; }
+
+    @Override
+    public boolean isToken() { return tokenOrDelimiter; }
+
+    @Override
+    public String getContent() { return content; }
+
+    public String getInitContent() { return initContent; }
+
+    @Override
+    public String toString() { return content; }
+}
diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/JSONString.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/JSONString.java
new file mode 100644
index 00000000000..f8992c7004c
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/JSONString.java
@@ -0,0 +1,449 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.hitfield;
+
+import com.yahoo.prelude.query.WeightedSetItem;
+import com.yahoo.text.Utf8;
+import com.yahoo.text.XML;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import com.yahoo.data.access.Inspector;
+import com.yahoo.data.access.Inspectable;
+import com.yahoo.data.access.Type;
+import com.yahoo.data.access.simple.Value;
+import com.yahoo.data.access.slime.SlimeAdapter;
+import com.yahoo.slime.Slime;
+import com.yahoo.slime.JsonDecoder;
+import java.util.Iterator;
+
+/**
+ * A JSON wrapper. Contains XML-style rendering of a JSON structure.
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class JSONString implements Inspectable { + + private static final long serialVersionUID = -3929383619752472712L; + private Inspector value; + private String content; + private boolean didInitContent = false; + private Object parsedJSON; + private boolean didInitJSON = false; + + public JSONString(Inspector value) { + if (value == null) { + throw new IllegalArgumentException("JSONString does not accept null value."); + } + this.value = value; + } + + public Inspector inspect() { + if (value == null) { + JsonDecoder decoder = new JsonDecoder(); + Slime slime = decoder.decode(new Slime(), Utf8.toBytes(content)); + if (slime.get().field("error_message").valid() && + slime.get().field("partial_result").valid() && + slime.get().field("offending_input").valid()) + { + // probably a json parse error... + value = new Value.StringValue(content); + } else if (slime.get().type() == com.yahoo.slime.Type.OBJECT || + slime.get().type() == com.yahoo.slime.Type.ARRAY) + { + // valid json object or array + value = new SlimeAdapter(slime.get()); + } else { + // 'valid' json, but leaf value + value = new Value.StringValue(content); + } + } + return value; + } + + private void initContent() { + if (didInitContent) { + return; + } + didInitContent = true; + if (value.type() == Type.EMPTY) { + content = ""; + } else if (value.type() == Type.STRING) { + content = value.asString(); + } else { + // This will be json, because we know there is Slime below + content = value.toString(); + } + } + + /** + * @throws IllegalArgumentException Does not accept null content + */ + public JSONString(String content) { + if (content == null) { + throw new IllegalArgumentException("JSONString does not accept null content."); + } + this.content = content; + didInitContent = true; + } + + public String toString() { + if (value != null) { + return renderFromInspector(); + } + initContent(); + if (content.length() == 0) { 
+            return content;
+        }
+        initJSON();
+        if (parsedJSON == null) {
+            return content;
+        } else if (parsedJSON.getClass() == JSONArray.class) {
+            return render((JSONArray) parsedJSON);
+        } else if (parsedJSON.getClass() == JSONObject.class) {
+            return render((JSONObject) parsedJSON);
+        } else {
+            return content;
+        }
+    }
+
+    public boolean fillWeightedSetItem(WeightedSetItem item) {
+        initContent();
+        initJSON();
+        try {
+            if (parsedJSON instanceof JSONArray) {
+                JSONArray seq = (JSONArray)parsedJSON;
+                for (int i = 0; i < seq.length(); i++) {
+                    JSONArray wsi = seq.getJSONArray(i);
+                    String name = (String)wsi.get(0);
+                    Number weight = (Number) wsi.get(1);
+                    item.addToken(name, weight.intValue());
+                }
+                return true;
+            }
+        } catch (JSONException | ClassCastException e) { // not a list of [name, weight] pairs: report failure below
+        }
+        return false;
+    }
+
+    /**
+     * Parses the content as a JSON array or object, at most once. parsedJSON is left untouched if this fails.
+     */
+    private void initJSON() {
+        initContent();
+        if (didInitJSON) {
+            return;
+        }
+        didInitJSON = true;
+        if (content.isEmpty()) {
+            return; // nothing to parse, and charAt(0) below would throw on empty content
+        }
+        try {
+            if (content.charAt(0) == '[') {
+                parsedJSON = new JSONArray(content);
+            } else {
+                parsedJSON = new JSONObject(content);
+            }
+        } catch (JSONException e) {
+            // not valid JSON: toString() falls back to the raw content when parsedJSON is null
+        }
+    }
+
+    private static String render(JSONArray sequence) {
+        return FieldRenderer.renderMapOrArray(new StringBuilder(), sequence, 2).toString();
+    }
+
+    private static String render(JSONObject structure) {
+        return FieldRenderer.renderStruct(new StringBuilder(), structure, 2).toString();
+    }
+
+    private static abstract class FieldRenderer {
+
+        protected static void indent(StringBuilder renderTarget, int nestingLevel) {
+            for (int i = 0; i < nestingLevel; ++i) {
+                renderTarget.append(" ");
+            }
+        }
+
+        public static StringBuilder renderMapOrArray(StringBuilder renderTarget,
+                                                     JSONArray sequence,
+                                                     int nestingLevel)
+        {
+            if (sequence.length() == 0) return renderTarget;
+
+            if (MapFieldRenderer.isMap(sequence)) {
+                MapFieldRenderer.renderMap(renderTarget, sequence,
nestingLevel + 1); + } else { + ArrayFieldRenderer.renderArray(renderTarget, sequence, nestingLevel + 1); + } + indent(renderTarget, nestingLevel); + return renderTarget; + } + + public static StringBuilder renderStruct(StringBuilder renderTarget, JSONObject object, int nestingLevel) { + StructureFieldRenderer.renderStructure(renderTarget, object, nestingLevel + 1); + indent(renderTarget, nestingLevel); + return renderTarget; + } + + public abstract void render(StringBuilder renderTarget, Object value, int nestingLevel); + + public abstract void closeTag(StringBuilder renderTarget, int nestingLevel, String closing); + + /** Returns a value from an object, or null if not found */ + protected static Object get(String field,JSONObject source) { + try { + return source.get(field); + } + catch (JSONException e) { // not found + return null; + } + } + + protected static void renderValue(Object value,StringBuilder renderTarget,int nestingLevel) { + if (value.getClass() == JSONArray.class) { + renderMapOrArray(renderTarget, (JSONArray) value, nestingLevel); + } else if (value instanceof Number) { + NumberFieldRenderer.renderNumber(renderTarget, (Number) value); + } else if (value.getClass() == String.class) { + StringFieldRenderer.renderString(renderTarget, (String) value); + } else if (value.getClass() == JSONObject.class) { + renderStruct(renderTarget, (JSONObject) value, nestingLevel); + } else { + renderTarget.append(value.toString()); + } + } + + } + + private static class MapFieldRenderer extends FieldRenderer { + + @Override + public void render(StringBuilder renderTarget, Object value, int nestingLevel) { + renderMap(renderTarget, (JSONArray) value, nestingLevel); + } + + /** Returns true if the given JSON object contains a map - a list of pairs called "key" and "value" */ + private static boolean isMap(JSONArray array) { + Object firstObject=get(0,array); + if ( ! 
(firstObject instanceof JSONObject)) return false; + JSONObject first=(JSONObject)firstObject; + if (first.length()!=2) return false; + if ( ! first.has("key")) return false; + if ( ! first.has("value")) return false; + return true; + } + + public static void renderMap(StringBuilder renderTarget, JSONArray sequence, int nestingLevel) { + int limit = sequence.length(); + if (limit == 0) return; + for (int i = 0; i < limit; ++i) + renderMapItem(renderTarget, (JSONObject)get(i,sequence), nestingLevel); + renderTarget.append("\n"); + } + + public static void renderMapItem(StringBuilder renderTarget, JSONObject object, int nestingLevel) { + renderTarget.append('\n'); + indent(renderTarget, nestingLevel); + renderTarget.append("<item><key>"); + renderValue(get("key",object), renderTarget, nestingLevel); + renderTarget.append("</key><value>"); + renderValue(get("value",object), renderTarget, nestingLevel); + renderTarget.append("</value></item>"); + } + + /** Returns a value from an array, or null if it does not exist */ + private static Object get(int index,JSONArray source) { + try { + return source.get(index); + } + catch (JSONException e) { // not found + return null; + } + } + + @Override + public void closeTag(StringBuilder renderTarget, int nestingLevel, String closing) { + indent(renderTarget, nestingLevel); + renderTarget.append(closing); + } + } + + private static class StructureFieldRenderer extends FieldRenderer { + @Override + public void render(StringBuilder renderTarget, Object value, int nestingLevel) { + renderStructure(renderTarget, (JSONObject) value, nestingLevel); + } + + public static void renderStructure(StringBuilder renderTarget, JSONObject structure, int nestingLevel) { + for (Iterator<?> i = structure.keys(); i.hasNext();) { + String key = (String) i.next(); + Object value=get(key,structure); + if (value==null) continue; + renderTarget.append('\n'); + indent(renderTarget, nestingLevel); + renderTarget.append("<struct-field 
name=\"").append(XML.xmlEscape(key, true)).append("\">"); // the key is data: escape it for use in an attribute
+                renderValue(value, renderTarget, nestingLevel);
+                renderTarget.append("</struct-field>");
+            }
+            renderTarget.append('\n');
+        }
+
+        @Override
+        public void closeTag(StringBuilder renderTarget, int nestingLevel, String closing) {
+            indent(renderTarget, nestingLevel);
+            renderTarget.append(closing);
+        }
+    }
+
+    private static class NumberFieldRenderer extends FieldRenderer {
+        @Override
+        public void render(StringBuilder renderTarget, Object value, int nestingLevel) {
+            renderNumber(renderTarget, (Number) value);
+        }
+
+        public static void renderNumber(StringBuilder renderTarget, Number number) {
+            renderTarget.append(number.toString());
+        }
+
+        @Override
+        public void closeTag(StringBuilder renderTarget, int nestingLevel, String closing) {
+            renderTarget.append(closing);
+        }
+    }
+
+    private static class StringFieldRenderer extends FieldRenderer {
+        @Override
+        public void render(StringBuilder renderTarget, Object value, int nestingLevel) {
+            renderString(renderTarget, (String) value);
+        }
+
+        public static void renderString(StringBuilder renderTarget, String value) {
+            renderTarget.append(XML.xmlEscape(value, false));
+        }
+
+        @Override
+        public void closeTag(StringBuilder renderTarget, int nestingLevel, String closing) {
+            renderTarget.append(closing);
+        }
+    }
+
+    private static class ArrayFieldRenderer extends FieldRenderer {
+        protected static FieldRenderer structureFieldRenderer = new StructureFieldRenderer();
+        protected static FieldRenderer stringFieldRenderer = new StringFieldRenderer();
+        protected static FieldRenderer numberFieldRenderer = new NumberFieldRenderer();
+
+        @Override
+        public void render(StringBuilder renderTarget, Object value, int nestingLevel) {
+            // Only for completeness
+            renderArray(renderTarget, (JSONArray) value, nestingLevel);
+        }
+
+        public static void renderArray(StringBuilder renderTarget, JSONArray seq, int nestingLevel) {
+            FieldRenderer renderer;
+            int limit = seq.length();
+            if (limit == 0)
return; + Object sniffer; + try { + sniffer = seq.get(0); + } catch (JSONException e) { + return; + } + if (sniffer.getClass() == JSONArray.class) { + renderWeightedSet(renderTarget, seq, nestingLevel); + return; + } else if (sniffer.getClass() == JSONObject.class) { + renderer = structureFieldRenderer; + } else if (sniffer instanceof Number) { + renderer = numberFieldRenderer; + } else if (sniffer.getClass() == String.class) { + renderer = stringFieldRenderer; + } else { + return; + } + renderTarget.append('\n'); + for (int i = 0; i < limit; ++i) { + Object value; + try { + value = seq.get(i); + } catch (JSONException e) { + continue; + } + indent(renderTarget, nestingLevel); + renderTarget.append("<item>"); + renderer.render(renderTarget, value, nestingLevel + 1); + renderer.closeTag(renderTarget, nestingLevel, "</item>\n"); + } + } + + protected static void renderWeightedSet(StringBuilder renderTarget, + JSONArray seq, int nestingLevel) { + int limit = seq.length(); + Object sniffer; + FieldRenderer renderer; + + try { + JSONArray first = seq.getJSONArray(0); + sniffer = first.get(0); + } catch (JSONException e) { + return; + } + + if (sniffer.getClass() == JSONObject.class) { + renderer = structureFieldRenderer; + } else if (sniffer instanceof Number) { + renderer = numberFieldRenderer; + } else if (sniffer.getClass() == String.class) { + renderer = stringFieldRenderer; + } else { + return; + } + renderTarget.append('\n'); + for (int i = 0; i < limit; ++i) { + JSONArray value; + Object name; + Number weight; + + try { + value = seq.getJSONArray(i); + name = value.get(0); + weight = (Number) value.get(1); + + } catch (JSONException e) { + continue; + } + indent(renderTarget, nestingLevel); + renderTarget.append("<item weight=\"").append(weight).append("\">"); + renderer.render(renderTarget, name, nestingLevel + 1); + renderer.closeTag(renderTarget, nestingLevel, "</item>\n"); + } + } + + @Override + public void closeTag(StringBuilder renderTarget, int 
nestingLevel, String closing) { + indent(renderTarget, nestingLevel); + renderTarget.append(closing); + } + } + + public String getContent() { + initContent(); + return content; + } + + public Object getParsedJSON() { + initContent(); + if (parsedJSON == null) { + initJSON(); + } + return parsedJSON; + } + + public void setParsedJSON(Object parsedJSON) { + this.parsedJSON = parsedJSON; + } + + public String renderFromInspector() { + return XmlRenderer.render(new StringBuilder(), value).toString(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/MarkupFieldPart.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/MarkupFieldPart.java new file mode 100644 index 00000000000..6fdf7662b9b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/MarkupFieldPart.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +/** + * Represents an element of a hit property which is markup, not content. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class MarkupFieldPart implements FieldPart { + private String content; + public MarkupFieldPart(String content) { + this.content = content; + } + public boolean isFinal() { return true; } + // Markup is never part of tokens as such + public boolean isToken() { return false; } + public void setContent(String content) { + this.content = content; + } + public String getContent() { return content; } + public String toString() { return content; } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/RawData.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/RawData.java new file mode 100644 index 00000000000..26787e442fc --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/RawData.java @@ -0,0 +1,55 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.hitfield;
+
+/**
+ * A representation of some random data with unknown semantics
+ *
+ * @author arnej27959
+ */
+public final class RawData
+{
+    private final byte[] content;
+
+    /**
+     * Constructor, takes ownership
+     * @param content some bytes, handover
+     */
+    public RawData(byte[] content) {
+        this.content = content;
+    }
+
+    /**
+     * @return internal byte array containing the actual data received
+     **/
+    public byte[] getInternalData() {
+        return content;
+    }
+
+    /**
+     * Renders the data as an ASCII string. Printable ASCII characters,
+     * newline and tab are copied as is; every other byte is written as a
+     * two-digit lowercase hex escape, e.g. "\x00" or "\xff".
+     * NB: not always uniquely reversible, since a literal "\x00" in the data
+     * renders the same as an escaped zero byte.
+     *
+     * @return the escaped ASCII rendering of the content
+     **/
+    @Override
+    public String toString() {
+        StringBuilder buf = new StringBuilder();
+        for (byte b : content) {
+            int i = b & 0xFF; // bytes are signed in Java: map to 0..255
+            if ((i > 31 && i < 127) || i == '\n' || i == '\t') {
+                buf.append((char) i);
+            } else {
+                // control characters and non-ASCII bytes
+                buf.append("\\x");
+                if (i < 16) {
+                    buf.append('0'); // pad to two hex digits
+                }
+                buf.append(Integer.toHexString(i));
+            }
+        }
+        return buf.toString();
+    }
+}
diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/SeparatorFieldPart.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/SeparatorFieldPart.java
new file mode 100644
index 00000000000..30a82bdf323
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/SeparatorFieldPart.java
@@ -0,0 +1,14 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.hitfield;
+
+/**
+ * Represents an element of a hit property which is markup for
+ * separating dynamic snippets.
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class SeparatorFieldPart extends MarkupFieldPart { + public SeparatorFieldPart(String content) { + super(content); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/StringFieldPart.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/StringFieldPart.java new file mode 100644 index 00000000000..2d04fa3d08d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/StringFieldPart.java @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +/** + * Represents an element of a hit property which is a possibly + * mutable string element + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class StringFieldPart implements FieldPart { + private String content; + private final String initContent; + // Whether this element represents a (part of) a token or a + // delimiter string. When splitting existing parts, the new + // parts should inherit this state from the object they were + // split from. 
+ private boolean tokenOrDelimiter; + public StringFieldPart(String content, boolean tokenOrDelimiter) { + this.content = content; + initContent = content; + this.tokenOrDelimiter = tokenOrDelimiter; + } + public boolean isFinal() { return false; } + public boolean isToken() { return tokenOrDelimiter; } + public String getContent() { return content; } + public void setContent(String content) { + this.content = content; + } + public String getInitContent() { return initContent; } + public String toString() { return content; } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/TokenFieldIterator.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/TokenFieldIterator.java new file mode 100644 index 00000000000..3c055472337 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/TokenFieldIterator.java @@ -0,0 +1,109 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +import java.util.List; +import java.util.ListIterator; +import java.util.NoSuchElementException; + +/** + * A specialized list iterator to manipulate tokens in HitField objects. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class TokenFieldIterator implements ListIterator<FieldPart> { + + private int index = 0; + private int prevReturned = 0; + private final List<FieldPart> fieldList; + private final HitField hitField; + + public TokenFieldIterator(List<FieldPart> fieldList, HitField hitField) { + this.fieldList = fieldList; + this.hitField = hitField; + } + + public void add(FieldPart o) { + fieldList.add(index, o); + index++; + hitField.markDirty(); + } + + public boolean hasNext() { + int i = index; + while (i < fieldList.size()) { + if (fieldList.get(i).isToken()) + return true; + i++; + } + return false; + } + + public boolean hasPrevious() { + int i = index; + while (i > 0) { + i--; + if (fieldList.get(i).isToken()) + return true; + } + return false; + } + + public FieldPart next() { + int i = index; + while (i < fieldList.size()) { + if (fieldList.get(i).isToken()) { + index = i + 1; + prevReturned = i; + return fieldList.get(i); + } + i++; + } + throw new NoSuchElementException("No more tokens available."); + } + + public int nextIndex() { + int i = index; + while (i < fieldList.size()) { + if (fieldList.get(i).isToken()) + return i; + i++; + } + return fieldList.size(); + } + + public FieldPart previous() { + int i = index; + while (i > 0) { + i--; + if (fieldList.get(i).isToken()) { + index = i; + prevReturned = i; + return fieldList.get(i); + } + } + throw new NoSuchElementException("Trying to go before first token available."); + } + + public int previousIndex() { + int i = index; + while (i > 0) { + i--; + if (fieldList.get(i).isToken()) + return i; + } + return -1; + } + + public void remove() { + fieldList.remove(prevReturned); + if (prevReturned < index) + index--; + hitField.markDirty(); + } + + public void set(FieldPart o) { + fieldList.set(prevReturned, o); + hitField.markDirty(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/XMLString.java 
b/container-search/src/main/java/com/yahoo/prelude/hitfield/XMLString.java new file mode 100644 index 00000000000..9338c8ca53a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/XMLString.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +/** + * A representation of an XML chunk. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class XMLString { + + private final String content; + + public XMLString(String content) { + this.content = content; + } + + public String toString() { + return content; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/XmlRenderer.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/XmlRenderer.java new file mode 100644 index 00000000000..13f94769d1f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/XmlRenderer.java @@ -0,0 +1,201 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.hitfield; + +import com.yahoo.text.Utf8; +import com.yahoo.text.XML; +import com.yahoo.data.access.Inspector; +import com.yahoo.data.access.Inspectable; +import com.yahoo.data.access.Type; +import com.yahoo.data.access.simple.Value; +import com.yahoo.data.access.slime.SlimeAdapter; +import java.nio.charset.StandardCharsets; + +import java.util.Iterator; +import java.util.Map; + +/** + * Utility class for converting accessible data into the historical "prelude" xml format. 
+ **/ +public class XmlRenderer { + + public static StringBuilder render(StringBuilder target, Inspector value) { + new InspectorRenderer(target).renderInspector(value, 2); + return target; + } + + private static class InspectorRenderer { + + private final StringBuilder renderTarget; + + InspectorRenderer(StringBuilder target) { + this.renderTarget = target; + } + + void renderInspector(Inspector value, int nestingLevel) { + if (value.type() == Type.ARRAY) { + renderMapOrArray(value, nestingLevel); + } else if (value.type() == Type.OBJECT) { + renderStruct(value, nestingLevel); + } else if (value.type() == Type.STRING) { + renderTarget.append(XML.xmlEscape(value.asString(), false)); + } else if (value.type() == Type.LONG) { + long l = value.asLong(); + renderTarget.append(String.valueOf(l)); + } else if (value.type() == Type.DOUBLE) { + double d = value.asDouble(); + renderTarget.append(String.valueOf(d)); + } else if (value.type() == Type.BOOL) { + boolean b = value.asBool(); + renderTarget.append(b ? "true" : "false"); + } else if (value.type() == Type.DATA) { + byte[] data = value.asData(); + renderTarget.append("<data length=\"").append(data.length); + renderTarget.append("\" encoding=\"hex\">"); + for (int i = 0; i < data.length; i++) { + for (int sh = 4; sh >= 0; sh -= 4) { + int val = (data[i] >> sh) & 0xF; + char hexdigit = (val < 10) ? 
((char)('0' + val)) : ((char)('A' + val - 10));
+                        renderTarget.append(hexdigit);
+                    }
+                }
+                renderTarget.append("</data>");
+            }
+        }
+
+        private void renderMapItem(Inspector object, int nestingLevel) {
+            renderTarget.append('\n');
+            indent(nestingLevel);
+            renderTarget.append("<item><key>");
+            renderInspector(object.field("key"), nestingLevel);
+            renderTarget.append("</key><value>");
+            renderInspector(object.field("value"), nestingLevel);
+            renderTarget.append("</value></item>");
+        }
+
+        private void renderStructure(Inspector structure, int nestingLevel) {
+            for (Map.Entry<String,Inspector> entry : structure.fields()) {
+                String key = entry.getKey(); // field name is data: escaped below before use in an attribute
+                Inspector value = entry.getValue();
+                renderTarget.append('\n');
+                indent(nestingLevel);
+                renderTarget.append("<struct-field name=\"").append(XML.xmlEscape(key, true)).append("\">");
+                renderInspector(value, nestingLevel);
+                renderTarget.append("</struct-field>");
+            }
+            renderTarget.append('\n');
+        }
+
+        private void renderStruct(Inspector object, int nestingLevel) {
+            renderStructure(object, nestingLevel + 1);
+            indent(nestingLevel);
+        }
+
+        private void indent(int nestingLevel) {
+            for (int i = 0; i < nestingLevel; ++i) {
+                renderTarget.append(" ");
+            }
+        }
+
+        private void renderMap(Inspector sequence, int nestingLevel) {
+            int limit = sequence.entryCount();
+            if (limit == 0) return;
+            for (int i = 0; i < limit; ++i)
+                renderMapItem(sequence.entry(i), nestingLevel);
+            renderTarget.append("\n");
+        }
+
+        /** Returns true if the given array represents a map - a list of pairs called "key" and "value" */
+        private boolean isMap(Inspector array) {
+            Inspector firstObject = array.entry(0);
+            if (firstObject.type() != Type.OBJECT) return false;
+            if (firstObject.fieldCount() != 2) return false;
+            if (! firstObject.field("key").valid()) return false;
+            if (!
firstObject.field("value").valid()) return false; + return true; + } + + /** + * Returns true if the given array represents a weighted set, + * as a list of pairs called "item" and "weight" + **/ + private boolean isWeightedSetObjects(Inspector array) { + Inspector firstObject = array.entry(0); + if (firstObject.type() != Type.OBJECT) return false; + if (firstObject.fieldCount() != 2) return false; + if (! firstObject.field("item").valid()) return false; + if (! firstObject.field("weight").valid()) return false; + return true; + } + + /** + * Returns true if the given array represents a weighted set, + * as a list of tuples + **/ + private boolean isWeightedSetArrays(Inspector array) { + Inspector firstObject = array.entry(0); + if (firstObject.type() != Type.ARRAY) return false; + if (firstObject.entryCount() != 2) return false; + return true; + } + + private void renderMapOrArray(Inspector sequence, int nestingLevel) + { + if (sequence.entryCount() == 0) return; + if (isMap(sequence)) { + renderMap(sequence, nestingLevel + 1); + } else if (isWeightedSetArrays(sequence)) { + renderWeightedSet(sequence, nestingLevel + 1, true); + } else if (isWeightedSetObjects(sequence)) { + renderWeightedSet(sequence, nestingLevel + 1, false); + } else { + renderArray(sequence, nestingLevel + 1); + } + indent(nestingLevel); + } + + private void renderWeightedSet(Inspector seq, int nestingLevel, boolean nestedarray) + { + int limit = seq.entryCount(); + renderTarget.append('\n'); + for (int i = 0; i < limit; ++i) { + Inspector value = nestedarray ? seq.entry(i).entry(0) : seq.entry(i).field("item"); + Inspector weight = nestedarray ? 
seq.entry(i).entry(1) : seq.entry(i).field("weight"); + long lw = 0; + double dw = 0; + if (weight.type() == Type.LONG) { + lw = weight.asLong(); + dw = (double)lw; + } + if (weight.type() == Type.DOUBLE) { + dw = weight.asDouble(); + lw = (long)dw; + } + indent(nestingLevel); + renderTarget.append("<item weight=\""); + if (dw == (double)lw || weight.type() == Type.LONG) { + renderTarget.append(lw); + } else { + renderTarget.append(dw); + } + renderTarget.append("\">"); + renderInspector(value, nestingLevel); + renderTarget.append("</item>\n"); + } + } + + private void renderArray(Inspector seq, int nestingLevel) { + int limit = seq.entryCount(); + if (limit == 0) return; + renderTarget.append('\n'); + for (int i = 0; i < limit; ++i) { + Inspector value = seq.entry(i); + indent(nestingLevel); + renderTarget.append("<item>"); + renderInspector(value, nestingLevel); + renderTarget.append("</item>\n"); + } + } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/hitfield/package-info.java b/container-search/src/main/java/com/yahoo/prelude/hitfield/package-info.java new file mode 100644 index 00000000000..9e5a5b08c6b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/hitfield/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.prelude.hitfield; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/logging/AccessLogEntry.java b/container-search/src/main/java/com/yahoo/prelude/logging/AccessLogEntry.java new file mode 100644 index 00000000000..b327fed1c4b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/logging/AccessLogEntry.java @@ -0,0 +1,15 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.logging; + +/** + * Hollow compatibility class for com.yahoo.container.logging.AccessLogEntry. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class AccessLogEntry extends com.yahoo.container.logging.AccessLogEntry { + + public AccessLogEntry() { + super(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/logging/package-info.java b/container-search/src/main/java/com/yahoo/prelude/logging/package-info.java new file mode 100644 index 00000000000..6ba2f1ce648 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/logging/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.prelude.logging; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/package-info.java b/container-search/src/main/java/com/yahoo/prelude/package-info.java new file mode 100644 index 00000000000..fdb1a2068a4 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/package-info.java @@ -0,0 +1,8 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * The main classes of the Vespa Query Result Server. As an external API, this is <b>deprecated</b> starting with Vespa 4.2 + */ +@ExportPackage +package com.yahoo.prelude; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/query/AndItem.java b/container-search/src/main/java/com/yahoo/prelude/query/AndItem.java new file mode 100644 index 00000000000..ad891f821f6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/AndItem.java @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query; + + +/** + * An and'ing of a collection of sub-expressions + * + * @author bratseth + */ +public class AndItem extends CompositeItem { + + public ItemType getItemType() { + return ItemType.AND; + } + + public String getName() { + return "AND"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/AndSegmentItem.java b/container-search/src/main/java/com/yahoo/prelude/query/AndSegmentItem.java new file mode 100644 index 00000000000..189c8260785 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/AndSegmentItem.java @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.util.Iterator; + +import edu.umd.cs.findbugs.annotations.NonNull; + +/** + * An immutable and'ing of a collection of sub-expressions. It does not extend + * AndItem to avoid code using instanceof handling it as an AndItem. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class AndSegmentItem extends SegmentItem implements BlockItem { + + public AndSegmentItem(String rawWord, boolean isFromQuery, boolean stemmed) { + super(rawWord, rawWord, isFromQuery, stemmed, null); + } + + public AndSegmentItem(String rawWord, String current, boolean isFromQuery, boolean stemmed) { + super(rawWord, current, isFromQuery, stemmed, null); + } + + public AndSegmentItem(PhraseSegmentItem item) { + super(item.getRawWord(), item.stringValue(), item.isFromQuery(), item.isStemmed(), null); + int weight = item.getWeight(); + if (item.getItemCount() > 0) { + for (Iterator<Item> i = item.getItemIterator(); i.hasNext();) { + WordItem word = (WordItem) i.next(); + word.setWeight(weight); + addItem(word); + } + } + } + + public ItemType getItemType() { + return ItemType.AND; + } + + public String getName() { + return "SAND"; + } + + @NonNull + public String getIndexName() { + if (getItemCount() == 0) { + return ""; + } else { + return ((IndexedItem) getItem(0)).getIndexName(); + } + } + + // TODO: Is it necessary to override equals? + + public void setWeight(int w) { + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + i.next().setWeight(w); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/BlockItem.java b/container-search/src/main/java/com/yahoo/prelude/query/BlockItem.java new file mode 100644 index 00000000000..6b6fabafac5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/BlockItem.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +/** + * An interface used for anything which represents a single block + * of query input. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public interface BlockItem extends HasIndexItem { + + /** + * The untransformed raw text from the user serving as base for + * this item. + */ + String getRawWord(); + + /** Returns the substring which is the origin of this item, or null if none */ + public Substring getOrigin(); + + /** Returns the value of this term as a string */ + public abstract String stringValue(); + + /** + * Is this block of text conceptually from the user query? + */ + boolean isFromQuery(); + + boolean isStemmed(); + + /** + * Does this item represent "usual words"? + */ + boolean isWords(); + + /** + * If the block has to be resegmented, what operator should be chosen if it + * is necessary to change operator? + */ + SegmentingRule getSegmentingRule(); +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/CompositeIndexedItem.java b/container-search/src/main/java/com/yahoo/prelude/query/CompositeIndexedItem.java new file mode 100644 index 00000000000..6dbaa129f66 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/CompositeIndexedItem.java @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import edu.umd.cs.findbugs.annotations.NonNull; + +import java.nio.ByteBuffer; + +/** + * Common implementation for Item classes implementing the IndexedItem interface. + * Note that this file exist in 3 copies that should be kept in sync: + * + * CompositeIndexedItem.java + * SimpleIndexedItem.java + * IndexedSegmentItem.java + * + * These should only have trivial differences. + * (multiple inheritance or mixins would have been nice). 
+ * + * @author arnej27959 + */ +public abstract class CompositeIndexedItem extends CompositeTaggableItem implements IndexedItem { + + @NonNull + private String index = ""; + + /** + * The name of the index this belongs to, or "" (never null) if not specified + **/ + @NonNull + public String getIndexName() { + return index; + } + + // encode index bytes + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + putString(index, buffer); + } + + /** Sets the name of the index to search */ + public void setIndexName(String index) { + if (index == null) { + index = ""; + } + this.index = index; + } + + /** Appends the index prefix if necessary */ + protected void appendIndexString(StringBuilder buffer) { + if (!getIndexName().equals("")) { + buffer.append(getIndexName()); + buffer.append(":"); + } + } + + public boolean equals(Object object) { + if (!super.equals(object)) { + return false; + } + IndexedItem other = (IndexedItem) object; // Ensured by superclass + if (!this.index.equals(other.getIndexName())) { + return false; + } + return true; + } + + public int hashCode() { + return super.hashCode() + 31 * index.hashCode(); + } + + public abstract String getIndexedString(); + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("index", index); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/CompositeItem.java b/container-search/src/main/java/com/yahoo/prelude/query/CompositeItem.java new file mode 100644 index 00000000000..99e388d8db3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/CompositeItem.java @@ -0,0 +1,379 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query; + + +import com.yahoo.compress.IntegerCompressor; +import com.yahoo.prelude.query.textualrepresentation.Discloser; +import com.yahoo.protect.Validator; +import com.yahoo.search.query.QueryTree; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; + + +/** + * Superclass of expressions which contains a collection of sub-expressions + * + * @author bratseth + */ +public abstract class CompositeItem extends Item { + + private List<Item> subitems = new java.util.ArrayList<>(4); + + /** Sets the index name of all subitems of this */ + public void setIndexName(String index) { + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + Item item = i.next(); + + item.setIndexName(index); + } + } + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + for (Item item : subitems) + discloser.addChild(item); + } + + public void ensureNotInSubtree(CompositeItem item) { + for (Iterator<Item> i = item.getItemIterator(); i.hasNext();) { + Item possibleCycle = i.next(); + + if (this == possibleCycle) { + throw new QueryException("Tried to create a cycle in a tree."); + } else if (possibleCycle instanceof CompositeItem) { + ensureNotInSubtree((CompositeItem) possibleCycle); + } + } + } + + public void addItem(Item item) { + adding(item); + subitems.add(item); + } + + protected void adding(Item item) { + Validator.ensureNotNull("Composite item", item); + Validator.ensure("Attempted to add a composite to itself", item != this); + if (item instanceof CompositeItem) { + ensureNotInSubtree((CompositeItem) item); + } + item.setParent(this); + } + + /** + * Inserts the item at a position and increases the index of existing items + * starting on this position by one + * + * @throws IndexOutOfBoundsException if the index is out of range + */ + public void addItem(int index, Item item) { + if (index > subitems.size() || index < 0) { 
+ throw new IndexOutOfBoundsException( + "Could not add a subitem at position " + index + " to " + this); + } + adding(item); + subitems.add(index, item); + } + + /** For NOT items, which may wish to insert nulls */ + void insertNullFirstItem() { + subitems.add(0, null); + } + + /** + * Returns a subitem + * + * @param index the (0-base) index of the item to return + * @throws IndexOutOfBoundsException if there is no subitem at index + */ + public Item getItem(int index) { + return subitems.get(index); + } + + /** + * Replaces the item at the given index + * + * @param index the (0-base) index of the item to replace + * @param item the new item + * @return the old item at this position. The parent of the old item is <i>not</i> cleared + * @throws IndexOutOfBoundsException if there is no item at this index + */ + public Item setItem(int index, Item item) { + if (index >= subitems.size() || index < 0) + throw new IndexOutOfBoundsException("Could not add a subitem at position " + index + " to " + this); + + adding(item); + Item old = subitems.set(index, item); + if (old!=item) + removing(old); + return old; + } + + /** + * Returns the index of a subitem + * + * @param item The child item to find the index of + * @return the 0-base index of the child or -1 if there is no such child + */ + public int getItemIndex(Item item) { + return subitems.indexOf(item); + } + + /** + * Removes the item at the given index + * + * @param index the index of the item to remove + * @return the removed item + * @throws IndexOutOfBoundsException if there is no item at the given index + */ + public Item removeItem(int index) { + Item item = subitems.remove(index); + + removing(item); + return item; + } + + /** Always call on every remove */ + private void removing(Item item) { + if (item == null) { + return; + } + if (item.getParent() == this) { // Otherwise, this belongs to somebody else now (somebody are doing addField, removeField) + item.setParent(null); + } + } + + /** + * Removes the 
given item. Does nothing if the item is not present. + * + * @param item the item to remove + * @return whether the item was removed + */ + public boolean removeItem(Item item) { + boolean removed = subitems.remove(item); + + if (removed) { + removing(item); + } + return removed; + } + + /** Returns the number of direct ancestors of this item */ + public int getItemCount() { + return subitems.size(); + } + + /** Returns a modifiable list iterator */ + public ListIterator<Item> getItemIterator() { + return new ListIteratorWrapper(this); + } + + public int encode(ByteBuffer buffer) { + encodeThis(buffer); + int itemCount = 1; + + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + Item subitem = i.next(); + + itemCount += subitem.encode(buffer); + } + return itemCount; + } + + /** + * Encodes just this item, not it's usual subitems, to the given buffer. + */ + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + IntegerCompressor.putCompressedPositiveNumber(encodingArity(), buffer); + } + + protected int encodingArity() { + return subitems.size(); + } + + protected void appendBodyString(StringBuilder buffer) { + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + Item item = i.next(); + + buffer.append(item.toString()); + if (i.hasNext()) { + buffer.append(" "); + } + } + } + + /** Composite items should be parenthized when not on the top level */ + protected boolean shouldParenthize() { + return getParent()!= null && ! 
(getParent() instanceof QueryTree); + } + + /** Returns a deep copy of this item */ + public CompositeItem clone() { + CompositeItem copy = (CompositeItem) super.clone(); + + copy.subitems = new java.util.ArrayList<>(); + for (Item subItem : subitems) { + Item subItemCopy = subItem.clone(); + copy.adding(subItemCopy); + copy.subitems.add(subItemCopy); + } + fixConnexity(copy); + return copy; + } + + private void fixConnexity(CompositeItem copy) { + List<Item> flatland = new ArrayList<>(); + List<Item> flatCopy = new ArrayList<>(); + taggingFlatten(this, flatland); + taggingFlatten(copy, flatCopy); + int barrier = flatland.size(); + for (int i = 0; i < barrier; ++i) { + Item orig = flatland.get(i); + int connectedTo = find(orig.connectedItem, flatland); + if (connectedTo >= 0) { + TaggableItem tagged = (TaggableItem) flatCopy.get(i); + tagged.setConnectivity(flatCopy.get(connectedTo), orig.connectivity); + } + } + } + + private void taggingFlatten(Item tree, List<Item> container) { + if (tree.hasUniqueID()) { + container.add(tree); + } else if (tree instanceof CompositeItem) { + CompositeItem asComposite = (CompositeItem) tree; + for (Iterator<Item> i = asComposite.getItemIterator(); i.hasNext();) { + taggingFlatten(i.next(), container); + } + } + } + + private int find(Item needle, List<Item> haystack) { + if (needle == null) { + return -1; + } + int barrier = haystack.size(); + for (int i = 0; i < barrier; ++i) { + if (haystack.get(i) == needle) { + return i; + } + } + return -1; + } + + public int hashCode() { + int code = getName().hashCode() + subitems.size() * 17; + + for (int i = 0; i < subitems.size() && i <= 5; i++) { + code += subitems.get(i).hashCode(); + } + return code; + } + + /** + * Returns whether this item is of the same class and + * contains the same state as the given item + */ + public boolean equals(Object object) { + if (!super.equals(object)) { + return false; + } + + CompositeItem other = (CompositeItem) object; // Ensured by superclass + + 
if (!this.subitems.equals(other.subitems)) { + return false; + } + + return true; + } + + /** Make composite immutable if this is supported. */ + public void lock() {} + + /** Whether this composite is in a mutable state. */ + public boolean isLocked() { + return false; + } + + /** Handles mutator calls correctly */ + private static class ListIteratorWrapper implements ListIterator<Item> { + + private CompositeItem owner; + + private ListIterator<Item> wrapped; + + private Item current = null; + + public ListIteratorWrapper(CompositeItem owner) { + this.owner = owner; + wrapped = owner.subitems.listIterator(); + } + + public boolean hasNext() { + return wrapped.hasNext(); + } + + public Item next() { + current = wrapped.next(); + return current; + } + + public boolean hasPrevious() { + return wrapped.hasPrevious(); + } + + public Item previous() { + Item current = wrapped.previous(); + + return current; + } + + public int nextIndex() { + return wrapped.nextIndex(); + } + + public int previousIndex() { + return wrapped.previousIndex(); + } + + public void remove() { + owner.removing(current); + wrapped.remove(); + } + + public void set(Item o) { + Item newItem = o; + + owner.removing(current); + owner.adding(newItem); + current = newItem; + wrapped.set(newItem); + } + + public void add(Item o) { + Item newItem = o; + + owner.adding(newItem); + // TODO: Change current here? Check javadoc + wrapped.add(o); + } + + } + + @Override + public int getTermCount() { + int terms = 0; + for (Item item : subitems) { + terms += item.getTermCount(); + } + return terms; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/CompositeTaggableItem.java b/container-search/src/main/java/com/yahoo/prelude/query/CompositeTaggableItem.java new file mode 100644 index 00000000000..186f9686150 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/CompositeTaggableItem.java @@ -0,0 +1,71 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +/** + * Common implementation for Item classes implementing the TaggableItem interface. + * Note that this file exist in 3 copies that should be kept in sync: + * + * CompositeTaggableItem.java + * SimpleTaggableItem.java + * TaggableSegmentItem.java + * + * These should only have trivial differences. + * (multiple inheritance or mixins would have been nice). + * + * @author arnej27959 + */ +public abstract class CompositeTaggableItem extends CompositeItem implements TaggableItem { + + public int getUniqueID() { + return uniqueID; + } + + public void setUniqueID(int id) { + setHasUniqueID(true); + uniqueID = id; + } + + /** See {@link TaggableItem#setConnectivity} */ + public void setConnectivity(Item item, double connectivity) { + setHasUniqueID(true); + item.setHasUniqueID(true); + if (connectedItem != null) { + // untangle old connectivity + connectedItem.connectedBacklink = null; + } + this.connectivity = connectivity; + connectedItem = item; + connectedItem.connectedBacklink = this; + } + + public Item getConnectedItem() { + return connectedItem; + } + + public double getConnectivity() { + return connectivity; + } + + public void setSignificance(double significance) { + setHasUniqueID(true); + setExplicitSignificance(true); + this.significance = significance; + } + + public void setExplicitSignificance(boolean explicitSignificance) { + this.explicitSignificance = explicitSignificance; + } + + public boolean hasExplicitSignificance() { + return explicitSignificance; + } + + public double getSignificance() { + return significance; + } + + //Change access privilege from protected to public. 
+ public boolean hasUniqueID() { + return super.hasUniqueID(); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/DotProductItem.java b/container-search/src/main/java/com/yahoo/prelude/query/DotProductItem.java new file mode 100644 index 00000000000..fd494dba491 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/DotProductItem.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +/** + * A weighted set query item to be evaluated as a sparse dot product. + * + * The resulting dot product will be available as a raw score in the rank framework. + * + * @author havardpe + */ +public class DotProductItem extends WeightedSetItem { + + public DotProductItem(String indexName) { super(indexName); } + + @Override + public ItemType getItemType() { return ItemType.DOTPRODUCT; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/EquivItem.java b/container-search/src/main/java/com/yahoo/prelude/query/EquivItem.java new file mode 100644 index 00000000000..a847ba1b288 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/EquivItem.java @@ -0,0 +1,90 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.protect.Validator; + +import java.util.Collection; + +/** + * An Item where each child is an <i>alternative</i> which can be matched. + * Produces the same recall as Or, but differs in that the relevance of a match + * does not increase if more than one children is matched: With Equiv, matching one child perfectly is a perfect match. + * <p> + * This can only have Word, Int or Phrase children. 
+ * + * @author <a href="mailto:havardpe@yahoo-inc.com">havardpe</a> + */ +public class EquivItem extends CompositeTaggableItem { + + public ItemType getItemType() { + return ItemType.EQUIV; + } + + public String getName() { + return "EQUIV"; + } + + @Override + protected void adding(Item item) { + super.adding(item); + Validator.ensure("Equiv can only have word/int/phrase as children", + item.getItemType() == ItemType.WORD || + item.getItemType() == ItemType.INT || + item.getItemType() == ItemType.PHRASE); + } + + /** make an EQUIV item with no children */ + public EquivItem() {} + + /** + * create an EQUIV with the given item as child. + * The new EQUIV will take connectivity, + * significance and weight from the given item. + * + * @param item Will be modified and added as a child. + **/ + public EquivItem(Item item) { + addItem(item); + + // steal other item's connectivity: + if (item.connectedItem != null) { + setConnectivity(item.connectedItem, item.connectivity); + item.connectedItem = null; + item.connectivity = 0.0; + } + TaggableItem back = (TaggableItem)item.connectedBacklink; + if (back != null) { + back.setConnectivity(this, back.getConnectivity()); + item.connectedBacklink = null; + } + + // steal other item's significance: + if (item.explicitSignificance) { + setSignificance(item.significance); + } + + // steal other item's weight: + setWeight(item.getWeight()); + + // we have now stolen all of the other item's unique id needs: + item.setHasUniqueID(false); + } + + /** + * create an EQUIV with the given item and a set + * of alternate words as children. + * The new EQUIV will take connectivity, + * significance and weight from the given item. + * + * @param item Will be modified and added as a child. + * @param words Set of words to create WordItems from. 
+ **/ + public EquivItem(Item item, Collection<String> words) { + this(item); + String idx = ((IndexedItem)item).getIndexName(); + for (String word : words) { + WordItem witem = new WordItem(word, idx); + addItem(witem); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/ExactstringItem.java b/container-search/src/main/java/com/yahoo/prelude/query/ExactstringItem.java new file mode 100644 index 00000000000..3972d2b808e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/ExactstringItem.java @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +/** + * @author balder + */ +// TODO: balder to fix javadoc +public class ExactstringItem extends WordItem { + + public ExactstringItem(String substring) { + this(substring, false); + } + + public ExactstringItem(String substring, boolean isFromQuery) { + super(substring, isFromQuery); + } + + public ItemType getItemType() { + return ItemType.EXACT; + } + + public String getName() { + return "EXACTSTRING"; + } + + public String stringValue() { + return getWord(); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/FalseItem.java b/container-search/src/main/java/com/yahoo/prelude/query/FalseItem.java new file mode 100644 index 00000000000..993c395b191 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/FalseItem.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.nio.ByteBuffer; + +/** + * A query item which never matches. This is sometimes an useful output of query rewriting. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class FalseItem extends Item { + + @Override + public void setIndexName(String index) { } + + @Override + public ItemType getItemType() { + return ItemType.WORD; // Implemented as a non-matching word as the backend does not support FalseItem + } + + @Override + public String getName() { return "FALSE"; } + + /** Override to only return "FALSE" rather than "FALSE " */ + @Override + protected void appendHeadingString(StringBuilder buffer) { + buffer.append(getName()); + } + + @Override + public int encode(ByteBuffer buffer) { + super.encodeThis(buffer); + putString(" ", buffer); // searching for space will not match + return 1; + } + + @Override + public int getTermCount() { return 1; } + + @Override + protected void appendBodyString(StringBuilder buffer) { } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/HasIndexItem.java b/container-search/src/main/java/com/yahoo/prelude/query/HasIndexItem.java new file mode 100644 index 00000000000..2608e6ec58e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/HasIndexItem.java @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import edu.umd.cs.findbugs.annotations.NonNull; + + +/** + * An interface for items where it is useful to access an associated + * index name. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public interface HasIndexItem { + + @NonNull + public String getIndexName(); + + public int getNumWords(); + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/Highlight.java b/container-search/src/main/java/com/yahoo/prelude/query/Highlight.java new file mode 100644 index 00000000000..509b6f04a66 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/Highlight.java @@ -0,0 +1,140 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.util.*; + +import static com.yahoo.language.LinguisticsCase.toLowerCase; + +/** + * Class encapsulating information on extra highlight-terms for a query + * + * @author <a href="mailto:mathiasm@yahoo-inc.com">Mathias Lidal</a> + */ +public class Highlight implements Cloneable { + + /** + * The name of the property map which contains extra highlight terms + */ + public static final String HIGHLIGHTTERMS = "highlightterms"; + + private Map<String, AndItem> highlightItems = new LinkedHashMap<>(); + + private Map<String, List<String>> highlightTerms = new LinkedHashMap<>(); + + public Highlight() {} + + private void addHighlightItem(String key, Item value) { + /*List<IndexedItem> l = highlightItems.get(key); + if (l == null) { + l = new ArrayList<IndexedItem>(); + highlightItems.put(key, l); + } + l.addField(value);*/ + AndItem item = highlightItems.get(key); + if (item == null) { + item = new AndItem(); + highlightItems.put(key, item); + } + item.addItem(value); + } + + /** + * Add custom highlight term + * + * @param field Field name + * @param item Term to be highlighted + */ + public void addHighlightTerm(String field, String item) { + addHighlightItem(field, new WordItem(toLowerCase(item), field, true)); + } + + /** + * Add custom highlight phrase + * @param field Field name + * @param phrase List of 
terms to be highlighted as a phrase + */ + public void addHighlightPhrase(String field, List<String> phrase) { + PhraseItem pi = new PhraseItem(); + pi.setIndexName(field); + for (String s : phrase) { + pi.addItem(new WordItem(toLowerCase(s), field, true)); + } + addHighlightItem(field, pi); + } + + /** + * Returns the modifiable map of highlight items (never null) + * + * @return Map of highlight items + */ + public Map<String, AndItem> getHighlightItems() { + return highlightItems; + } + + @Override + public Highlight clone() { + try { + Highlight clone = (Highlight) super.clone(); + + clone.highlightItems = new LinkedHashMap<>(); + for (Map.Entry<String,AndItem> entry: highlightItems.entrySet()) { + clone.highlightItems.put(entry.getKey(),(AndItem)entry.getValue().clone()); + } + + clone.highlightTerms = new LinkedHashMap<>(); + for (Map.Entry<String, List<String>> entry : highlightTerms.entrySet()) + clone.highlightTerms.put(entry.getKey(), new ArrayList<>(entry.getValue())); + + return clone; + + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + } + + public Map<String, List<String>> getHighlightTerms() { return highlightTerms; } + + /** Prepares this for binary serialization. 
For internal use - see {@link com.yahoo.search.Query#prepare} */ + public void prepare() { + this.highlightTerms.clear(); + + for (String index : getHighlightItems().keySet()) { + AndItem root = getHighlightItems().get(index); + List<WordItem> words = new ArrayList<>(); + List<CompositeItem> phrases = new ArrayList<>(); + for (Iterator<Item> i = root.getItemIterator(); i.hasNext(); ) { + Item item = i.next(); + if (item instanceof WordItem) { + words.add((WordItem)item); + } else if (item instanceof CompositeItem) { + phrases.add((CompositeItem)item); + } + } + + List<String> terms = new ArrayList<>(); + terms.add(String.valueOf(words.size() + phrases.size())); + for (WordItem item : words) { + terms.add(item.getWord()); + } + + for (CompositeItem item : phrases) { + terms.add("\""); + terms.add(String.valueOf(item.getItemCount())); + for (Iterator<Item> i = item.getItemIterator(); i.hasNext(); ) { + terms.add(((IndexedItem)i.next()).getIndexedString()); + } + terms.add("\""); + } + + if (terms.size() > 1) + this.highlightTerms.put(index, terms); + } + } + + + +} + + + + diff --git a/container-search/src/main/java/com/yahoo/prelude/query/IndexedItem.java b/container-search/src/main/java/com/yahoo/prelude/query/IndexedItem.java new file mode 100644 index 00000000000..6f873e28f8a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/IndexedItem.java @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +/** + * Interface for Items that is indexed + * + * @author Lars Christian Jensen + */ +public interface IndexedItem extends HasIndexItem { + + /** Sets the name of the index to search */ + public void setIndexName(String index); + + /** + * Return the searchable term contents of this item. 
+ * + * @return a string representation of what is presumably stored in an index + * which will match this item + */ + public String getIndexedString(); + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/IndexedSegmentItem.java b/container-search/src/main/java/com/yahoo/prelude/query/IndexedSegmentItem.java new file mode 100644 index 00000000000..af6108b819d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/IndexedSegmentItem.java @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import edu.umd.cs.findbugs.annotations.NonNull; + +import java.nio.ByteBuffer; + +/** + * Common implementation for Item classes implementing the IndexedItem interface. + * Note that this file exist in 3 copies that should be kept in sync: + * + * CompositeIndexedItem.java + * SimpleIndexedItem.java + * IndexedSegmentItem.java + * + * These should only have trivial differences. + * (multiple inheritance or mixins would have been nice). 
+ * + * @author arnej27959 + */ +public abstract class IndexedSegmentItem extends TaggableSegmentItem implements IndexedItem { + + @NonNull + private String index = ""; + + protected IndexedSegmentItem(String rawWord, String current, boolean isFromQuery, boolean stemmed, Substring origin) { + super(rawWord, current, isFromQuery, stemmed, origin); + } + + /** + * The name of the index this belongs to, or "" (never null) if not specified + **/ + @NonNull + public String getIndexName() { + return index; + } + + // encode index bytes + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + putString(index, buffer); + } + + /** Sets the name of the index to search */ + public void setIndexName(String index) { + if (index == null) { + index = ""; + } + this.index = index; + } + + /** Appends the index prefix if necessary */ + protected void appendIndexString(StringBuilder buffer) { + if (!getIndexName().equals("")) { + buffer.append(getIndexName()); + buffer.append(":"); + } + } + + public boolean equals(Object object) { + if (!super.equals(object)) { + return false; + } + IndexedItem other = (IndexedItem) object; // Ensured by superclass + if (!this.index.equals(other.getIndexName())) { + return false; + } + return true; + } + + public int hashCode() { + return super.hashCode() + 31 * index.hashCode(); + } + + public abstract String getIndexedString(); + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("index", index); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/IntItem.java b/container-search/src/main/java/com/yahoo/prelude/query/IntItem.java new file mode 100644 index 00000000000..90a9ce5a07f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/IntItem.java @@ -0,0 +1,292 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query; + + +import java.math.BigInteger; +import java.nio.ByteBuffer; + + +/** + * This represents either + * <ul> + * <li>a number (integer or floating point) + * <li>a partial range, given by "<number" or ">number", where the numbers are exclusive, or + * <li>a full or open range "[number;number], "[number;]" or "[;number]" where the numbers are inclusive, + * or exclusive if a square bracket is replaced with a pointy one + * </ul> + * + * If a range is specified in brackets, it is also permissible to add a third number specifying the number of hits this + * will match on each node - [from;to;hitLimit] + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class IntItem extends TermItem { + + /** The inclusive lower end of this range */ + private Limit from; + + /** The inclusive upper end of this range */ + private Limit to; + + private int hitLimit = 0; + + /** The number expression of this */ + private String expression; + + /** + * Creates an int item which must be equal to the given int number - + * that is both the lower and upper limit is this number + */ + public IntItem(int number, String indexName) { + this(new Limit(number, true), new Limit(number, true), indexName); + } + + /** + * Creates an int item which must be equal to the given long number - + * that is both the lower and upper limit is this number + */ + public IntItem(long number, String indexName) { + this(new Limit(number, true), new Limit(number, true), indexName); + } + + public IntItem(Limit from, Limit to, String indexName) { + super(indexName, false); + this.from = from; + this.to = to; + expression = toExpression(from, to, 0); + } + + /** Returns the simplest expression matching this */ + private String toExpression(Limit from, Limit to, int hitLimit) { + if (from.equals(to) && hitLimit == 0) return from.number().toString(); + + String expression = from.toRangeStart() + ";" + to.toRangeEnd(); + if (hitLimit == 0) return 
expression; + + // Insert ;hitLimit at the end inside the brackets + return expression.substring(0, expression.length()-1) + ";" + hitLimit + expression.substring(expression.length()-1); + } + + public IntItem(String expression) { + this(expression, ""); + } + + public IntItem(String expression, boolean isFromQuery) { + this(expression, "", isFromQuery); + } + + public IntItem(String expression, String indexName) { + this(expression, indexName, false); + } + + public IntItem(String expression, String indexName, boolean isFromQuery) { + super(indexName, isFromQuery); + setNumber(expression); + } + + public IntItem(Limit from, Limit to, int hitLimit, String indexName, boolean isFromQuery) { + super(indexName, isFromQuery); + setLimits(from, to); + this.hitLimit = hitLimit; + this.expression = toExpression(from, to, hitLimit); + } + + /** Sets limit and flip them if "from" is greater than "to" */ + private final void setLimits(Limit from, Limit to) { + if (from.number().doubleValue() > to.number().doubleValue()) { + this.from = to; + this.to = from; + } + else { + this.from = from; + this.to = to; + } + } + + /** Sets the number expression of this - a number or range following the syntax specified in the class javadoc */ + public void setNumber(String expression) { + try { + this.expression = expression; + parseAndAssignLimits(expression.trim()); + } + catch (IllegalArgumentException e) { + throw new IllegalArgumentException("'" + expression + "' is not an int item expression: " + + "Expected NUMBER, '<'NUMBER, '>'NUMBER or ('['|'<')NUMBER;NUMBER(;NUMBER)?(']'|'>')", e); + + } + } + + private void parseAndAssignLimits(String e) { + if (e.startsWith("<") && ! e.contains(";")) { + from = Limit.NEGATIVE_INFINITY; + to = new Limit(asNumber(e.substring(1)), false); + } + else if (e.startsWith(">")) { + from = new Limit(asNumber(e.substring(1)), false); + to = Limit.POSITIVE_INFINITY; + } + else if (e.startsWith("[") || e.startsWith("<")) { + if ( ! 
(e.endsWith("]") || e.endsWith(">"))) throw new IllegalArgumentException("No closing bracket"); + + boolean inclusiveStart = e.startsWith("["); + boolean inclusiveEnd = e.startsWith("["); + + String[] limits = e.substring(1, e.length()-1).split(";"); + if (limits.length < 1 || limits.length > 3) throw new IllegalArgumentException("Unexpected bracket content"); + Limit from = new Limit(getOr(Double.NEGATIVE_INFINITY, 0, limits), inclusiveStart); + Limit to = new Limit(getOr(Double.POSITIVE_INFINITY, 1, limits), inclusiveEnd); + setLimits(from, to); + hitLimit = getOr(0, 2, limits).intValue(); + } + else { + to = from = new Limit(asNumber(e), true); + } + } + + private Number getOr(Number defaultValue, int valueIndex, String[] values) { + if (valueIndex >= values.length) return defaultValue; + if (values[valueIndex] == null) return defaultValue; + if (values[valueIndex].isEmpty()) return defaultValue; + return asNumber(values[valueIndex]); + } + + private Number asNumber(String numberString) { + try { + if (!numberString.contains(".")) return new Long(numberString); + } + catch (NumberFormatException e) { + return new BigInteger(numberString); + } + return new Double(numberString); + } + + /** Sets the number expression of this - a number or range */ + public String getNumber() { return expression; } + + /** Returns the lower limit of this range, which may be negative infinity */ + public final Limit getFromLimit() { + return from; + } + + /** Returns the upper limit of this range, which may be positive infinity */ + public final Limit getToLimit() { + return to; + } + + /** + * Returns the number of hits this will match, or 0 if all should be matched. + * If this number is positive, the hits closest to <code>from</code> are returned, and if + * this number is negative the hits closest to <code>to</code> are returned. + */ + public final int getHitLimit() { + return hitLimit; + } + + /** + * Sets the number of hits this will match, or 0 if all should be + * matched. 
If this number is positive, the hits closest to + * <code>from</code> are returned, and if this number is negative the hits + * closest to <code>to</code> are returned. + * + * @param hitLimit + * number of hits to match for this operator + */ + public final void setHitLimit(int hitLimit) { + this.hitLimit = hitLimit; + this.expression = toExpression(from, to, hitLimit); + } + + @Override + public String getRawWord() { + return getNumber(); + } + + @Override + public ItemType getItemType() { + return ItemType.INT; + } + + @Override + public String getName() { + return "INT"; + } + + @Override + public String stringValue() { + return expression; + } + + /** Same as {@link #setNumber} */ + @Override + public void setValue(String value) { setNumber(value); } + + /** Int items uses a empty heading instead of "INT " */ + protected void appendHeadingString(StringBuilder buffer) {} + + @Override + public int hashCode() { + return super.hashCode() + 199 * expression.hashCode(); + } + + @Override + public boolean equals(Object object) { + if ( ! super.equals(object)) return false; + + IntItem other = (IntItem) object; // Ensured by superclass + if ( ! getFromLimit().equals(other.getFromLimit())) return false; + if ( ! getToLimit().equals(other.getToLimit())) return false; + if ( getHitLimit() != other.getHitLimit()) return false; + return true; + } + + /** Returns the number for encoding; the number expression as-is. */ + protected String getEncodedInt() { + return getIndexedString(); + } + + @Override + public String getIndexedString() { + return expression; + } + + @Override + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); // takes care of index bytes + putString(getEncodedInt(), buffer); + } + + @Override + public int getNumWords() { + return 1; + } + + @Override + public boolean isStemmed() { + return true; + } + + @Override + public boolean isWords() { + return false; + } + + /** + * Creates an int item from arguments. 
+ * This will return an instance of the RankItem subclass if either <code>hitLimit</code> or both <code>from</code> + * and <code>to</code> is set to a value other than defaults (respectively 0, double negative and positive infinity). + * And different from each other. + * + * @param indexName the index this searches + * @param from the lower limit (inclusive) on hits + * @param to the higher limit (inclusive) on hits + * @param hitLimit the number of hits to match, or 0 to return all + */ + public static IntItem from(String indexName, Limit from, Limit to, int hitLimit) { + if (hitLimit == 0 && (from.equals(Limit.NEGATIVE_INFINITY) || to.equals(Limit.POSITIVE_INFINITY) || from.equals(to))) + return new IntItem(from, to, indexName); + else { + return new RangeItem(from, to, hitLimit, indexName, false); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/Item.java b/container-search/src/main/java/com/yahoo/prelude/query/Item.java new file mode 100644 index 00000000000..d9f0dcb7b1c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/Item.java @@ -0,0 +1,507 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +import com.yahoo.collections.CopyOnWriteHashMap; +import com.yahoo.compress.IntegerCompressor; +import com.yahoo.prelude.query.textualrepresentation.Discloser; +import com.yahoo.search.query.QueryTree; +import com.yahoo.text.Utf8; + +import java.nio.ByteBuffer; + + +/** + * <p>A term of the query language. As "term" is also the common term (sorry) + * for a literal to be found (or not) in a search index, the term <i>item</i> + * is used for <i>query language</i> terms.</p> + * + * <p>The query is represented as a composite tree of + * Item subclasses. 
This allow arbitrary complex combinations of ands, + * nots, phrases and so on.</p> + * + * <p>Items are in general mutable and not thread safe.</p> + * + * @author bratseth + * @author havardpe + */ +public abstract class Item implements Cloneable { + + /** + * The definitions in Item.ItemType must match the ones in + * searchlib/src/searchlib/parsequery/parse.h + */ + public static enum ItemType { + OR(0), + AND(1), + NOT(2), + RANK(3), + WORD(4), + INT(5), + PHRASE(6), + PAREN(7), + PREFIX(8), + SUBSTRING(9), + NEAR(11), + ONEAR(12), + SUFFIX(13), + EQUIV(14), + WEIGHTEDSET(15), + WEAK_AND(16), + EXACT(17), + LEGACY_RISE_QUERY_NOT_USED_ANYMORE_BUT_DO_NOT_REUSE_FOR_A_WHILE(18), + PURE_WEIGHTED_STRING(19), + PURE_WEIGHTED_INTEGER(20), + DOTPRODUCT(21), + WAND(22), + PREDICATE_QUERY(23), + REGEXP(24), + WORD_ALTERNATIVES(25); + + public final int code; + + private ItemType(int code) { + this.code = code; + } + + } + + public static final int DEFAULT_WEIGHT = 100; + + /** The relative importancy of this term in the query. Default is 100 */ + private int weight = DEFAULT_WEIGHT; + + /** + * The definitions in Item.ItemCreator must match the ones in + * searchlib/src/searchlib/parsequery/parse.h + */ + public static enum ItemCreator { + ORIG(0), + FILTER(1); + + public final int code; + + private ItemCreator(int code) { + this.code = code; + } + } + + private boolean fromSpecialToken = false; + + private ItemCreator creator = ItemCreator.ORIG; + + /** The parent in the query tree, or null if this is a root */ + private CompositeItem parent = null; + + /** The annotations made on this item */ + private CopyOnWriteHashMap<String, Object> annotations; + + /** Whether or not this item should affect ranking. 
*/ + private boolean isRanked = true; + + /** Whether or not position data should be used when ranking this item */ + private boolean usePositionData = true; + + /** Whether the item should encode a unique ID */ + private boolean hasUniqueID = false; + + /** Optional symbolic name for this item, requires unique id */ + private String label = null; + + /** Unique identifier to address the item for external annotation */ + protected int uniqueID = 0; + + /** Items for explicit connectivity */ + // TODO: Don't use protected members, especially not for something like this + // Move this to an object which can take care of being a weighted bidirectional reference more elegantly and safely. + protected Item connectedItem; + protected Item connectedBacklink; + protected double connectivity; + + /** Explicit term significance */ + protected double significance; + protected boolean explicitSignificance = false; + + /** Whether this item is eligible for change by query rewriters (false) or should be kept as-is (true) */ + private boolean isProtected; + + /** Sets the index name of this item */ + public abstract void setIndexName(String index); + + /** Returns the int code of this item */ + public final int getCode() { + return getItemType().code; + } + + /** Return the enumerated type of this item. */ + public abstract ItemType getItemType(); + + /** Returns the name of this item */ + public abstract String getName(); + + /** + * Sets whether this is a filter term. + * This indicates that the term origins from the filter parameter in the search API. + * The search backend does to handle filter terms any different than non-filter terms. + */ + public void setFilter(boolean filter) { + if (filter) { + creator = ItemCreator.FILTER; + } else { + creator = ItemCreator.ORIG; + } + } + + /** Returns whether this is a filter term */ + public boolean isFilter() { + return creator == ItemCreator.FILTER; + } + + /** Returns the item creator value. 
*/ + public ItemCreator getCreator() { + return creator; + } + + /** Sets the item creator value. */ + public void setCreator(ItemCreator creator) { + this.creator = creator; + } + + /** Sets the relative importance of this term */ + public void setWeight(int w) { + weight = w; + } + + /** Returns the relative importance of this term. Default is 100. */ + public int getWeight() { + return weight; + } + + /** + * Annotate this item + * + * @param key the annotation key + * @param value the value, or null to set a valueless annotation + */ + public void addAnnotation(String key, Object value) { + if (annotations == null) + annotations = new CopyOnWriteHashMap<>(); + annotations.put(key, value); + } + + /** + * Returns an annotation on this item, or null if the annotation is not set + */ + public Object getAnnotation(String annotation) { + if (annotations == null) { + return null; + } + return annotations.get(annotation); + } + + /** + * Returns whether this has an annotation + */ + public boolean hasAnnotation(String annotation) { + if (annotations == null) return false; + return annotations.containsKey(annotation); + } + + /** Set whether this should be protected from change/remove by query rewriters */ + public void setProtected(boolean isProtected) { this.isProtected=isProtected; } + + /** Returns whether this is to be protected from change/remove by query rewriters. default is false */ + public boolean isProtected() { return isProtected; } + + + /** Sets the parent in the tree. 
Do not use: Only to be called from CompositeItem/QueryTree */ + public void setParent(CompositeItem parent) { + this.parent = parent; + } + + /** Returns the parent in the query tree, or null if this node has no parent */ + public CompositeItem getParent() { + return parent; + } + + public abstract int encode(ByteBuffer buffer); + + protected void encodeThis(ByteBuffer buffer) { + int FEAT_SHIFT = 5; + int CODE_MASK = 0x1f; + int FEAT_MASK = 0xe0; + int FEAT_WEIGHT = 0x01; + int FEAT_UNIQUEID = 0x02; + int FEAT_FLAGS = 0x04; + + int features = 0; + + if (weight != DEFAULT_WEIGHT) { + features |= FEAT_WEIGHT; + } + if (hasUniqueID()) { + features |= FEAT_UNIQUEID; + } + byte flags = getFlagsFeature(); + if (flags != 0) { + features |= FEAT_FLAGS; + } + byte type = (byte)(((getCode() & CODE_MASK) + | ((features << FEAT_SHIFT) & FEAT_MASK)) & 0xff); + + buffer.put(type); + if ((features & FEAT_WEIGHT) != 0) { + IntegerCompressor.putCompressedNumber(weight, buffer); + } + if ((features & FEAT_UNIQUEID) != 0) { + IntegerCompressor.putCompressedPositiveNumber(uniqueID, buffer); + } + if (flags != 0) { + buffer.put(flags); + } + } + + /** + * Returns an integer that contains all feature flags for this item. This must be kept in sync with the flags + * defined in searchlib/parsequery/parse.h. + * + * @return The feature flags. 
+ */ + private byte getFlagsFeature() { + byte FLAGS_NORANK = 0x01; + byte FLAGS_SPECIALTOKEN = 0x02; + byte FLAGS_NOPOSITIONDATA = 0x04; + byte FLAGS_ISFILTER = 0x08; + + byte ret = 0; + if (!isRanked()) { + ret |= FLAGS_NORANK; + } + if (isFromSpecialToken()) { + ret |= FLAGS_SPECIALTOKEN; + } + if (!usePositionData()) { + ret |= FLAGS_NOPOSITIONDATA; + } + if (isFilter()) { + ret |= FLAGS_ISFILTER; + } + return ret; + } + + + /** Utility method for turning a string into utf-8 bytes */ + protected static final byte[] getBytes(String string) { + return Utf8.toBytes(string); + } + public static void putString(String s, ByteBuffer buffer) { + putBytes(Utf8.toBytes(s), buffer); + } + public static void putBytes(byte [] bytes, ByteBuffer buffer) { + IntegerCompressor.putCompressedPositiveNumber(bytes.length, buffer); + buffer.put(bytes); + } + + public abstract int getTermCount(); + + /** + * <p>Returns the canonical query language string of this item.</p> + * + * <p>The canonical language represent an item by the string + * <pre> + * ([itemName] [body]) + * </pre> + * where the body may recursively be other items. + * + * <p> + * TODO: Change the output query language into a canonical form of the input + * query language + */ + public String toString() { + StringBuilder buffer = new StringBuilder(); + + if (shouldParenthize()) { + buffer.append("("); + } + if (isFilter()) { + buffer.append("|"); + } + appendHeadingString(buffer); + appendBodyString(buffer); + if (shouldParenthize()) { + buffer.append(")"); + } + + if (weight != DEFAULT_WEIGHT) { + buffer.append("!"); + buffer.append(weight); + } + + return buffer.toString(); + } + + /** + * Returns whether or not this item should be parethized when printed. + * Default is false - no parentheses + */ + protected boolean shouldParenthize() { + return false; + } + + /** Appends the heading of this string. As default getName() followed by a space. 
*/ + protected void appendHeadingString(StringBuilder buffer) { + buffer.append(getName()); + buffer.append(" "); + } + + /** + * Override to append the item body in the canonical query language of this item. + * An item is usually represented by the string + * <pre> + * ([itemName] [body]) + * </pre> + * The body must be appended appended by this method. + */ + protected abstract void appendBodyString(StringBuilder buffer); + + /** Returns a deep copy of this item */ + public Item clone() { + try { + Item clone = (Item)super.clone(); + if (this.annotations != null) + clone.annotations = this.annotations.clone(); + // note: connectedItem and connectedBacklink references are corrected in CompositeItem.clone() + return clone; + } catch (CloneNotSupportedException e) { + throw new RuntimeException("Someone made Item unclonable"); + } + } + + /** + * Returns whether this item is of the same class and + * contains the same state as the given item + */ + public boolean equals(Object object) { + if (object == null) { + return false; + } + if (object.getClass() != this.getClass()) { + return false; + } // Fails on different c.l.'s + + Item other = (Item) object; + + if (this.creator != other.creator) { + return false; + } + if (this.weight != other.weight) { + return false; + } + // if (this.termIndex!=other.termIndex) return false; + + return true; + } + + public int hashCode() { + return weight * 29 + creator.code; + } + + protected boolean hasUniqueID() { + return hasUniqueID; + } + + protected void setHasUniqueID(boolean hasUniqueID) { + this.hasUniqueID = hasUniqueID; + } + + /** + * Label this item with a symbolic name which can later be used by + * the back-end to identify specific items for ranking purposes. + * + * @param label label for this item + **/ + public void setLabel(String label) { + setHasUniqueID(true); + this.label = label; + } + + /** + * Obtain the label for this item. This method will return null if + * no label has been set. 
+ * + * @return label for this item + **/ + public String getLabel() { + return label; + } + + /** + * Sets whether or not this term item should affect ranking. + * If set to false this term is not exposed to the ranking framework in the search backend. + */ + public void setRanked(boolean isRanked) { + this.isRanked = isRanked; + } + + /** Returns whether or not this item should affect ranking. */ + public boolean isRanked() { + return isRanked; + } + + /** + * Sets whether or not position data should be used when ranking this term item. + * If set to false the search backend uses fast bit vector data structures when matching on this term + * and only a few simple ranking features will be available when ranking this term. + * Note that setting this to false also saves a lot of CPU during matching as bit vector data structures are used. + */ + public void setPositionData(boolean usePositionData) { + this.usePositionData = usePositionData; + } + + /** Returns whether or not position data should be used when ranking this item */ + public boolean usePositionData() { + return usePositionData; + } + + public void disclose(Discloser discloser) { + discloser.addProperty("connectivity", connectivity); + discloser.addProperty("connectedItem", connectedItem); //reference + + discloser.addProperty("creator", creator); + discloser.addProperty("explicitSignificance", explicitSignificance); + discloser.addProperty("isRanked", isRanked); + discloser.addProperty("usePositionData", usePositionData); + discloser.addProperty("significance", significance); + discloser.addProperty("weight", weight); + + if (label != null) { + discloser.addProperty("label", label); + } + if (hasUniqueID) { + discloser.addProperty("uniqueID", uniqueID); + } + } + + public boolean isFromSpecialToken() { + return fromSpecialToken; + } + + public void setFromSpecialToken(boolean fromSpecialToken) { + this.fromSpecialToken = fromSpecialToken; + } + + /** + * DO NOT USE + */ + public boolean 
hasConnectivityBackLink() { + return connectedBacklink != null; + } + + /** Returns true if this is the root item - that is if the parent is the QueryTree (or null for legacy reasons)*/ + public boolean isRoot() { + if (getParent()==null) return true; + if (getParent() instanceof QueryTree) return true; + return false; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/ItemHelper.java b/container-search/src/main/java/com/yahoo/prelude/query/ItemHelper.java new file mode 100644 index 00000000000..a2a140abb29 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/ItemHelper.java @@ -0,0 +1,81 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.util.Iterator; +import java.util.List; + +/** + * Helper function for Item + * @author <a href="mailto:arnebef@yahoo-inc.com">Arne Bergene Fossaa</a> + */ +public class ItemHelper { + + /* + We could have exchanged the following 3 functions with this + But this introspection is a bit too much of a hack, so we'll leave it with this. 
+ + + public static <T extends CompositeItem> T ensureIsItem(Item unknown,Class<T> tClass) { + + if(unknown != null && tClass.isInstance(unknown)) { + return (T) unknown; + } + T item; + + try { + Constructor<T> n = tClass.getConstructor(); + item = n.newInstance(); + } catch (NoSuchMethodException e) { + return null; + } catch (InvocationTargetException e) { + return null; + } catch (IllegalAccessException e) { + return null; + } catch (InstantiationException e) { + return null; + } + if(item != null) { + item.addItem(unknown); + } + return item; + + } + */ + + /** Traverse the query tree and return total number of terms */ + int getNumTerms(Item rootNode) { + int numTerms = 0; + + if (rootNode == null) { + return 0; + } else if (rootNode instanceof CompositeItem) { + CompositeItem composite = (CompositeItem) rootNode; + + for (Iterator<Item> i = composite.getItemIterator(); i.hasNext();) { + numTerms += getNumTerms(i.next()); + } + } else if (rootNode instanceof TermItem) { + return 1; + } else { + return 0; + } + return numTerms; + } + + public void getPositiveTerms(Item item, List<IndexedItem> terms) { + if (item instanceof NotItem) { + getPositiveTerms(((NotItem) item).getPositiveItem(), terms); + } else if (item instanceof PhraseItem) { + PhraseItem pItem = (PhraseItem)item; + terms.add(pItem); + } else if (item instanceof CompositeItem) { + for (Iterator<Item> i = ((CompositeItem) item).getItemIterator(); i.hasNext();) { + getPositiveTerms(i.next(), terms); + } + } else if (item instanceof TermItem) { + terms.add((TermItem)item); + } + } + + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/Limit.java b/container-search/src/main/java/com/yahoo/prelude/query/Limit.java new file mode 100644 index 00000000000..b830ba13ed5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/Limit.java @@ -0,0 +1,88 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query; + +/** + * An immutable numeric range limit which can be inclusive or exclusive + * + * @author bratseth + */ +public class Limit { + + public static final Limit NEGATIVE_INFINITY = new Limit(Double.NEGATIVE_INFINITY, false); + public static final Limit POSITIVE_INFINITY = new Limit(Double.POSITIVE_INFINITY, false); + + private final Number number; + private final boolean inclusive; + private final boolean infinite; + + public Limit(Number number, boolean inclusive) { + this.number = number; + this.infinite = Double.isInfinite(number.doubleValue()); + this.inclusive = inclusive || infinite; + } + + public Number number() { return number; } + + /** Returns true if this limit includes its number, false if it excludes it */ + public boolean isInclusive() { return inclusive; } + + String toRangeStart() { + return (inclusive ? "[" : "<" ) + (isInfinite() ? "" : number.toString()); + } + + String toRangeEnd() { + return (isInfinite() ? "" : number.toString()) + (inclusive ? "]" : ">" ); + } + + /** Returns the smaller of this and the given limit */ + public Limit min(Limit other) { + return this.isSmallerOrEqualTo(other) ? this : other; + } + + /** Returns the larger of this and the given limit */ + public Limit max(Limit other) { + return this.isLargerOrEqualTo(other) ? this : other; + } + + public boolean isSmallerOrEqualTo(Limit other) { + double thisNumber = this.number().doubleValue(); + double otherNumber = other.number().doubleValue(); + if (thisNumber == otherNumber) { + if ( ! other.isInclusive()) return false; + return true; + } + return thisNumber < otherNumber; + } + + public boolean isLargerOrEqualTo(Limit other) { + double thisNumber = this.number().doubleValue(); + double otherNumber = other.number().doubleValue(); + if (thisNumber == otherNumber) { + if ( ! 
other.isInclusive()) return false; + return true; + } + return thisNumber > otherNumber; + } + + public boolean isInfinite() { return infinite; } + + @Override + public String toString() { + return number + " (" + (inclusive ? "inclusive" : "exclusive") + ")"; + } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if ( ! (o instanceof Limit)) return false; + Limit other = (Limit)o; + if (Boolean.compare(other.inclusive, this.inclusive) != 0) return false; + return this.number.equals(other.number); + } + + @Override + public int hashCode() { + return number.hashCode() + (inclusive ? 1 : 0); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/MarkerWordItem.java b/container-search/src/main/java/com/yahoo/prelude/query/MarkerWordItem.java new file mode 100644 index 00000000000..8fb16e8a3ba --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/MarkerWordItem.java @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.net.UrlTokenizer; +import com.yahoo.prelude.query.textualrepresentation.Discloser; + + +/** + * Special words known by the index used for marking things. + * The reserved word itself is not public, while a symbol representation is. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class MarkerWordItem extends WordItem { + + /** Creates a special word item which marks the start of a host name */ + public static WordItem createStartOfHost() { + return new MarkerWordItem("^", UrlTokenizer.TERM_STARTHOST); + } + + /** Creates a special word item which marks the end of a host name */ + public static WordItem createEndOfHost() { + return new MarkerWordItem("$", UrlTokenizer.TERM_ENDHOST); + } + + private String markerWord; + + private MarkerWordItem(String publicSymbol, String markerWord) { + super(publicSymbol); + this.markerWord = markerWord; + } + + /** Returns the marker word for encoding */ + protected String getEncodedWord() { + return markerWord; + } + + public boolean equals(Object o) { + if (!super.equals(o)) { + return false; + } + if (!(o instanceof MarkerWordItem)) { + return false; + } + + MarkerWordItem other = (MarkerWordItem) o; + + return markerWord.equals(other.markerWord); + } + + public int hashCode() { + return super.hashCode() + 499 * markerWord.hashCode(); + } + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("markerWord", markerWord); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/NearItem.java b/container-search/src/main/java/com/yahoo/prelude/query/NearItem.java new file mode 100644 index 00000000000..9fa42f90a29 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/NearItem.java @@ -0,0 +1,93 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query; + +import com.yahoo.compress.IntegerCompressor; +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import java.nio.ByteBuffer; + + +/** + * <p>A set of terms which must be near each other to match.</p> + * + * @author bratseth + * @author havardpe + */ +public class NearItem extends CompositeItem { + + protected int distance; + + /** The default distance used if none is specified: 2 */ + public static final int defaultDistance=2; + + /** Creates a NEAR item with distance 2 */ + public NearItem() { + setDistance(defaultDistance); + } + + /** + * Creates a <i>near</i> item with a limit to the distance + * between the words. + * + * @param distance the number of word position which may separate + * the words for this near item to match + */ + public NearItem(int distance) { + setDistance(distance); + } + + public void setDistance(int distance) { + if (distance < 0) { + throw new IllegalArgumentException("Can not use negative distance '" + distance + "'."); + } + this.distance = distance; + } + + public int getDistance() { + return distance; + } + + public ItemType getItemType() { + return ItemType.NEAR; + } + + public String getName() { + return "NEAR"; + } + + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + IntegerCompressor.putCompressedPositiveNumber(distance, buffer); + } + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("limit", distance); + } + + /** Appends the heading of this string - <code>[getName()]([limit]) </code> */ + protected void appendHeadingString(StringBuilder buffer) { + buffer.append(getName()); + buffer.append("("); + buffer.append(distance); + buffer.append(")"); + buffer.append(" "); + } + + public int hashCode() { + return super.hashCode() + 23* distance; + } + + /** + * Returns whether this item is of the same class and + * contains the same state as the given item + */ + public boolean equals(Object 
object) { + if (!super.equals(object)) return false; + NearItem other = (NearItem) object; // Ensured by superclass + if (this.distance !=other.distance) return false; + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/NonReducibleCompositeItem.java b/container-search/src/main/java/com/yahoo/prelude/query/NonReducibleCompositeItem.java new file mode 100644 index 00000000000..abac8200f49 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/NonReducibleCompositeItem.java @@ -0,0 +1,16 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +/** + * A composite item which specifies semantics which are not maintained + * if an instance with a single child is replaced by the single child. + * <p> + * Most composites, like AND and OR, are reducible as e.g (AND a) is semantically equal to (a). + * <p> + * This type functions as a marked interfaces for query rewriters. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + * @since 5.1.22 + */ +public abstract class NonReducibleCompositeItem extends CompositeItem { +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/NotItem.java b/container-search/src/main/java/com/yahoo/prelude/query/NotItem.java new file mode 100644 index 00000000000..0432795b716 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/NotItem.java @@ -0,0 +1,143 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.protect.Validator; + +import java.util.Iterator; + + +/** + * <p>A composite item where the first item is positive and the following + * items are negative items which should be excluded from the result. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +// TODO: Handle nulls by creating nullItem or checking in encode/toString +public class NotItem extends CompositeItem { + + public ItemType getItemType() { + return ItemType.NOT; + } + + public String getName() { + return "NOT"; + } + + /** + * Adds an item. The first item is the positive + * the rest is negative + */ + public void addItem(Item item) { + super.addItem(item); + } + + /** + * Adds a negative item. Like addItem but skips the first position + * (position 0) if it is not already set. + */ + public void addNegativeItem(Item negative) { + if (getItemCount() == 0) { + insertNullFirstItem(); + } + addItem(negative); + } + + /** + * Returns the positive item (the first subitem), + * or null if no positive items has been added + */ + public Item getPositiveItem() { + if (getItemCount() == 0) { + return null; + } + return getItem(0); + } + + /** + * Sets the positive item (the first item) + * + * @return the old positive item, or null if there was no items + */ + public Item setPositiveItem(Item item) { + Validator.ensureNotNull("Positive item of " + this, item); + if (getItemCount() == 0) { + addItem(item); + return null; + } else { + return setItem(0, item); + } + } + + /** + * Convenience method for adding a positive item. 
+ * If a positive item is already present + * the positive item becomes an AndItem with the items added + */ + public void addPositiveItem(Item item) { + if (getPositiveItem() == null) { + setPositiveItem(item); + } else if (getPositiveItem() instanceof AndItem) { + ((AndItem) getPositiveItem()).addItem(item); + } else { + AndItem positives = new AndItem(); + + positives.addItem(getPositiveItem()); + positives.addItem(item); + setPositiveItem(positives); + } + } + + public boolean removeItem(Item item) { + int removedIndex = getItemIndex(item); + boolean removed = super.removeItem(item); + + if (removed && removedIndex == 0) { + insertNullFirstItem(); + } + return removed; + } + + public Item removeItem(int index) { + Item removed = super.removeItem(index); + + if (index == 0) { // Don't make the first negative the positive + insertNullFirstItem(); + } + return removed; + } + + /** Not items uses a empty heading instead of "NOT " */ + protected void appendHeadingString(StringBuilder buffer) {} + + /** + * Overridden to tolerate nulls and to append "+" + * to the first item and "-" to the rest + */ + protected void appendBodyString(StringBuilder buffer) { + boolean isFirstItem = true; + + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + Item item = i.next(); + + if (isFirstItem) { + buffer.append("+"); + } else { + buffer.append(" -"); + } + if (item == null) { + buffer.append("(null)"); + } else { + buffer.append(item.toString()); + } + isFirstItem = false; + } + } + + /** Returns the number of actual *positive* terms in this */ + @Override + public int getTermCount() { + Item positive = getPositiveItem(); + return positive == null ? 
0 : positive.getTermCount(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/NullItem.java b/container-search/src/main/java/com/yahoo/prelude/query/NullItem.java new file mode 100644 index 00000000000..aa3a04d670f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/NullItem.java @@ -0,0 +1,49 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +import java.nio.ByteBuffer; + + +/** + * A place holder for null queries to make searchers easier to write. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class NullItem extends Item { + + public NullItem() {} + + /** Does nothing */ + public void setIndexName(String index) {} + + public int encode(ByteBuffer buffer) { + throw new RuntimeException( + "A NullItem was attempted encoded. " + + "This is probably a misbehaving " + "searcher."); + } + + public ItemType getItemType() { + throw new RuntimeException( + "Packet code access attempted. " + + "A NullItem has no packet code. " + + "This is probably a misbehaving " + "searcher."); + } + + public void appendBodyString(StringBuilder buffer) { + // No body for this Item + return; + } + + public void appendHeadingString(StringBuilder buffer) { + buffer.append(getName()); + } + + public String getName() { + return "NULL"; + } + + @Override + public int getTermCount() { return 0; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/ONearItem.java b/container-search/src/main/java/com/yahoo/prelude/query/ONearItem.java new file mode 100644 index 00000000000..c7caa9acc8f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/ONearItem.java @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +/** + * Ordered NearItem. 
+ * <p> + * Matches as a near operator, but also demands that the operands have the + * same order in the document as in the query. + * + * @author bratseth + */ +public class ONearItem extends NearItem { + + /** Creates a ordered NEAR item with limit 2 */ + public ONearItem() { + setDistance(2); + } + + /** + * Creates a ordered near item which matches if there are at most <code>distance</code> + * separation between the words, in the right direction. + */ + public ONearItem(int distance) { + super(distance); + } + + public ItemType getItemType() { + return ItemType.ONEAR; + } + + public String getName() { + return "ONEAR"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/OrItem.java b/container-search/src/main/java/com/yahoo/prelude/query/OrItem.java new file mode 100644 index 00000000000..20d29cd9c0e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/OrItem.java @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +/** + * An or'ing of a collection of sub-expressions + * + * @author bratseth + */ +public class OrItem extends CompositeItem { + + public ItemType getItemType() { + return ItemType.OR; + } + + public String getName() { + return "OR"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/PhraseItem.java b/container-search/src/main/java/com/yahoo/prelude/query/PhraseItem.java new file mode 100644 index 00000000000..130eafe49ef --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/PhraseItem.java @@ -0,0 +1,266 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query; + +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import java.nio.ByteBuffer; +import java.util.Iterator; + +/** + * A term which contains a phrase - a collection of word terms + * + * @author bratseth + * @author havardpe + */ +public class PhraseItem extends CompositeIndexedItem { + + /** Whether this was explicitly written as a phrase using quotes by the user */ + private boolean explicit = false; + + /** Creates an empty phrase */ + public PhraseItem() {} + + /** Creates an empty phrase which will search the given index */ + public PhraseItem(String indexName) { + setIndexName(indexName); + } + + /** Creates a phrase containing the given words */ + public PhraseItem(String[] words) { + for (int i = 0; i < words.length; i++) { + addIndexedItem(new WordItem(words[i])); + } + } + + public ItemType getItemType() { + return ItemType.PHRASE; + } + + public String getName() { + return "PHRASE"; + } + + public void setIndexName(String index) { + super.setIndexName(index); + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + IndexedItem word = (IndexedItem) i.next(); + word.setIndexName(index); + } + } + + /** + * Sets whether this was explicitly written as a phrase using quotes by the + * user + */ + public void setExplicit(boolean explicit) { + this.explicit = explicit; + } + + /** + * Returns whether this was explicitly written as a phrase using quotes by + * the user Default is false + */ + public boolean isExplicit() { + return explicit; + } + + private IndexedItem convertIntToWord(Item orig) { + IntItem o = (IntItem) orig; + return new WordItem(o.stringValue(), o.getIndexName(), o.isFromQuery()); + } + + /** + * Adds subitem. The word will have its index name set to the index name of + * this phrase. If the item is a word, it will simply be added, if the item + * is a phrase, each of the words of the phrase will be added. 
+ * + * @throws IllegalArgumentException + * if the given item is not a WordItem or PhraseItem + */ + public void addItem(Item item) { + if (item instanceof WordItem || item instanceof PhraseSegmentItem || item instanceof WordAlternativesItem) { + addIndexedItem((IndexedItem) item); + } else if (item instanceof IntItem) { + addIndexedItem(convertIntToWord(item)); + } else if (item instanceof PhraseItem) { + PhraseItem phrase = (PhraseItem) item; + + for (Iterator<Item> i = phrase.getItemIterator(); i.hasNext();) { + addIndexedItem((IndexedItem) i.next()); + } + } else { + throw new IllegalArgumentException("Can not add " + item + + " to a phrase"); + } + } + + @Override + public void addItem(int index, Item item) { + if (item instanceof WordItem || item instanceof PhraseSegmentItem) { + addIndexedItem(index, (IndexedItem) item); + } else if (item instanceof IntItem) { + addIndexedItem(index, convertIntToWord(item)); + } else if (item instanceof PhraseItem) { + PhraseItem phrase = (PhraseItem) item; + + for (Iterator<Item> i = phrase.getItemIterator(); i.hasNext();) { + addIndexedItem(index++, (WordItem) i.next()); + } + } else { + throw new IllegalArgumentException("Can not add " + item + + " to a phrase"); + } + } + + @Override + public Item setItem(int index, Item item) { + if (item instanceof WordItem || item instanceof PhraseSegmentItem) { + return setIndexedItem(index, (IndexedItem) item); + } else if (item instanceof IntItem) { + return setIndexedItem(index, convertIntToWord(item)); + } else if (item instanceof PhraseItem) { + PhraseItem phrase = (PhraseItem) item; + Iterator<Item> i = phrase.getItemIterator(); + // we assume we don't try to add empty phrases + IndexedItem firstItem = (IndexedItem) i.next(); + Item toReturn = setIndexedItem(index++, firstItem); + + while (i.hasNext()) { + addIndexedItem(index++, (IndexedItem) i.next()); + } + return toReturn; + } else { + throw new IllegalArgumentException("Can not add " + item + + " to a phrase"); + } + } + 
+ private void addIndexedItem(IndexedItem word) { + word.setIndexName(this.getIndexName()); + super.addItem((Item) word); + } + + private void addIndexedItem(int index, IndexedItem word) { + word.setIndexName(this.getIndexName()); + super.addItem(index, (Item) word); + } + + private Item setIndexedItem(int index, IndexedItem word) { + word.setIndexName(this.getIndexName()); + return super.setItem(index, (Item) word); + } + + /** + * Returns a subitem as a word item + * + * @param index + * the (0-base) index of the item to return + * @throws IndexOutOfBoundsException + * if there is no subitem at index + */ + public WordItem getWordItem(int index) { + return (WordItem) getItem(index); + } + + /** + * Returns a subitem as a block item, + * + * @param index + * the (0-base) index of the item to return + * @throws IndexOutOfBoundsException + * if there is no subitem at index + */ + public BlockItem getBlockItem(int index) { + return (BlockItem) getItem(index); + } + + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); // takes care of index bytes + } + + public int encode(ByteBuffer buffer) { + encodeThis(buffer); + int itemCount = 1; + + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + Item subitem = i.next(); + + if (subitem instanceof PhraseSegmentItem) { + PhraseSegmentItem seg = (PhraseSegmentItem) subitem; + + // "What encode does, minus what encodeThis does" + itemCount += seg.encodeContent(buffer); + } else { + itemCount += subitem.encode(buffer); + } + } + return itemCount; + } + + /** + * Returns false, no parenthezes for phrases + */ + protected boolean shouldParenthize() { + return false; + } + + /** Phrase items uses a empty heading instead of "PHRASE " */ + protected void appendHeadingString(StringBuilder buffer) { + } + + protected void appendBodyString(StringBuilder buffer) { + appendIndexString(buffer); + + buffer.append("\""); + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + Item item = i.next(); + + 
if (item instanceof WordItem) { + WordItem wordItem = (WordItem) item; + + buffer.append(wordItem.getWord()); + } else { + PhraseSegmentItem seg = (PhraseSegmentItem) item; + + seg.appendContentsString(buffer); + } + if (i.hasNext()) { + buffer.append(" "); + } + } + buffer.append("\""); + } + + public String getIndexedString() { + StringBuilder buf = new StringBuilder(); + + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + IndexedItem indexedItem = (IndexedItem) i.next(); + + buf.append(indexedItem.getIndexedString()); + if (i.hasNext()) { + buf.append(' '); + } + } + return buf.toString(); + } + + protected int encodingArity() { + return getNumWords(); + } + + public int getNumWords() { + int numWords = 0; + + for (Iterator<Item> j = getItemIterator(); j.hasNext();) { + numWords += ((IndexedItem) j.next()).getNumWords(); + } + return numWords; + } + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("explicit", explicit); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/PhraseSegmentItem.java b/container-search/src/main/java/com/yahoo/prelude/query/PhraseSegmentItem.java new file mode 100644 index 00000000000..7defe67eede --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/PhraseSegmentItem.java @@ -0,0 +1,202 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import java.nio.ByteBuffer; +import java.util.Iterator; + + +/** + * A term which contains a fixed length phrase, a collection of word terms, + * resulting from a single segmentation operation. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class PhraseSegmentItem extends IndexedSegmentItem { + + /** Whether this was explicitly written as a phrase using quotes by the user */ + private boolean explicit = false; + + /** + * Creates a phrase containing the same words and state (as pertinent) as + * the given SegmentAndItem. + */ + public PhraseSegmentItem(AndSegmentItem segAnd) { + super(segAnd.getRawWord(), segAnd.stringValue(), segAnd.isFromQuery(), segAnd.isStemmed(), segAnd.getOrigin()); + if (segAnd.getItemCount() > 0) { + WordItem w = (WordItem) segAnd.getItem(0); + setIndexName(w.getIndexName()); + for (Iterator<Item> i = segAnd.getItemIterator(); i.hasNext();) { + WordItem word = (WordItem) i.next(); + addWordItem(word); + } + } + } + + public PhraseSegmentItem(String rawWord, boolean isFromQuery, boolean stemmed) { + super(rawWord, rawWord, isFromQuery, stemmed, null); + } + + /** + * Creates a phrase segment from strings + * + * @param rawWord the raw text as received in the request + * @param current the normalized form of the raw text, or the raw text repeated if no normalized form is known + * @param isFromQuery whether this originates in the request + * @param stemmed whether this is stemmed + */ + public PhraseSegmentItem(String rawWord, String current, boolean isFromQuery, boolean stemmed) { + super(rawWord, current, isFromQuery, stemmed, null); + } + + public PhraseSegmentItem(String rawWord, String current, boolean isFromQuery, + boolean stemmed, Substring substring) { + super(rawWord, current, isFromQuery, stemmed, substring); + } + + public ItemType getItemType() { + return ItemType.PHRASE; + } + + public String getName() { + return "SPHRASE"; + } + + public void setIndexName(String index) { + super.setIndexName(index); + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + WordItem word = (WordItem) i.next(); + word.setIndexName(index); + } + } + + @Override + public void setWeight(int 
weight) { + super.setWeight(weight); + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + Item word = i.next(); + word.setWeight(weight); + } + } + + /** + * Adds subitem. The word will have its index name set to the index name + * of this phrase. If the item is a word, it will simply be added, + * if the item is a phrase, each of the words of the phrase will be added. + * + * @throws IllegalArgumentException if the given item is not a WordItem or PhraseItem + */ + public void addItem(Item item) { + if (item instanceof WordItem) { + addWordItem((WordItem) item); + } else { + throw new IllegalArgumentException( + "Can not add " + item + " to a segment phrase"); + } + } + + private void addWordItem(WordItem word) { + word.setIndexName(this.getIndexName()); + super.addItem(word); + } + + // TODO: Override addItem(index,item), setItem(index,item) + + /** + * Returns a subitem as a word item + * + * @param index the (0-base) index of the item to return + * @throws IndexOutOfBoundsException if there is no subitem at index + */ + public WordItem getWordItem(int index) { + return (WordItem) getItem(index); + } + + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); // takes care of index bytes + } + + public int encode(ByteBuffer buffer) { + encodeThis(buffer); + return encodeContent(buffer, 1); + } + + public int encodeContent(ByteBuffer buffer) { + return encodeContent(buffer, 0); + } + + private int encodeContent(ByteBuffer buffer, int itemCount) { + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + Item subitem = i.next(); + itemCount += subitem.encode(buffer); + } + return itemCount; + } + + + /** + * Returns false, no parenthezes for phrases + */ + protected boolean shouldParenthize() { + return false; + } + + /** Segment phrase items uses a empty heading instead of "SPHRASE " */ + protected void appendHeadingString(StringBuilder buffer) {} + + protected void appendBodyString(StringBuilder buffer) { + 
appendIndexString(buffer); + appendContentsString(buffer); + } + + void appendContentsString(StringBuilder buffer) { + buffer.append("'"); + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + WordItem wordItem = (WordItem) i.next(); + + buffer.append(wordItem.getWord()); + if (i.hasNext()) { + buffer.append(" "); + } + } + buffer.append("'"); + } + + // TODO: Must check all pertinent items + public boolean equals(Object object) { + if (!super.equals(object)) { + return false; + } + // PhraseSegmentItem other = (PhraseSegmentItem) object; // Ensured by superclass + return true; + } + + public String getIndexedString() { + StringBuilder buf = new StringBuilder(); + + for (Iterator<Item> i = getItemIterator(); i.hasNext();) { + IndexedItem indexedItem = (IndexedItem) i.next(); + + buf.append(indexedItem.getIndexedString()); + if (i.hasNext()) { + buf.append(' '); + } + } + return buf.toString(); + } + + public boolean isExplicit() { + return explicit; + } + + public void setExplicit(boolean explicit) { + this.explicit = explicit; + } + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("explicit", explicit); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/PredicateQueryItem.java b/container-search/src/main/java/com/yahoo/prelude/query/PredicateQueryItem.java new file mode 100644 index 00000000000..6a1306ddb2d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/PredicateQueryItem.java @@ -0,0 +1,246 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.compress.IntegerCompressor; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collection; + +/** + * A PredicateQueryItem is a collection of feature/value-pairs + * that are used to query predicate fields, which contains boolean + * constraints. 
If the feature/value-pairs from the PredicateQueryItem + * satisfies the boolean constraints, the document is a match. + * + * @author Magnar Nedland + */ +public class PredicateQueryItem extends SimpleTaggableItem { + + private String fieldName = "predicate"; + private ArrayList<Entry> features = new ArrayList<>(); + private ArrayList<RangeEntry> rangeFeatures = new ArrayList<>(); + public static final long ALL_SUB_QUERIES = 0xffffffffffffffffL; + + /** + * Sets the field name to be used for the predicates. + * @param index name of the field. + */ + @Override + public void setIndexName(String index) { + this.fieldName = index; + } + + /** + * @return the field name used for the predicates. + */ + public String getIndexName() { + return fieldName; + } + + /** + * Adds a feature/value-pair to the predicate query. This feature is applied to all sub queries. + * @param key name of the feature to be set in this query. + * @param value value of the feature. + */ + public void addFeature(String key, String value) { + addFeature(key, value, ALL_SUB_QUERIES); + } + + /** + * Adds a feature/value-pair to the predicate query. + * @param key name of the feature to be set in this query. + * @param value value of the feature. + * @param subQueryBitmap bitmap specifying which sub queries this feature applies to. + */ + public void addFeature(String key, String value, long subQueryBitmap) { + addFeature(new Entry(key, value, subQueryBitmap)); + } + + /** + * Adds a feature/value-pair to the predicate query. + * @param entry the feature to add. + */ + public void addFeature(Entry entry) { + features.add(entry); + } + + /** + * Adds a range feature with a given value to the predicate query. + * This feature is applied to all sub queries. + * @param key name of the feature to be set in this query. + * @param value value of the feature. 
+ */ + public void addRangeFeature(String key, long value) { + addRangeFeature(key, value, ALL_SUB_QUERIES); + } + + /** + * Adds a range feature with a given value to the predicate query. + * @param key name of the feature to be set in this query. + * @param value value of the feature. + * @param subQueryBitmap bitmap specifying which sub queries this feature applies to. + */ + public void addRangeFeature(String key, long value, long subQueryBitmap) { + addRangeFeature(new RangeEntry(key, value, subQueryBitmap)); + } + + /** + * Adds a range feature with a given value to the predicate query. + * @param entry the feature to add. + */ + public void addRangeFeature(RangeEntry entry) { + rangeFeatures.add(entry); + } + + /** + * @return a mutable collection of feature entries. + */ + public Collection<Entry> getFeatures() { + return features; + } + + /** + * @return a mutable collection of range feature entries. + */ + public Collection<RangeEntry> getRangeFeatures() { + return rangeFeatures; + } + + @Override + public ItemType getItemType() { + return ItemType.PREDICATE_QUERY; + } + + @Override + public String getName() { + return "PREDICATE_QUERY_ITEM"; + } + + @Override + public int encode(ByteBuffer buffer) { + super.encodeThis(buffer); + putString(fieldName, buffer); + encodeFeatures(features, buffer); + encodeFeatures(rangeFeatures, buffer); + return 1; // number of encoded stack dump items + } + + private void encodeFeatures(ArrayList<? 
extends EntryBase> features, ByteBuffer buffer) { + IntegerCompressor.putCompressedPositiveNumber(features.size(), buffer); + for (EntryBase e : features) { + e.encode(buffer); + } + } + + @Override + public int getTermCount() { + return 1; // one big term + } + + @Override + protected void appendBodyString(StringBuilder buffer) { + boolean first = true; + for (Entry e : features) { + if (!first) { + buffer.append(", "); + } else { + first = false; + } + buffer.append(e.getKey()).append('=').append(e.getValue()); + if (e.getSubQueryBitmap() != ALL_SUB_QUERIES) { + buffer.append("[0x").append(Long.toHexString(e.getSubQueryBitmap())).append(']'); + } + } + for (RangeEntry e : rangeFeatures) { + if (!first) { + buffer.append(", "); + } else { + first = false; + } + buffer.append(e.getKey()).append(':').append(e.getValue()); + if (e.getSubQueryBitmap() != ALL_SUB_QUERIES) { + buffer.append("[0x").append(Long.toHexString(e.getSubQueryBitmap())).append(']'); + } + } + } + + @Override + public PredicateQueryItem clone() { + PredicateQueryItem clone = (PredicateQueryItem)super.clone(); + clone.features = new ArrayList<>(this.features); + clone.rangeFeatures = new ArrayList<>(this.rangeFeatures); + return clone; + } + + public abstract static class EntryBase { + private String key; + private long subQueryBitmap; + + public EntryBase(String key, long subQueryBitmap) { + this.key = key; + this.subQueryBitmap = subQueryBitmap; + } + + public String getKey() { + return key; + } + + public long getSubQueryBitmap() { + return subQueryBitmap; + } + + public void setSubQueryBitmap(long subQueryBitmap) { + this.subQueryBitmap = subQueryBitmap; + } + + public abstract void encode(ByteBuffer buffer); + } + + public static class Entry extends EntryBase { + private String value; + + public Entry(String key, String value) { + this(key, value, ALL_SUB_QUERIES); + } + public Entry(String key, String value, long subQueryBitmap) { + super(key, subQueryBitmap); + this.value = value; + } + + 
public String getValue() { + return value; + } + + @Override + public void encode(ByteBuffer buffer) { + putString(getKey(), buffer); + putString(getValue(), buffer); + buffer.putLong(getSubQueryBitmap()); + } + } + + public static class RangeEntry extends EntryBase { + private long value; + + public RangeEntry(String key, long value) { + this(key, value, ALL_SUB_QUERIES); + } + + public RangeEntry(String key, long value, long subQueryBitmap) { + super(key, subQueryBitmap); + this.value = value; + } + + public long getValue() { + return value; + } + + @Override + public void encode(ByteBuffer buffer) { + putString(getKey(), buffer); + buffer.putLong(getValue()); + buffer.putLong(getSubQueryBitmap()); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/PrefixItem.java b/container-search/src/main/java/com/yahoo/prelude/query/PrefixItem.java new file mode 100644 index 00000000000..9c3a88178f7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/PrefixItem.java @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query; + + +/** + * A word which matches beginnings of words instead of complete words + * + * @author bratseth + */ +public class PrefixItem extends WordItem { + + public PrefixItem(String prefix) { + this(prefix, false); + } + + public PrefixItem(String prefix, boolean isFromQuery) { + super(prefix, isFromQuery); + } + + public ItemType getItemType() { + return ItemType.PREFIX; + } + + public String getName() { + return "PREFIX"; + } + + public String stringValue() { + return getWord() + "*"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/PureWeightedInteger.java b/container-search/src/main/java/com/yahoo/prelude/query/PureWeightedInteger.java new file mode 100644 index 00000000000..9a78d4c8765 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/PureWeightedInteger.java @@ -0,0 +1,50 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.nio.ByteBuffer; + +/** + * Created with IntelliJ IDEA. + * User: balder + * Date: 07.12.12 + * Time: 13:42 + * To change this template use File | Settings | File Templates. 
+ */ +// TODO: Fix javadoc +public class PureWeightedInteger extends PureWeightedItem { + + private final long value; + + public PureWeightedInteger(long value) { + this(value, 100); + } + public PureWeightedInteger(long value, int weight) { + super(weight); + this.value = value; + } + + @Override + public ItemType getItemType() { + return ItemType.PURE_WEIGHTED_INTEGER; + } + + @Override + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + buffer.putLong(value); + } + + @Override + public int getTermCount() { + return 1; + } + + @Override + protected void appendBodyString(StringBuilder buffer) { + buffer.append(value); + super.appendBodyString(buffer); + } + public long getValue() { + return value; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/PureWeightedItem.java b/container-search/src/main/java/com/yahoo/prelude/query/PureWeightedItem.java new file mode 100644 index 00000000000..16f38159235 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/PureWeightedItem.java @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.nio.ByteBuffer; + +/** + * Created with IntelliJ IDEA. + * User: balder + * Date: 07.12.12 + * Time: 13:24 + * To change this template use File | Settings | File Templates. + */ +// TODO: Fix javadoc +public abstract class PureWeightedItem extends Item { + + public PureWeightedItem(int weight) { + setWeight(weight); + } + @Override + public void setIndexName(String index) { + // No index + } + + @Override + public String getName() { + return getItemType().name(); //To change body of implemented methods use File | Settings | File Templates. 
+ } + + @Override + public int encode(ByteBuffer buffer) { + encodeThis(buffer); + return 1; + } + + @Override + protected void appendBodyString(StringBuilder buffer) { + buffer.append(':').append(getWeight()); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/PureWeightedString.java b/container-search/src/main/java/com/yahoo/prelude/query/PureWeightedString.java new file mode 100644 index 00000000000..368ccd25483 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/PureWeightedString.java @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.nio.ByteBuffer; + +/** + * Created with IntelliJ IDEA. + * User: balder + * Date: 07.12.12 + * Time: 13:15 + * To change this template use File | Settings | File Templates. + */ +// TODO: Fix javadoc +public class PureWeightedString extends PureWeightedItem { + + private final String value; + + public PureWeightedString(String value) { + this(value, 100); + } + public PureWeightedString(String value, int weight) { + super(weight); + this.value = value; + } + + @Override + public ItemType getItemType() { + return ItemType.PURE_WEIGHTED_STRING; + } + + @Override + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + putString(value, buffer); + } + + @Override + public int getTermCount() { + return 1; + } + + @Override + protected void appendBodyString(StringBuilder buffer) { + buffer.append(value); + super.appendBodyString(buffer); + } + + public String getString() { + return value; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java b/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java new file mode 100644 index 00000000000..410eb1c35f5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java @@ -0,0 +1,219 @@ +// 
Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +import com.yahoo.search.Query; +import com.yahoo.search.query.QueryTree; + +import java.util.*; + + +/** + * A class which canonicalizes and validates queries. + * This class is multithread safe. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class QueryCanonicalizer { + + /** The name of the operation performed by this (for use in search chain ordering) */ + public static final String queryCanonicalization = "queryCanonicalization"; + + /** + * Validates this query and carries out possible operations on this query + * which simplifies it without changing its semantics. + * + * @return null if the query is valid, an error message if it is invalid + */ + public static String canonicalize(Query query) { + Item root = query.getModel().getQueryTree().getRoot(); + return canonicalize(query, root); + } + + /** + * Validates this query and carries out possible operations on this query + * which simplifies it without changing its semantics. + * + * @return null if the query is valid, an error message if it is invalid + */ + public static String canonicalize(QueryTree query) { + QueryWrapper q = new QueryWrapper(); + q.setRoot(query.getRoot()); // Could get rid of the wrapper... + treeCanonicalize(q, query.getRoot(), null); + query.setRoot(q.root); + return q.error; + } + + /** + * Validates this query and + * carries out possible operations on this query which simplifies it + * without changing its semantics. 
+ * + * @param item the item to canonicalize + * @return null if the query is valid, an error message if it is invalid + */ + private static String canonicalize(Query query, Item item) { + QueryWrapper q = new QueryWrapper(); + q.setRoot(item); + treeCanonicalize(q, query.getModel().getQueryTree().getRoot(), null); + if (q.root == null) + q.root = new NullItem(); + query.getModel().getQueryTree().setRoot(q.root); + return q.error; + } + + /** + * @param bag wrapper for error message and query root + * @param item the item to canonicalize + * @param iterator iterator for the above item if pertinent + * @return whether the query could be canonicalized into something + */ + public static boolean treeCanonicalize(QueryWrapper bag, Item item, ListIterator<Item> iterator) { + if (iterator == null && (item == null || item instanceof NullItem)) { + bag.setError("No query"); + return false; + } + + if (item instanceof TermItem) { + return true; + } + + if (item instanceof NullItem) { + iterator.remove(); + } + + if ( ! (item instanceof CompositeItem)) { + return true; + } // Impossible yet + CompositeItem composite = (CompositeItem) item; + + for (ListIterator<Item> i = composite.getItemIterator(); i.hasNext();) { + Item child = i.next(); + boolean subtreeOK = treeCanonicalize(bag, child, i); + + if (!subtreeOK) { + return false; + } + } + + if (composite instanceof EquivItem) { + removeDuplicates((EquivItem) composite); + } + else if (composite instanceof RankItem) { + makeDuplicatesCheap((RankItem)composite); + } + else if (composite instanceof NotItem) { + if (((NotItem) composite).getPositiveItem() == null) { + bag.setError("Can not search for only negative items"); + return false; + } + } + + if (composite.getItemCount() == 0) { + if (iterator == null) { + bag.setRoot(new NullItem()); + bag.setError("No query: Contained an empty " + composite.getName() + " only"); + return false; + } else { + iterator.remove(); + } + } + + if (composite.getItemCount() == 1 && ! 
(composite instanceof NonReducibleCompositeItem)) { + if (composite instanceof PhraseItem || composite instanceof PhraseSegmentItem) { + composite.getItem(0).setWeight(composite.getWeight()); + } + if (iterator == null) { + bag.setRoot(composite.getItem(0)); + } else { + iterator.set(composite.getItem(0)); + } + } + + return true; + } + + private static void removeDuplicates(EquivItem composite) { + int origSize = composite.getItemCount(); + for (int i = origSize - 1; i >= 1; --i) { + Item deleteCandidate = composite.getItem(i); + for (int j = 0; j < i; ++j) { + Item check = composite.getItem(j); + if (deleteCandidate.getClass() == check.getClass()) { + if (deleteCandidate instanceof PhraseItem) { + PhraseItem phraseDeletionCandidate = (PhraseItem) deleteCandidate; + PhraseItem phraseToCheck = (PhraseItem) check; + if (phraseDeletionCandidate.getIndexedString().equals(phraseToCheck.getIndexedString())) { + composite.removeItem(i); + break; + } + } else if (deleteCandidate instanceof PhraseSegmentItem) { + PhraseSegmentItem phraseSegmentDeletionCandidate = (PhraseSegmentItem) deleteCandidate; + PhraseSegmentItem phraseSegmentToCheck = (PhraseSegmentItem) check; + if (phraseSegmentDeletionCandidate.getIndexedString().equals(phraseSegmentToCheck.getIndexedString())) { + composite.removeItem(i); + break; + } + } else if (deleteCandidate instanceof BlockItem) { + BlockItem blockDeletionCandidate = (BlockItem) deleteCandidate; + BlockItem blockToCheck = (BlockItem) check; + if (blockDeletionCandidate.stringValue().equals(blockToCheck.stringValue())) { + composite.removeItem(i); + break; + } + } + } + } + } + } + + /** + * If a term is present as both a rank term (i.e not the first child) and in + * the match condition (first child), then turn off any rank calculation for + * the term during matching, as it will be made available anyway for matches + * by the same term in the rank part. 
+ * + * @param rankItem + * an item which will be simplified in place + */ + private static void makeDuplicatesCheap(RankItem rankItem) { + // Collect terms used for ranking + Set<TermItem> rankTerms = new HashSet<>(); + for (int i = 1; i < rankItem.getItemCount(); i++) { + if (rankItem.getItem(i) instanceof TermItem) + rankTerms.add((TermItem)rankItem.getItem(i)); + } + + // Make terms used for matching cheap if they also are ranking terms + makeDuplicatesCheap(rankItem.getItem(0), rankTerms); + } + + private static void makeDuplicatesCheap(Item item, Set<TermItem> rankTerms) { + if (item instanceof CompositeItem) { + for (ListIterator<Item> i = ((CompositeItem)item).getItemIterator(); i.hasNext();) + makeDuplicatesCheap(i.next(), rankTerms); + } + else if (rankTerms.contains(item)) { + item.setRanked(false); + item.setPositionData(false); + } + } + + public static class QueryWrapper { + private Item root = null; + private String error = null; + + public Item getRoot() { return root; } + public void setRoot(Item root) { + this.root = root; + } + public String getError() { + return error; + } + public void setError(String error) { + this.error = error; + } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/QueryException.java b/container-search/src/main/java/com/yahoo/prelude/query/QueryException.java new file mode 100644 index 00000000000..58dc73cd0e1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/QueryException.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +/** + * Runtime exception to mark errors in query parsing. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class QueryException extends RuntimeException { + private static final long serialVersionUID = -2975856668328596533L; + + public QueryException(String message) { + super(message); + } + + public QueryException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/RangeItem.java b/container-search/src/main/java/com/yahoo/prelude/query/RangeItem.java new file mode 100644 index 00000000000..4db8ff0b47a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/RangeItem.java @@ -0,0 +1,114 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +/** + * This class represents a numeric range. You can also specify the number of hits you want this range to produce, + * which can be used to create more efficient searches. + * Note that '0' as hit limit means all hits matching the range criterion will be a match, + * while positive numbers start from 'from' working + * its way towards 'to' until it has reached its limit or range is exhausted. Negative number means that it will start + * from 'to' and work its way towards 'from'. + * + * @author balder + * @author bratseth + * @since 5.1.23 + */ +// Note that this is just a convenience subclass of IntItem - it does not add any functionality not available in it. 
+public class RangeItem extends IntItem { + + /** + * Creates a new range operator + * + * @param from inclusive start point for range + * @param to inclusive end point for range + * @param indexName the index to search for this range + */ + public RangeItem(Number from, Number to, String indexName) { + this(from, to, indexName, false); + } + + /** + * Creates a new range operator + * + * @param from start point for range + * @param to end point for range + * @param indexName the index to search for this range + */ + public RangeItem(Limit from, Limit to, String indexName) { + this(from, to, indexName, false); + } + + /** + * Creates a new range operator + * + * @param from inclusive start point for range + * @param to inclusive end point for range + * @param indexName the index to search for this range + * @param isFromQuery Indicate if this stems directly from the user given query, + * or if you have constructed it at will. + */ + public RangeItem(Number from, Number to, String indexName, boolean isFromQuery) { + this(from, to, 0, indexName, isFromQuery); + } + + /** + * Creates a new range operator + * + * @param from start point for range + * @param to end point for range + * @param indexName the index to search for this range + * @param isFromQuery Indicate if this stems directly from the user given query, + * or if you have constructed it at will. + */ + public RangeItem(Limit from, Limit to, String indexName, boolean isFromQuery) { + this(from, to, 0, indexName, isFromQuery); + } + + /** + * + * @param from inclusive start point for range + * @param to inclusive end point for range + * @param hitLimit This tells how many results you want included from this range as a minimum. + * You might get less if there are not enough, or you might get more. It will use the dictionary and + * include enough entries to satisfy your request. + * Positive number will start from left (@from) and work right. + * Negative number will start from right and work its way left. 
+ * 0 means no limit. + * @param indexName the index to search for this range + * @param isFromQuery Indicate if this stems directly from the user given query, + * or if you have constructed it at will. + */ + public RangeItem(Number from, Number to, int hitLimit, String indexName, boolean isFromQuery) { + this(new Limit(from, true), new Limit(to, true), hitLimit, indexName, isFromQuery); + } + + /** + * + * @param from start point for range + * @param to end point for range + * @param hitLimit This tells how many results you want included from this range as a minimum. + * You might get less if there are not enough, or you might get more. It will use the dictionary and + * include enough entries to satisfy your request. + * Positive number will start from left (@from) and work right. + * Negative number will start from right and work its way left. + * 0 means no limit. + * @param indexName the index to search for this range + * @param isFromQuery Indicate if this stems directly from the user given query, + * or if you have constructed it at will. + */ + public RangeItem(Limit from, Limit to, int hitLimit, String indexName, boolean isFromQuery) { + super(from, to, hitLimit, indexName, isFromQuery); + } + + /** Returns the lower limit of this range, which may be negative infinity */ + public final Number getFrom() { + return getFromLimit().number(); + } + + /** Returns the upper limit of this range, which may be positive infinity */ + public final Number getTo() { + return getToLimit().number(); + } + + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/RankItem.java b/container-search/src/main/java/com/yahoo/prelude/query/RankItem.java new file mode 100644 index 00000000000..3ff2857b915 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/RankItem.java @@ -0,0 +1,25 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query; + + +/** + * Represents the rank operator, which only orders the result set and + * does not change which hits are returned. + * + * The first argument is the part selecting the result set, the + * following operands are used to order the result and does not affect + * which hits are returned. + * + * @author bratseth + */ +public class RankItem extends CompositeItem { + + public ItemType getItemType() { + return ItemType.RANK; + } + + public String getName() { + return "RANK"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/RegExpItem.java b/container-search/src/main/java/com/yahoo/prelude/query/RegExpItem.java new file mode 100644 index 00000000000..5a611a8a927 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/RegExpItem.java @@ -0,0 +1,106 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.nio.ByteBuffer; + +/** + * Match a field with the contained regular expression. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class RegExpItem extends TermItem { + private String expression; + + public RegExpItem(String indexName, boolean isFromQuery, String expression) { + super(indexName, isFromQuery, null); + this.expression = expression; + } + + @Override + public String stringValue() { + return expression; + } + + @Override + public boolean isStemmed() { + return true; + } + + @Override + public int getNumWords() { + return 1; + } + + @Override + public void setValue(String expression) { + this.expression = expression; + } + + @Override + public String getRawWord() { + return stringValue(); + } + + @Override + public boolean isWords() { + return false; + } + + @Override + public String getIndexedString() { + return stringValue(); + } + + @Override + public ItemType getItemType() { + return ItemType.REGEXP; + } + + @Override + public String getName() { + return ItemType.REGEXP.name(); + } + + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + putString(getIndexedString(), buffer); + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append("RegExpItem [expression=").append(expression).append("]"); + return builder.toString(); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + ((expression == null) ? 
0 : expression.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!super.equals(obj)) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + RegExpItem other = (RegExpItem) obj; + if (expression == null) { + if (other.expression != null) { + return false; + } + } else if (!expression.equals(other.expression)) { + return false; + } + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/SegmentItem.java b/container-search/src/main/java/com/yahoo/prelude/query/SegmentItem.java new file mode 100644 index 00000000000..0dd8e1c36cc --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/SegmentItem.java @@ -0,0 +1,176 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +import com.yahoo.prelude.query.textualrepresentation.Discloser; + + +/** + * An immutable and'ing of a collection of sub-expressions. It does not + * extend AndItem to avoid code using instanceof handling it as an + * AndItem. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public abstract class SegmentItem extends CompositeItem implements BlockItem { + + private boolean locked = false; + private String rawWord; + private String value; + private boolean isFromQuery; + private boolean isFromUser; + private boolean stemmed; + private SegmentingRule segmentingRule = SegmentingRule.LANGUAGE_DEFAULT; + private Substring origin; + + /** + * Creates a new segment item + * + * @param rawWord the raw form of this segment as received in the request + * @param current the current transformed version of the raw form, or the raw form repeated if no normalized form is known + * @param isFromQuery whether this segment stems from the query received in the request + * @param stemmed whether this is stemmed + */ + public SegmentItem(String rawWord, String current, boolean isFromQuery, boolean stemmed) { + this(rawWord, current, isFromQuery, stemmed, null); + } + + /** + * Creates a new segment item + * + * @param rawWord the raw form of this segment as received in the request + * @param current the current transformed version of the raw form, or the raw form repeated if no normalized form is known + * @param isFromQuery whether this segment stems from the query received in the request + * @param stemmed whether this is stemmed + * @param origin the value returned by {@link #getOrigin()}, or null as passed by the four-argument constructor; presumably the part of the query string this segment was created from (TODO: confirm) + */ + public SegmentItem(String rawWord, String current, boolean isFromQuery, boolean stemmed, Substring origin) { + this.rawWord = rawWord; + this.value = current; + this.stemmed = stemmed; + this.isFromQuery = isFromQuery; + isFromUser = isFromQuery; + this.origin = origin; + } + + public String getRawWord() { + return rawWord; + } + + public String getNormalized() { + return value; + } + + @Override + public String stringValue() { + return value; + } + + public boolean isFromQuery() { + return isFromQuery; + } + + public boolean isStemmed() { + return stemmed; + } + + public void lock() { + locked = true; + } + + public 
boolean isLocked() { + return locked; + } + + public int getNumWords() { + return getItemCount(); + } + + public void addItem(Item item) { + if (locked) { + dontAdd(); + } + super.addItem(item); + } + + public void addItem(int index, Item item) { + if (locked) { + dontAdd(); + } + super.addItem(index, item); + } + + private void dontAdd() { + throw new QueryException("Tried to add item to an immutable segment."); + } + + public Item removeItem(int index) { + if (locked) { + dontRemove(); + } + return super.removeItem(index); + } + + public boolean removeItem(Item item) { + if (locked) { + dontRemove(); + } + return super.removeItem(item); + } + + private void dontRemove() { + throw new QueryException("Tried to remove an item from an immutable segment."); + } + + // TODO: Add a getItemIterator which is safe for immutability + + /** Return a deep copy of this object */ + public SegmentItem clone() { + SegmentItem copy; + synchronized(this) { + boolean tmpLock = locked; + + locked = false; + copy = (SegmentItem) super.clone(); + locked = tmpLock; + copy.locked = tmpLock; + } + return copy; + } + + public boolean isWords() { + return true; + } + + public boolean isFromUser() { + return isFromUser; + } + + public void setFromUser(boolean isFromUser) { + this.isFromUser = isFromUser; + } + + /** Returns the origin given at construction, which is null if the four-argument constructor was used */ + public Substring getOrigin() { + return origin; + } + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("isFromQuery", isFromQuery); + discloser.addProperty("isFromUser", isFromUser); + discloser.addProperty("locked", locked); + discloser.addProperty("rawWord", rawWord); + discloser.addProperty("stemmed", stemmed); + } + + @Override + public SegmentingRule getSegmentingRule() { + return segmentingRule; + } + + public void setSegmentingRule(SegmentingRule segmentingRule) { + this.segmentingRule = segmentingRule; + } +} diff --git 
a/container-search/src/main/java/com/yahoo/prelude/query/SegmentingRule.java b/container-search/src/main/java/com/yahoo/prelude/query/SegmentingRule.java new file mode 100644 index 00000000000..ecd0ca4e056 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/SegmentingRule.java @@ -0,0 +1,15 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +/** + * If a term has to be resegmented, and the result is more than one word, this + * is how the result should be handled in the query tree. For Western languages + * the default is creating a phrase, but for business reasons, some East Asian + * languages use an AND instead. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @since 5.1.28 + */ +public enum SegmentingRule { + LANGUAGE_DEFAULT, PHRASE, BOOLEAN_AND; +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/SimpleIndexedItem.java b/container-search/src/main/java/com/yahoo/prelude/query/SimpleIndexedItem.java new file mode 100644 index 00000000000..b4b63ae7ed3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/SimpleIndexedItem.java @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import edu.umd.cs.findbugs.annotations.NonNull; + +import java.nio.ByteBuffer; + +/** + * Common implementation for Item classes implementing the IndexedItem interface. + * Note that this file exist in 3 copies that should be kept in sync: + * + * CompositeIndexedItem.java + * SimpleIndexedItem.java + * IndexedSegmentItem.java + * + * These should only have trivial differences. + * (multiple inheritance or mixins would have been nice). 
+ * + * @author arnej27959 + */ +public abstract class SimpleIndexedItem extends SimpleTaggableItem implements IndexedItem { + + @NonNull + private String index = ""; + + /** + * The name of the index this belongs to, or "" (never null) if not specified + **/ + @NonNull + public String getIndexName() { + return index; + } + + // encode index bytes + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + putString(index, buffer); + } + + /** Sets the name of the index to search */ + public void setIndexName(String index) { + if (index == null) { + index = ""; + } + this.index = index; + } + + /** Appends the index prefix if necessary */ + protected void appendIndexString(StringBuilder buffer) { + if (!getIndexName().equals("")) { + buffer.append(getIndexName()); + buffer.append(":"); + } + } + + public boolean equals(Object object) { + if (!super.equals(object)) { + return false; + } + IndexedItem other = (IndexedItem) object; // Ensured by superclass + if (!this.index.equals(other.getIndexName())) { + return false; + } + return true; + } + + public int hashCode() { + return super.hashCode() + 113 * index.hashCode(); + } + + public abstract String getIndexedString(); + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("index", index); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/SimpleTaggableItem.java b/container-search/src/main/java/com/yahoo/prelude/query/SimpleTaggableItem.java new file mode 100644 index 00000000000..adaa4f614e5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/SimpleTaggableItem.java @@ -0,0 +1,71 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +/** + * Common implementation for Item classes implementing the TaggableItem interface. 
+ * Note that this file exist in 3 copies that should be kept in sync: + * + * CompositeTaggableItem.java + * SimpleTaggableItem.java + * TaggableSegmentItem.java + * + * These should only have trivial differences. + * (multiple inheritance or mixins would have been nice). + * + * @author arnej27959 + */ +public abstract class SimpleTaggableItem extends Item implements TaggableItem { + + public int getUniqueID() { + return uniqueID; + } + + public void setUniqueID(int id) { + setHasUniqueID(true); + uniqueID = id; + } + + /** See {@link TaggableItem#setConnectivity} */ + public void setConnectivity(Item item, double connectivity) { + setHasUniqueID(true); + item.setHasUniqueID(true); + if (connectedItem != null) { + // untangle old connectivity + connectedItem.connectedBacklink = null; + } + this.connectivity = connectivity; + connectedItem = item; + connectedItem.connectedBacklink = this; + } + + public Item getConnectedItem() { + return connectedItem; + } + + public double getConnectivity() { + return connectivity; + } + + public void setSignificance(double significance) { + setHasUniqueID(true); + setExplicitSignificance(true); + this.significance = significance; + } + + public void setExplicitSignificance(boolean explicitSignificance) { + this.explicitSignificance = explicitSignificance; + } + + public boolean hasExplicitSignificance() { + return explicitSignificance; + } + + public double getSignificance() { + return significance; + } + + //Change access privilege from protected to public. + public boolean hasUniqueID() { + return super.hasUniqueID(); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/Substring.java b/container-search/src/main/java/com/yahoo/prelude/query/Substring.java new file mode 100644 index 00000000000..fa304d6b63a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/Substring.java @@ -0,0 +1,62 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.prelude.query; + + +/** + * A substring which also provides access to the full (query) string it is a substring of. + * This is immutable. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class Substring { + + /** The start offset of the substring into the string (inclusive) */ + public final int start; + + /** The end offset of the substring into the string (exclusive) */ + public final int end; + + /** The string this is a substring of */ + public final String string; + + public Substring(int start, int end,String string) { + this.start = start; + this.end = end; + this.string=string; + } + + public String getValue() { + return string.substring(start,end); + } + + /** Returns the entire string this is a substring of. The start and end offsets are into this string. */ + public String getSuperstring() { return string; } + + /** + * Returns the character n places (0 base) after the end of the value substring into the superstring. + * For example charAfter(0) returns the first character after the end of the substring + * + * @return the char n places after the end of the substring + * @throws IndexOutOfBoundsException if the string is not long enough to have a character at this position + */ + public char charAfter(int n) { + return string.charAt(end+n); + } + + /** + * Returns the character n places (0 base) before the start of the value substring into the superstring. 
+ * For example charBefore(0) returns the first character before the start of the substring + * + * @return the char n places before the start of the substring + * @throws IndexOutOfBoundsException if the string does not have a character at this position + */ + public char charBefore(int n) { + return string.charAt(start-1-n); + } + + @Override + public String toString() { + return "(" + start + ' ' + end + ')'; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/SubstringItem.java b/container-search/src/main/java/com/yahoo/prelude/query/SubstringItem.java new file mode 100644 index 00000000000..aa33412f205 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/SubstringItem.java @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +/** + * A word that matches substrings of words + * + * @author banino + */ +public class SubstringItem extends WordItem { + + public SubstringItem(String substring) { + this(substring, false); + } + + public SubstringItem(String substring, boolean isFromQuery) { + super(substring, isFromQuery); + } + + public ItemType getItemType() { + return ItemType.SUBSTRING; + } + + public String getName() { + return "SUBSTRING"; + } + + public String stringValue() { + return "*" + getWord() + "*"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/SuffixItem.java b/container-search/src/main/java/com/yahoo/prelude/query/SuffixItem.java new file mode 100644 index 00000000000..ef3b800e38f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/SuffixItem.java @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +/** + * A word that matches a suffix of words instead of a complete word. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ + +public class SuffixItem extends WordItem { + + public SuffixItem(String suffix) { + this(suffix, false); + } + + public SuffixItem(String suffix, boolean isFromQuery) { + super(suffix, isFromQuery); + } + + public ItemType getItemType() { + return ItemType.SUFFIX; + } + + public String getName() { + return "SUFFIX"; + } + + public String stringValue() { + return "*" + getWord(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/TaggableItem.java b/container-search/src/main/java/com/yahoo/prelude/query/TaggableItem.java new file mode 100644 index 00000000000..a71bd909000 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/TaggableItem.java @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +/** + * An interface used for anything which may be addressed using an external, + * unique ID in the query tree in the backend. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public interface TaggableItem { + + public int getUniqueID(); + public void setUniqueID(int id); + public boolean hasUniqueID(); + + /** + * Set the connectivity to another term in the same query tree. + * This is used to influence ranking features taking proximity into account: nativeRank and a subset of the + * fieldMatch features. + * <p> + * By default consecutive query terms are 'somewhat' connected, meaning ranking features will be better in documents + * where the terms are found close to each other. This effect can be increased or decreased by manipulating the + * connectivity value. Typical use is to increase the connectivity between terms in the query that we believe are + * semantically connected. 
E.g in the query 'new york hotel', it is a good idea to increase the connectivity between + * "new" and "york" to ensure that a document containing "List of hotels in New York" is ranked above one containing + * "List of new hotels in York". + * + * @param item the item this should be connected to - in practice the next consecutive item in the query + * @param connectivity a value between 0 (none) and 1 (maximal), defining the connectivity between this and the + * argument item. The default connectivity is 0.1. + */ + public void setConnectivity(Item item, double connectivity); + public Item getConnectedItem(); + public double getConnectivity(); + + + /** + * Used for setting explicit term significance (in the tf/idf sense) to a single term or phrase, + * relative to the rest of the query. + * This influences ranking features which take term significance into account and overrides the default + * partial corpus based term significance computation happening in the backend. + */ + public void setSignificance(double significance); + public boolean hasExplicitSignificance(); + public void setExplicitSignificance(boolean significance); + public double getSignificance(); +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/TaggableSegmentItem.java b/container-search/src/main/java/com/yahoo/prelude/query/TaggableSegmentItem.java new file mode 100644 index 00000000000..1346fc9de7f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/TaggableSegmentItem.java @@ -0,0 +1,75 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +/** + * Common implementation for Item classes implementing the TaggableItem interface. + * Note that this file exist in 3 copies that should be kept in sync: + * + * CompositeTaggableItem.java + * SimpleTaggableItem.java + * TaggableSegmentItem.java + * + * These should only have trivial differences. 
+ * (multiple inheritance or mixins would have been nice). + * + * @author arnej27959 + */ +public abstract class TaggableSegmentItem extends SegmentItem implements TaggableItem { + + /** Passes all arguments unchanged to the SegmentItem constructor */ + protected TaggableSegmentItem(String rawWord, String current, boolean isFromQuery, boolean stemmed, Substring origin) { + super(rawWord, current, isFromQuery, stemmed, origin); + } + + /** Returns the unique ID stored on this item */ + public int getUniqueID() { + return uniqueID; + } + + /** Stores the given id and flags this item as having a unique ID */ + public void setUniqueID(int id) { + setHasUniqueID(true); + uniqueID = id; + } + + /** + * See {@link TaggableItem#setConnectivity}. + * Flags both this and the given item as having unique IDs, clears the backlink of any item + * this was previously connected to, and sets the backlink of the given item to this. + * + * @param item the item to connect to; it is dereferenced without a null check + * @param connectivity the connectivity value to store + */ + public void setConnectivity(Item item, double connectivity) { + setHasUniqueID(true); + item.setHasUniqueID(true); + if (connectedItem != null) { + // untangle old connectivity + connectedItem.connectedBacklink = null; + } + this.connectivity = connectivity; + connectedItem = item; + connectedItem.connectedBacklink = this; + } + + /** Returns the item this is connected to, as assigned by {@link #setConnectivity} */ + public Item getConnectedItem() { + return connectedItem; + } + + /** Returns the connectivity value, as assigned by {@link #setConnectivity} */ + public double getConnectivity() { + return connectivity; + } + + /** Stores the significance, marks it as explicit and flags this item as having a unique ID */ + public void setSignificance(double significance) { + setHasUniqueID(true); + setExplicitSignificance(true); + this.significance = significance; + } + + /** Sets the explicit-significance flag only; the significance value itself is left untouched */ + public void setExplicitSignificance(boolean explicitSignificance) { + this.explicitSignificance = explicitSignificance; + } + + /** Returns the explicit-significance flag */ + public boolean hasExplicitSignificance() { + return explicitSignificance; + } + + /** Returns the stored significance value */ + public double getSignificance() { + return significance; + } + + //Change access privilege from protected to public. + public boolean hasUniqueID() { + return super.hasUniqueID(); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/TermItem.java b/container-search/src/main/java/com/yahoo/prelude/query/TermItem.java new file mode 100644 index 00000000000..d20ee304b57 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/TermItem.java @@ -0,0 +1,138 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.query; + + +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import java.nio.ByteBuffer; + + +/** + * <p>A query term, that is, not only a term in the query language + * (an <i>item</i>), but also a term to be found in (or excluded from) + * the search index.</p> + * + * <p>Optionally, a TermItem may also specify the name of an + * index backend to search.</p> + * + * @author bratseth + * @author havardpe + */ +public abstract class TermItem extends SimpleIndexedItem implements BlockItem { + + /** Whether the term is from the raw query or is synthetic. */ + private final boolean isFromQuery; + + /** Whether accent dropping should be performed */ + private boolean normalizable = true; + + /** The substring which is the raw form of the source of this token, or null if none. */ + private Substring origin; + + /** The rule returned by getSegmentingRule; LANGUAGE_DEFAULT until setSegmentingRule is called */ + private SegmentingRule segmentingRule = SegmentingRule.LANGUAGE_DEFAULT; + + /** Creates a term item with the empty string as index name, isFromQuery false and no origin */ + public TermItem() { + this(""); + } + + /** Creates a term item with the given index name, isFromQuery false and no origin */ + public TermItem(String indexName) { + this(indexName, false); + } + + /** Creates a term item with the given index name and isFromQuery flag, and no origin */ + public TermItem(String indexName, boolean isFromQuery) { + this(indexName, isFromQuery, null); + } + + /** + * Creates a term item. + * + * @param indexName the index name, handed to setIndexName + * @param isFromQuery whether the term is from the raw query; cannot be changed later + * @param origin the raw source substring of this term, or null if none + */ + protected TermItem(String indexName, boolean isFromQuery, Substring origin) { + setIndexName(indexName); + this.isFromQuery = isFromQuery; + this.origin = origin; + } + + /** Encodes this item by calling encodeThis and always returns 1 */ + final public int encode(ByteBuffer buffer) { + encodeThis(buffer); + return 1; + } + + /** Appends the index prefix if necessary and delegates to the subclass */ + protected final void appendBodyString(StringBuilder buffer) { + appendIndexString(buffer); + buffer.append(stringValue()); + } + + /** + * Sets the value of this item from a string.
+ * + * @throws UnsupportedOperationException if this is not supported on this kind of item + */ + public abstract void setValue(String value); + + /** Returns the raw form of the text leading to this term, exactly as received, including original casing */ + public abstract String getRawWord(); + + /** + * Returns the substring which is the raw form of the text leading to this token. This substring also contains + * the superstring this substring was a part of, e.g. the whole query string. + * If this did not originate directly from a user string, this is null. + */ + public Substring getOrigin() { return origin; } + + /** + * Whether this term is from the query or has been added by a searcher. + * Only terms from the user should be modified by query rewriters which attempt to improve the + * precision or recall of the user's query. + */ + public boolean isFromQuery() { return isFromQuery; } + + public abstract boolean isWords(); + + /** Sets the origin of this */ + public void setOrigin(Substring origin) { + this.origin = origin; + } + + /** Discloses the superclass state, then the origin as the property "origin" and stringValue() as the value */ + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("origin", origin); + discloser.setValue(stringValue()); + } + + /** Returns 1: a term item counts as a single term */ + @Override + public int getTermCount() { return 1; } + + /** + * This refers to whether accent removal is a meaningful and possible + * operation for this word. It should be named "isTransformable" or similar, + * but for historical reasons that is not the case. This method has nothing + * to do with Unicode normalization.
+ * + * @param normalizable + * set to true if accent removal can/should be performed + */ + public void setNormalizable(boolean normalizable) { + this.normalizable = normalizable; + } + + @Override + public SegmentingRule getSegmentingRule() { + return segmentingRule; + } + + public void setSegmentingRule(SegmentingRule segmentingRule) { + this.segmentingRule = segmentingRule; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/TermType.java b/container-search/src/main/java/com/yahoo/prelude/query/TermType.java new file mode 100644 index 00000000000..f2b38d5eacb --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/TermType.java @@ -0,0 +1,92 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +/** + * A term type enumeration. + * The instances are shared constants and are therefore final; two term types are equal + * if their names are equal. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class TermType { + + public static final TermType RANK = new TermType("rank", RankItem.class, null, "$"); + + public static final TermType AND = new TermType("and", AndItem.class, null, "+"); + + public static final TermType OR = new TermType("or", OrItem.class, null, "?"); + + public static final TermType NOT = new TermType("not", NotItem.class, null, "-"); + + public static final TermType PHRASE = new TermType("phrase", PhraseItem.class, null, "\""); + + /** The default type: its item class is CompositeItem, but AndItem is what createItemClass instantiates */ + public static final TermType DEFAULT = new TermType("", CompositeItem.class, AndItem.class, ""); + + public final String name; + + private final String sign; + private final Class<? extends CompositeItem> instanceClass; + private final Class<? extends CompositeItem> itemClass; + + /** + * Creates a term type. + * + * @param name the name of this type, used by equals and hashCode + * @param itemClass the class returned by getItemClass + * @param instanceClass the class instantiated by createItemClass, or null to use itemClass + * @param sign the string returned by toSign + */ + private TermType(String name, Class<? extends CompositeItem> itemClass, Class<?
extends CompositeItem> instanceClass, String sign) { + this.name = name; + this.itemClass = itemClass; + if (instanceClass == null) { + this.instanceClass = itemClass; + } else { + this.instanceClass = instanceClass; + } + this.sign = sign; + } + + /** Returns the name of this type, the same value as the public name field */ + public String getName() { + return name; + } + + /** Returns the CompositeItem type this type corresponds to, or CompositeItem if it's the default */ + public Class<? extends CompositeItem> getItemClass() { + return itemClass; + } + + /** Returns true if the class corresponding to this type is the given class */ + public boolean hasItemClass(Class<?> theClass) { + return getItemClass()==theClass; + } + + /** + * Returns an instance of the class corresponding to the given type, AndItem + * if this is the DEFAULT type + * + * @throws RuntimeException + * if an instance could not be created + */ + public Item createItemClass() { + try { + // Class.newInstance() is deprecated and rethrows constructor exceptions unchecked; + // going through the no-argument constructor reports all failures as exceptions caught below + return instanceClass.getDeclaredConstructor().newInstance(); + } catch (Exception e) { + throw new RuntimeException("Could not create an instance for item " + + this, e); + } + } + + /** Returns the sign string given at construction */ + public String toSign() { + return sign; + } + + /** Returns true if the argument is a TermType with the same name as this */ + @Override + public boolean equals(Object o) { + if (!(o instanceof TermType)) { + return false; + } + + TermType other = (TermType) o; + + return name.equals(other.name); + } + + /** Returns the hash code of the name, consistent with equals */ + @Override + public int hashCode() { + return name.hashCode(); + } + + public @Override String toString() { return "term type '" + name + "'"; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/ToolBox.java b/container-search/src/main/java/com/yahoo/prelude/query/ToolBox.java new file mode 100644 index 00000000000..32205135f04 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/ToolBox.java @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.google.common.annotations.Beta; + +/** + * Query tree helper methods and factories.
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@Beta +public final class ToolBox { + + /** Callback invoked by {@link ToolBox#visit(QueryVisitor, Item)} for the items of a query tree */ + public static abstract class QueryVisitor { + /** + * Called for each item in the query tree given to + * {@link ToolBox#visit(QueryVisitor, Item)}. Return true to visit the + * sub-items of the given item, return false to ignore the sub-items. + * + * @param item + * each item in the query tree + * @return whether or not to visit the sub-items of the argument item + * (and then invoke the {@link #onExit()} method) + */ + public abstract boolean visit(Item item); + + /** + * Invoked when all sub-items have been visited, or immediately after + * visit() if there are no sub-items or visit() returned false. + */ + public abstract void onExit(); + } + + /** + * Traverses the tree rooted at the given item depth first. + * The visitor's visit method is called with the item before any of its sub-items. If the item is a + * CompositeItem and visit returned true, the sub-items are traversed recursively in index order. + * For other items the return value of visit is ignored. onExit is called exactly once for each + * item passed to visit, after its sub-items (if any) have been traversed. + * + * @param visitor the callback to invoke + * @param item the root of the (sub)tree to traverse + */ + public static void visit(QueryVisitor visitor, Item item) { + if (item instanceof CompositeItem) { + if (visitor.visit(item)) { + CompositeItem composite = (CompositeItem) item; + for (int i = 0; i < composite.getItemCount(); ++i) { + visit(visitor, composite.getItem(i)); + } + } + } else { + visitor.visit(item); + } + visitor.onExit(); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/WandItem.java b/container-search/src/main/java/com/yahoo/prelude/query/WandItem.java new file mode 100644 index 00000000000..df967464230 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/WandItem.java @@ -0,0 +1,103 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.compress.IntegerCompressor; +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import java.nio.ByteBuffer; + +/** + * A weighted set query item to be evaluated as a Wand with dot product scoring. + * + * The dot product is calculated between the matched tokens of the weighted set field searched + * and the weights associated with the tokens of this WandItem.
+ * The resulting dot product will be available as a raw score in the rank framework. + * + * @since 5.1.27 + * @author <a href="mailto:geirst@yahoo-inc.com">Geir Storli</a> + */ +public class WandItem extends WeightedSetItem { + + /** Set at construction and never changed by this class */ + private int targetNumHits; + private double scoreThreshold = 0; + private double thresholdBoostFactor = 1; + + /** + * Creates an empty WandItem. + * @param fieldName The name of the weighted set field to search with this WandItem. + * @param targetNumHits The target for minimum number of hits to produce by the backend search operator handling this WandItem. + */ + public WandItem(String fieldName, int targetNumHits) { + super(fieldName); + this.targetNumHits = targetNumHits; + } + + /** + * Sets the initial score threshold used by the backend search operator handling this WandItem. + * The score of a document must be larger than this threshold in order to be considered a match. + * Default value is 0.0. + * @param scoreThreshold the initial score threshold. + */ + public void setScoreThreshold(double scoreThreshold) { + this.scoreThreshold = scoreThreshold; + } + + /** + * Sets the boost factor used by the backend search operator to boost the threshold before + * comparing it with the upper bound score of the document being evaluated. + * A large value of this factor results in fewer full evaluations and in an expected loss in precision. + * Similarly, a gain in performance might be expected. Default value is 1.0. + * + * NOTE: This boost factor is only used when this WandItem is searching a Vespa field. + * @param thresholdBoostFactor the boost factor.
+ */ + public void setThresholdBoostFactor(double thresholdBoostFactor) { + this.thresholdBoostFactor = thresholdBoostFactor; + } + + /** Returns the target number of hits given at construction */ + public int getTargetNumHits() { + return targetNumHits; + } + + /** Returns the score threshold, 0 unless set by setScoreThreshold */ + public double getScoreThreshold() { + return scoreThreshold; + } + + /** Returns the threshold boost factor, 1 unless set by setThresholdBoostFactor */ + public double getThresholdBoostFactor() { + return thresholdBoostFactor; + } + + @Override + public ItemType getItemType() { + return ItemType.WAND; + } + + /** + * Writes the superclass encoding, then targetNumHits as a compressed positive number, + * then scoreThreshold and thresholdBoostFactor as doubles, in that order. + */ + @Override + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + IntegerCompressor.putCompressedPositiveNumber(targetNumHits, buffer); + buffer.putDouble(scoreThreshold); + buffer.putDouble(thresholdBoostFactor); + } + + /** Appends <code>[getName()]([targetNumHits],[scoreThreshold],[thresholdBoostFactor]) </code> */ + @Override + protected void appendHeadingString(StringBuilder buffer) { + buffer.append(getName()); + buffer.append("("); + buffer.append(targetNumHits); + buffer.append(","); + buffer.append(scoreThreshold); + buffer.append(","); + buffer.append(thresholdBoostFactor); + buffer.append(")"); + buffer.append(" "); + } + + /** Discloses the superclass state followed by the three Wand parameters as properties */ + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("targetNumHits", targetNumHits); + discloser.addProperty("scoreThreshold", scoreThreshold); + discloser.addProperty("thresholdBoostFactor", thresholdBoostFactor); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/WeakAndItem.java b/container-search/src/main/java/com/yahoo/prelude/query/WeakAndItem.java new file mode 100644 index 00000000000..967b9d17256 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/WeakAndItem.java @@ -0,0 +1,139 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.query; + +import com.yahoo.compress.IntegerCompressor; +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import edu.umd.cs.findbugs.annotations.NonNull; + +import java.nio.ByteBuffer; + +/** + * Weak And of a collection of sub-expressions: + * this behaves like an OR unless many hits are returned and then + * it starts acting more like an AND. + * Alternately it can be viewed as an n-of-m operator where n + * is 1 at first and then increases gradually to m as more hits + * are seen. + * + * @author arnej27959 + */ +public final class WeakAndItem extends NonReducibleCompositeItem { + + /** The target for minimum number of hits to produce, see the constructor */ + private int N; + /** The index name of this item; never null, null arguments are stored as the empty string */ + @NonNull + private String index; + private int scoreThreshold = 0; + + public ItemType getItemType() { + return ItemType.WEAK_AND; + } + + /** Returns the fixed name "WAND" */ + public String getName() { + return "WAND"; + } + + /** + * Make a WAND item with no children. You can mention a common index or you can mention it on each child. + * @param index The index it shall search. + * @param N the target for minimum number of hits to produce; + * a backend will not suppress any hits in the operator + * until N hits have been produced.
+ **/ + public WeakAndItem(String index, int N) { + this.N = N; + if (index == null) { + this.index = ""; + } else { + this.index = index; + } + } + /** Creates a WAND item with the given N and the empty string as index */ + public WeakAndItem(int N) { + this("", N); + } + + /** Sets the index name of all subitems of this; a null argument is treated as the empty string */ + public void setIndexName(String index) { + String toSet; + if (index == null) { + toSet = ""; + } else { + toSet = index; + } + super.setIndexName(toSet); + this.index = toSet; + } + + @NonNull + public String getIndexName() { + return index; + } + + /** Appends the heading of this string - <code>[getName()]([limit]) </code> */ + protected void appendHeadingString(StringBuilder buffer) { + buffer.append(getName()); + buffer.append("("); + buffer.append(N); + buffer.append(")"); + buffer.append(" "); + } + + /** The default N used if none is specified: 100 */ + public static final int defaultN = 100; + + /** Creates a WAND item with default N */ + public WeakAndItem() { + this(defaultN); + } + + public int getN() { + return N; + } + + public void setN(int N) { + this.N = N; + } + + /** Returns the score threshold, 0 unless set by setScoreThreshold */ + public int getScoreThreshold() { + return scoreThreshold; + } + + /** + * Sets the score threshold used by the backend search operator handling this WeakAndItem. + * This threshold is currently only used if the WeakAndItem is searching a RISE index field. + * The score threshold then specifies the minimum dot product score a match needs to be part of the result set. + * Default value is 0. + * @param scoreThreshold the score threshold.
+ */ + public void setScoreThreshold(int scoreThreshold) { + this.scoreThreshold = scoreThreshold; + } + + /** + * Writes the superclass encoding followed by N as a compressed positive number and the index name. + * NOTE(review): scoreThreshold is not written here, nor is it disclosed or compared in equals - confirm this is intended. + */ + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + IntegerCompressor.putCompressedPositiveNumber(N, buffer); + putString(index, buffer); + } + + /** Discloses the superclass state followed by N as the property "N" */ + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("N", N); + } + + /** Combines the superclass hash code with N, matching the state compared by equals */ + public int hashCode() { + return super.hashCode() + 31 * N; + } + + /** + * Returns whether this item is of the same class and + * contains the same state as the given item. + * In addition to whatever the superclass compares, only N is compared here. + */ + public boolean equals(Object object) { + if (!super.equals(object)) return false; + WeakAndItem other = (WeakAndItem) object; // Ensured by superclass + if (this.N != other.N) return false; + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/WeightedSetItem.java b/container-search/src/main/java/com/yahoo/prelude/query/WeightedSetItem.java new file mode 100644 index 00000000000..eb6737ba9d8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/WeightedSetItem.java @@ -0,0 +1,175 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import com.yahoo.collections.CopyOnWriteHashMap; +import com.yahoo.compress.IntegerCompressor; +import com.yahoo.prelude.query.textualrepresentation.Discloser; + +import edu.umd.cs.findbugs.annotations.NonNull; + +import java.nio.ByteBuffer; +import java.util.Iterator; +import java.util.Map; + +/** + * A term which contains a weighted set. + * + * When using a weighted set to search a field, all tokens present in + * the searched field will be reverse matched against the weighted + * set. This means that using a weighted set to search a single-value + * attribute field will have similar semantics to using a normal term + * to search a weighted set field.
The low-level matching information + * resulting from matching a document with a weighted set term will + * contain the weights of all the matched tokens in descending + * order. Each matched weight will be represented as a standard + * occurrence on position 0 in element 0. + * + */ +public class WeightedSetItem extends SimpleTaggableItem { + + @NonNull + private String indexName = ""; + + /** The tokens of this set (String or Long keys) mapped to their weights */ + private CopyOnWriteHashMap<Object,Integer> set = new CopyOnWriteHashMap<>(1000); + + /** Creates an empty weighted set; note you must provide an index name up front */ + public WeightedSetItem(String indexName) { + if (indexName == null) { + this.indexName = ""; + } else { + this.indexName = indexName; + } + } + + /** + * Add weighted integer token, with the same semantics as {@link #addToken(String, int)}: + * if the value is already in the set, the maximum weight is kept. + * + * @return weight of added token (might be old value, if kept) + */ + public Integer addToken(long value, int weight) { + return addInternal(value, weight); + } + /** + * Add weighted token. + * If token is already in the set, the maximum weight is kept. + * NOTE: The weight must be 1 or more; negative values (and zero) are not allowed. + * (This requirement is not checked by this method.) + * @return weight of added token (might be old value, if kept) + * @throws IllegalArgumentException if the token is null + */ + public Integer addToken(String token, int weight) { + if (token == null) throw new IllegalArgumentException("token must be a string"); + return addInternal(token, weight); + } + /** Stores the weight for the token, restoring the previous weight if that was larger; returns the weight kept */ + private Integer addInternal(Object token, int weight) { + Integer newWeight = weight; + Integer oldWeight = set.put(token, newWeight); + if (oldWeight != null && oldWeight > newWeight) { + set.put(token, oldWeight); + return oldWeight; + } + return newWeight; + } + + /** + * Add token with weight 1.
+ */ + public Integer addToken(String token) { + return addToken(token, 1); + } + + /** Returns the result of looking up the given string token in the set; tokens added as long values are not found by this */ + public Integer getTokenWeight(String token) { + return set.get(token); + } + + /** Removes the given string token from the set and returns the result of the removal */ + public Integer removeToken(String token) { + return set.remove(token); + } + + /** Returns the number of tokens in this set */ + public int getNumTokens() { + return set.size(); + } + + /** Returns an iterator over the token to weight entries of this set; keys are the String or Long values added */ + public Iterator<Map.Entry<Object,Integer>> getTokens() { + return set.entrySet().iterator(); + } + + /** Sets the index name; a null argument is stored as the empty string */ + @Override + public void setIndexName(String index) { + if (index == null) { + this.indexName = ""; + } else { + this.indexName = index; + } + } + + @NonNull + public String getIndexName() { + return indexName; + } + + @Override + public ItemType getItemType() { + return ItemType.WEIGHTEDSET; + } + + /** Returns the name of the item type of this */ + @Override + public String getName() { + return getItemType().name(); + } + + // for tracing - random text format: index{[weight]:"token",[weight]:"token"} + @Override + protected void appendBodyString(StringBuilder buffer) { + buffer.append(indexName); + buffer.append("{"); + // Write the separator before every entry but the first, so that an empty set yields "{}": + // unconditionally deleting a trailing "," would remove the "{" when there are no entries + String separator = ""; + for (Map.Entry<Object, Integer> entry : set.entrySet()) { + buffer.append(separator); + buffer.append("["); + buffer.append(entry.getValue()); + buffer.append("]:\""); + buffer.append(entry.getKey()); + buffer.append("\""); + separator = ","; + } + buffer.append("}"); + } + + /** Discloses the superclass state, the index name as the property "index", and one weighted WordItem child per token */ + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("index", indexName); + for (Map.Entry<Object, Integer> entry : set.entrySet()) { + WordItem subitem = new WordItem(entry.getKey().toString(), indexName); + subitem.setWeight(entry.getValue()); + discloser.addChild(subitem); + } + } + + /** + * Encodes this item followed by one item per token: a PureWeightedInteger for Long keys and + * a PureWeightedString (of the key's string form) for all other keys. + * + * @return 1 plus the number of tokens encoded + */ + @Override + public int encode(ByteBuffer buffer) { + encodeThis(buffer); + int itemCount = 1; + for (Map.Entry<Object, Integer> entry : set.entrySet()) { + Object key = entry.getKey(); + if (key instanceof Long) { + new PureWeightedInteger((Long)key, entry.getValue()).encode(buffer); + } else { + new PureWeightedString(key.toString(), entry.getValue()).encode(buffer); + } + itemCount++; + } + return itemCount; + } + +
@Override + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + IntegerCompressor.putCompressedPositiveNumber(set.size(), buffer); + putString(indexName, buffer); + } + + @Override + public int getTermCount() { + return 1; // this is just one (big) term + } + + /** Returns a clone of this which has its own copy of the token set */ + @Override + public WeightedSetItem clone() { + WeightedSetItem clone = (WeightedSetItem)super.clone(); + clone.set = this.set.clone(); + return clone; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/WordAlternativesItem.java b/container-search/src/main/java/com/yahoo/prelude/query/WordAlternativesItem.java new file mode 100644 index 00000000000..b31dd2bd18e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/WordAlternativesItem.java @@ -0,0 +1,183 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +import com.google.common.collect.ImmutableList; +import com.yahoo.compress.IntegerCompressor; + +/** + * A set of words with differing exactness scores to be used for literal boost + * ranking.
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class WordAlternativesItem extends TermItem { + + /** The alternatives as produced by uniqueAlternatives: sorted by word, one entry per distinct word */ + private List<Alternative> alternatives; + /** The index into alternatives of the entry with the highest exactness, maintained by setMaxIndex */ + private int maxIndex; + + /** An immutable pair of a word and its exactness score */ + public static final class Alternative { + public final String word; + public final double exactness; + + public Alternative(String word, double exactness) { + super(); + this.word = word; + this.exactness = exactness; + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append("Alternative [word=").append(word).append(", exactness=").append(exactness).append("]"); + return builder.toString(); + } + } + + /** + * Creates a word alternatives item. + * + * @param indexName handed to the TermItem constructor + * @param isFromQuery handed to the TermItem constructor + * @param origin handed to the TermItem constructor + * @param terms the alternatives, handed to {@link #setAlternatives} + */ + public WordAlternativesItem(String indexName, boolean isFromQuery, Substring origin, Collection<Alternative> terms) { + super(indexName, isFromQuery, origin); + setAlternatives(terms); + } + + /** Replaces the alternatives of this by a deduplicated, immutable copy of the given terms and recomputes the best one */ + public void setAlternatives(Collection<Alternative> terms) { + this.alternatives = uniqueAlternatives(terms); + setMaxIndex(); + } + + /** + * Returns an immutable list of the given terms sorted by word, with one entry per distinct word. + * When a word occurs more than once the entry with the highest exactness is kept + * (the first one encountered if the exactness is equal). + */ + private static ImmutableList<Alternative> uniqueAlternatives(Collection<Alternative> terms) { + List<Alternative> uniqueTerms = new ArrayList<Alternative>(terms.size()); + for (Alternative term : terms) { + int i = Collections.binarySearch(uniqueTerms, term, (t0, t1) -> t0.word.compareTo(t1.word)); + if (i >= 0) { + Alternative old = uniqueTerms.get(i); + if (old.exactness < term.exactness) { + uniqueTerms.set(i, term); + } + } else { + uniqueTerms.add(~i, term); + } + } + return ImmutableList.copyOf(uniqueTerms); + } + + /** + * Sets maxIndex to the index of the alternative with the highest exactness, + * choosing the first such index on ties; it is set to 0 if there are no alternatives. + */ + private void setMaxIndex() { + int maxIndex = 0; + int currentIndex = 0; + double maxScore = 0.0d; + boolean first = true; + for (Alternative val : this.alternatives) { + if (first) { + first = false; + maxIndex = 0; + maxScore = val.exactness; + } else { + if (val.exactness > maxScore) { + maxScore = val.exactness; + maxIndex = currentIndex; + } + } + ++currentIndex; + } + this.maxIndex = maxIndex; + } + + /** Returns the word of the alternative at maxIndex; NOTE(review): this looks like it throws if the alternatives list is empty - confirm callers never pass an empty collection */ + @Override + public String stringValue() { + return
alternatives.get(maxIndex).word; + } + + /** Always returns true */ + @Override + public boolean isStemmed() { + return true; + } + + /** Returns the number of alternatives */ + @Override + public int getNumWords() { + return alternatives.size(); + } + + /** Always throws UnsupportedOperationException; use setAlternatives instead */ + @Override + public void setValue(String value) { + throw new UnsupportedOperationException("semantics for setting to a string would be brittle, use setAlternatives()"); + } + + /** Returns the value of the origin if there is one, otherwise the same as stringValue() */ + @Override + public String getRawWord() { + if (getOrigin() == null) { + return stringValue(); + } else { + return getOrigin().getValue(); + } + } + + /** Always returns true */ + @Override + public boolean isWords() { + return true; + } + + /** Returns the words of all the alternatives, in list order, separated by a single space */ + @Override + public String getIndexedString() { + return alternatives.stream().map((x) -> x.word).collect(Collectors.joining(" ")); + } + + @Override + public ItemType getItemType() { + return ItemType.WORD_ALTERNATIVES; // placeholder + } + + @Override + public String getName() { + return "WORD_ALTERNATIVES"; + } + + /** + * Return an immutable snapshot of the contained terms. This list will not + * reflect later changes to the item. + * + * @return an immutable list of word alternatives and their respective + * scores + */ + public List<Alternative> getAlternatives() { + return alternatives; + } + + + /** + * Writes the superclass encoding and the number of alternatives, then encodes each alternative as a + * PureWeightedString whose weight is the weight of this item multiplied by the exactness of the + * alternative, rounded by adding 0.5 and truncating to int, and whose filter flag is that of this item. + */ + @Override + public void encodeThis(ByteBuffer target) { + super.encodeThis(target); + IntegerCompressor.putCompressedPositiveNumber(getNumWords(), target); + for (Alternative a : alternatives) { + Item p = new PureWeightedString(a.word, (int) (getWeight() * a.exactness + 0.5)); + p.setFilter(isFilter()); + p.encode(target); + } + } + + /** + * Add a new alternative iff the term string is not already present with an + * equal or higher exactness score. If the term string is present with a + * lower exactness score, the new, higher score will take precedence.
+ * + * @param term + * one of several string interpretations of the input word + * @param exactness + * how close the term string matches what the user input + */ + public void addTerm(String term, double exactness) { + // do note, Item is Cloneable, and overwriting the reference is what + // saves us from overriding the method + if (alternatives.stream().anyMatch((a) -> a.word.equals(term) && a.exactness >= exactness )) { + return; + } + List<Alternative> newTerms = new ArrayList<>(alternatives.size() + 1); + newTerms.addAll(alternatives); + newTerms.add(new Alternative(term, exactness)); + setAlternatives(newTerms); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/WordItem.java b/container-search/src/main/java/com/yahoo/prelude/query/WordItem.java new file mode 100644 index 00000000000..361993900ba --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/WordItem.java @@ -0,0 +1,188 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + + +import com.yahoo.prelude.query.parser.Token; +import com.yahoo.prelude.query.textualrepresentation.Discloser; +import com.yahoo.protect.Validator; + +import java.nio.ByteBuffer; + +/** + * A term item containing a single word. + * + * @author bratseth + * @author havardpe + */ +public class WordItem extends TermItem { + + /** True if this is <b>not</b> part of the special tokens dictionary */ + private boolean words = true; + + /** Is this word stemmed? */ + private boolean stemmed = false; + + /** Is this word produced from segmenting a block of word characters?
*/ + private boolean fromSegmented = false; + + /** If fromSegmented is true, this is the index into the list of segments */ + private int segmentIndex = 0; + + /** The word as it should be searched, never null */ + private String word; + + /** The flag returned by isLowercased, false until setLowercased is called */ + private boolean lowercased = false; + + public ItemType getItemType() { + return ItemType.WORD; + } + + public String getName() { + return "WORD"; + } + + /** Creates a word item with the empty string as index name, isFromQuery false and no origin */ + public WordItem(String word) { + this(word, ""); + } + + /** Creates a word item with the given index name, isFromQuery false and no origin */ + public WordItem(String word, String indexName) { + this(word, indexName, false, null); + } + + /** Creates a word item with no origin; note that null, not the empty string, is passed on as the index name */ + public WordItem(String word, boolean isFromQuery) { + this(word, null, isFromQuery, null); + } + + /** Creates a word item with the given index name and isFromQuery flag, and no origin */ + public WordItem(String word, String indexName, boolean isFromQuery) { + this(word, indexName, isFromQuery, null); + } + + /** Creates a word item from the string form of the token, with the substring of the token as origin */ + public WordItem(Token word, boolean isFromQuery) { + this(word.toString(), "", isFromQuery, word.substring); + } + + /** Creates a word item with the empty string as index name */ + public WordItem(String word, boolean isFromQuery, Substring origin) { + this(word, "", isFromQuery, origin); + } + + /** + * Creates a word item. + * + * @param word the word of this item, handed to {@link #setWord}, which rejects null + * @param indexName handed to the TermItem constructor + * @param isFromQuery handed to the TermItem constructor + * @param origin handed to the TermItem constructor + */ + public WordItem(String word, String indexName, boolean isFromQuery, Substring origin) { + super(indexName, isFromQuery, origin); + setWord(word); + } + + /** Sets the word of this item, after checking with Validator.ensureNotNull that it is not null */ + public void setWord(String word) { + Validator.ensureNotNull("Word item word", word); + this.word = word; + } + + /** Writes the superclass encoding followed by the string returned by getEncodedWord() */ + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); // takes care of index bytes + putString(getEncodedWord(), buffer); + } + + /** Returns the word for encoding. By default simply the word */ + protected String getEncodedWord() { + return getIndexedString(); + } + + /** Returns the same as {@link #stringValue} */ + public String getWord() { return word; } + + /** + * Returns this word as it should be used in executing the query.
+ * This is usually (but not always) a normalized and stemmed form + */ + public @Override String stringValue() { return word; } + + /** Same as #setWord */ + public @Override void setValue(String value) { setWord(value); } + + /** + * Get the word exactly as received in the request. + * This returns the same as getWord if no other raw form is known + * + * @return the raw form of this word, never null + */ + @Override + public String getRawWord() { + if (getOrigin()!=null) return getOrigin().getValue(); + return word; + } + + public boolean isStemmed() { return stemmed; } + + public void setStemmed(boolean stemmed) { this.stemmed = stemmed; } + + public boolean isFromSegmented() { + return fromSegmented; + } + + public void setFromSegmented(boolean fromSegmented) { + this.fromSegmented = fromSegmented; + } + + public boolean isLowercased() { + return lowercased; + } + + public void setLowercased(boolean lowercased) { + this.lowercased = lowercased; + } + + public int getSegmentIndex() { + return segmentIndex; + } + + public void setSegmentIndex(int segmentIndex) { + this.segmentIndex = segmentIndex; + } + + /** Word items use an empty heading instead of "WORD " */ + protected void appendHeadingString(StringBuilder buffer) {} + + /** Combines the hash code of the word with the superclass hash code */ + public int hashCode() { + return word.hashCode() + 71 * super.hashCode(); + } + + /** Returns true if the superclass considers the argument equal to this and its word equals the word of this */ + public boolean equals(Object object) { + if (!super.equals(object)) { + return false; + } + + WordItem other = (WordItem) object; // Ensured by superclass + + if (!this.word.equals(other.word)) { + return false; + } + + return true; + } + + /** Always returns 1 */ + public int getNumWords() { + return 1; + } + + /** Returns the word of this item */ + @Override + public String getIndexedString() { + return word; + } + + /** Returns true if this consists of regular word characters.
Returns false if this represents a "special token" */ + public boolean isWords() { + return words; + } + + /** Sets if this consists of regular word characters (true) or represents a "special token" (false) */ + public void setWords(boolean words) { + this.words = words; + } + + /** Discloses the superclass state followed by the fromSegmented, segmentIndex, stemmed and words properties; lowercased is not disclosed */ + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("fromSegmented", fromSegmented); + discloser.addProperty("segmentIndex", segmentIndex); + discloser.addProperty("stemmed", stemmed); + discloser.addProperty("words", words); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/package-info.java b/container-search/src/main/java/com/yahoo/prelude/query/package-info.java new file mode 100644 index 00000000000..95dbd62849f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/package-info.java @@ -0,0 +1,11 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * The query model representing a boolean combination of selection criteria, where elements may be + * simple criteria, nested boolean operators, or annotated pieces of natural language text. + */ +@ExportPackage +@PublicApi +package com.yahoo.prelude.query; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java new file mode 100644 index 00000000000..fb56e10445a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java @@ -0,0 +1,311 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.query.parser; + +import com.yahoo.language.Language; +import com.yahoo.language.process.Segmenter; +import com.yahoo.log.event.*; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.*; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.query.parser.Parsable; +import com.yahoo.search.query.parser.ParserEnvironment; + +import java.util.*; + +/** + * The Vespa query parser. + * + * @author bratseth + * @author Steinar Knutsen + */ +@SuppressWarnings("deprecation") +public abstract class AbstractParser implements CustomParser { + + /** The current submodes of this parser */ + protected Submodes submodes = new Submodes(); + + /** + * The current language of this parser. Used to decide whether and how to + * use the CJKSegmenter + */ + protected Language language = Language.UNKNOWN; + + /** The IndexFacts.Session of this query */ + protected IndexFacts.Session indexFacts; + + /** + * The counter for braces in URLs, braces in URLs are accepted so long as + * they are balanced. + */ + protected int braceLevelURL = 0; + + protected final ParserEnvironment environment; + protected final TokenPosition tokens = new TokenPosition(); + + /** + * An enumeration of the parser index-controlled submodes. Any combination + * of these may be active at the same time. SubModes are activated or + * deactivated by specifying special indexes in the query. + */ + final class Submodes { + + /** + * Url mode allows "_" and "-" as word characters. Default is false + */ + public boolean url = false; + + /** + * Site mode - host names get start of host and end of host markers. + * Default is false + */ + public boolean site = false; + + /** + * Sets submodes from an index. + * + * @param indexName the index name which should decide the submodes, or null to do nothing. 
+ * @param session the session used to look up information about this index + */ + @SuppressWarnings({"deprecation"}) + // To avoid this we need to pass an IndexFacts.session down instead - easily done but not without breaking API's + public void setFromIndex(final String indexName, IndexFacts.Session session) { + if (indexName == null) { + return; + } + + reset(); + + final Index current = session.getIndex(indexName); + + if (current.isUriIndex()) { + url = true; + } else if (current.isHostIndex()) { + site = true; + } + } + + /** Sets default values for all submodes */ + public void reset() { + url = false; + site = false; + } + + /** + * Returns whether we are in a mode which allows explicit anchoring + * markers, ^ and $ + * + * @return True if we are doing explicit anchoring. + */ + public boolean explicitAnchoring() { + return site; + } + } + + /** + * <p>Creates a new instance of this class, storing the given {@link ParserEnvironment} for parse-time access to the + * environment.</p> + * + * @param environment The environment settings to attach to the Parser. 
+ */ + protected AbstractParser(ParserEnvironment environment) { + this.environment = ParserEnvironment.fromParserEnvironment(environment); + if (this.environment.getIndexFacts() == null) { + this.environment.setIndexFacts(new IndexFacts()); + } + } + + @Override + public final QueryTree parse(Parsable query) { + Item root = null; + if (query != null) { + root = parse(query.getQuery(), + query.getFilter(), + query.getLanguage(), + environment.getIndexFacts().newSession(query.getSources(), query.getRestrict()), + query.getDefaultIndexName()); + } + if (root == null) { + root = new NullItem(); + } + return new QueryTree(root); + } + + @Override + public final Item parse(String queryToParse, String filterToParse, Language parsingLanguage, + IndexFacts.Session indexFacts, String defaultIndexName) { + if (queryToParse == null) { + return null; + } + if (parsingLanguage == null) { + parsingLanguage = environment.getLinguistics().getDetector().detect(queryToParse, null).getLanguage(); + } + setState(parsingLanguage, indexFacts); + tokenize(queryToParse, defaultIndexName, indexFacts); + Item root = parseItems(); + if (filterToParse != null) { + AnyParser filterParser = new AnyParser(environment); + if (root == null) { + root = filterParser.parseFilter(filterToParse, parsingLanguage, indexFacts); + } else { + root = filterParser.applyFilter(root, filterToParse, parsingLanguage, indexFacts); + } + } + root = simplifyPhrases(root); + if (defaultIndexName != null) { + assignDefaultIndex(indexFacts.getCanonicName(defaultIndexName), root); + } + return root; + } + + protected abstract Item parseItems(); + + /** + * Assigns the default index to query terms having no default index The + * parser _should_ have done this, for some reason it doesn't + * + * @param defaultIndex The default index to assign. + * @param item The item to check. 
+ */ + private static void assignDefaultIndex(final String defaultIndex, + final Item item) { + if (defaultIndex == null || item == null) { + return; + } + + if (item instanceof IndexedItem) { + final IndexedItem indexName = (IndexedItem) item; + + if ("".equals(indexName.getIndexName())) { + indexName.setIndexName(defaultIndex); + } + } else if (item instanceof CompositeItem) { + final Iterator<Item> items = ((CompositeItem) item) + .getItemIterator(); + while (items.hasNext()) { + final Item i = items.next(); + assignDefaultIndex(defaultIndex, i); + } + } + } + + /** + * Unicode normalizes some piece of natural language text. The chosen form + * is compatibility decomposition, canonical composition (NFKC). + * + * @param input The string to normalize. + * @return The normalized string. + */ + protected String normalize(final String input) { + if (input == null || input.length() == 0) { + return input; + } + return environment.getLinguistics().getNormalizer().normalize(input); + } + + protected void setState(final Language queryLanguage, IndexFacts.Session indexFacts) { + this.indexFacts = indexFacts; + language = queryLanguage; + submodes.reset(); + } + + /** + * Tokenizes the given string and initializes tokens with the found tokens. + * + * @param query the string to tokenize. + * @param defaultIndexName the name of the index to use as default. + * @param indexFacts resolved information about the index we are searching + */ + protected void tokenize(String query, String defaultIndexName, IndexFacts.Session indexFacts) { + Tokenizer tokenizer = new Tokenizer(environment.getLinguistics()); + tokenizer.setSubstringSpecialTokens(language.isCjk()); + tokenizer.setSpecialTokens(environment.getSpecialTokens()); + tokens.initialize(tokenizer.tokenize(query, defaultIndexName, indexFacts)); + } + + /** + * Collapses single item phrases in the tree to the contained item. + * + * @param unwashed The item whose phrases to simplify. + * @return The simplified item. 
+ */ + public static Item simplifyPhrases(final Item unwashed) { + if (unwashed == null) { + return unwashed; + } else if (unwashed instanceof PhraseItem) { + return collapsePhrase((PhraseItem) unwashed); + } else if (unwashed instanceof CompositeItem) { + final CompositeItem composite = (CompositeItem) unwashed; + final ListIterator<Item> i = composite.getItemIterator(); + + while (i.hasNext()) { + final Item original = i.next(); + final Item transformed = simplifyPhrases(original); + + if (original != transformed) { + i.set(transformed); + } + } + return unwashed; + } else { + return unwashed; + } + } + + private static Item collapsePhrase(final PhraseItem phrase) { + if (phrase.getItemCount() == 1 && phrase.getItem(0) instanceof WordItem) { + // TODO: Other stuff which needs propagation? + final WordItem word = (WordItem) phrase.getItem(0); + + word.setWeight(phrase.getWeight()); + return word; + } else { + return phrase; + } + } + + // TODO: The segmenting stuff is a mess now, this will fix it: + // - Make Segmenter a class which is instantiated per parsing + // - Make the instance know the language, etc and do all dispatching + // internally + // -JSB + // TODO: Use segmenting for forced phrase searches? 
+ protected Item segment(final Token token) { + final String normalizedToken = normalize(token.toString()); + + if (token.isSpecial()) { + final WordItem w = new WordItem(token.toString(), true, token.substring); + w.setWords(false); + w.setFromSpecialToken(true); + return w; + } + + if (language == Language.UNKNOWN) { + return new WordItem(normalizedToken, true, token.substring); + } + + + Segmenter segmenter = environment.getLinguistics().getSegmenter(); + List<String> segments = segmenter.segment(normalizedToken, language); + if (segments.size() == 0) { + return null; + } + if (segments.size() == 1) { + return new WordItem(segments.get(0), "", true, token.substring); + } + + final CompositeItem composite = new PhraseSegmentItem(token.toString(), + normalizedToken, true, false, token.substring); + int n = 0; + for (final String segment : segments) { + final WordItem w = new WordItem(segment, "", true, token.substring); + w.setFromSegmented(true); + w.setSegmentIndex(n++); + w.setStemmed(false); + composite.addItem(w); + } + composite.lock(); + return composite; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AdvancedParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AdvancedParser.java new file mode 100644 index 00000000000..411565ee32c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AdvancedParser.java @@ -0,0 +1,214 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query.parser; + +import com.yahoo.prelude.query.*; +import com.yahoo.search.query.parser.ParserEnvironment; + +import static com.yahoo.prelude.query.parser.Token.Kind.LBRACE; +import static com.yahoo.prelude.query.parser.Token.Kind.NUMBER; + +/** + * Parser for queries of type advanced. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @deprecated since 5.11, YQL+ should be used for formal queries + */ +@Deprecated // OK DO NOT REMOVE (we'll keep this around longer) +public class AdvancedParser extends StructuredParser { + + public AdvancedParser(ParserEnvironment environment) { + super(environment); + } + + protected Item parseItems() { + return advancedItems(true); + } + + protected Item handleComposite(boolean topLevel) { + return advancedItems(false); + } + + /** + * A collection of one or more advanced items. + */ + private Item advancedItems(boolean topLevel) { + int position = tokens.getPosition(); + Item item = null; + + try { + item = advancedItemsBody(topLevel); + return item; + } finally { + if (item == null) { + tokens.setPosition(position); + } + } + } + + private Item advancedItemsBody(boolean topLevel) { + Item topLevelItem = null; + Item item; + boolean itemIsComposite; + boolean topLevelIsClosed = false; + boolean expectingOperator = false; + + do { + item = null; + + if (item == null) { + item = indexableItem(); + if (item == null) { + item = compositeItem(); + itemIsComposite = true; + } else { + itemIsComposite = false; + } + if (item != null) { + Item newTop = null; + + if (expectingOperator) { + newTop = handleAdvancedOperator(topLevelItem, item, + topLevelIsClosed); + } + if (newTop != null) { // Operator found + topLevelIsClosed = false; + expectingOperator = false; + topLevelItem = newTop; + } else if (topLevelItem == null) { + topLevelItem = item; + if (itemIsComposite) { + topLevelIsClosed = true; + } + expectingOperator = true; + } else if (topLevelItem instanceof CompositeItem + && !(topLevelItem instanceof SegmentItem)) { + ((CompositeItem) topLevelItem).addItem(item); + expectingOperator = true; + } else { + AndItem and = new AndItem(); + + and.addItem(topLevelItem); + and.addItem(item); + topLevelItem = and; + topLevelIsClosed = false; + expectingOperator = true; + } + } + } + + if 
(topLevel && item == null) { + tokens.skip(); + } + } while (tokens.hasNext() && (topLevel || item != null)); + + // Optimize away composites containing one item only + // (including nots with only a positive) + if (topLevelItem instanceof CompositeItem + && ((CompositeItem) topLevelItem).getItemCount() == 1) { + return ((CompositeItem) topLevelItem).removeItem(0); + } + + return topLevelItem; + } + + /** Returns whether the item is a specific word item */ + private boolean isTheWord(String word, Item item) { + if (!(item instanceof WordItem)) { + return false; + } + return word.equalsIgnoreCase(((WordItem) item).getRawWord()); // TODO: Why not search for getWord w.o lowercasing? + } + + + + /** Returns the new top level, or null if the current item is not an operator */ + private Item handleAdvancedOperator(Item topLevelItem, Item item, boolean topLevelIsClosed) { + if (isTheWord("and", item)) { + if (topLevelIsClosed || !(topLevelItem instanceof AndItem)) { + AndItem and = new AndItem(); + + and.addItem(topLevelItem); + return and; + } + return topLevelItem; + } else if (isTheWord("or", item)) { + if (topLevelIsClosed || !(topLevelItem instanceof OrItem)) { + OrItem or = new OrItem(); + + or.addItem(topLevelItem); + return or; + } + return topLevelItem; + } else if (isTheWord("equiv", item)) { + if (topLevelIsClosed || !(topLevelItem instanceof EquivItem)) { + EquivItem equiv = new EquivItem(); + + equiv.addItem(topLevelItem); + return equiv; + } + return topLevelItem; + } else if (isTheWord("wand", item)) { + int n=consumeNumericArgument(); + if (n==0) + n=WeakAndItem.defaultN; + if (topLevelIsClosed || !(topLevelItem instanceof WeakAndItem) || n!=((WeakAndItem)topLevelItem).getN()) { + WeakAndItem wand = new WeakAndItem(); + wand.setN(n); + wand.addItem(topLevelItem); + return wand; + } + return topLevelItem; + } else if (isTheWord("andnot", item)) { + if (topLevelIsClosed || !(topLevelItem instanceof NotItem)) { + NotItem not = new NotItem(); + + 
not.addPositiveItem(topLevelItem); + return not; + } + return topLevelItem; + } else if (isTheWord("rank", item)) { + if (topLevelIsClosed || !(topLevelItem instanceof RankItem)) { + RankItem rank = new RankItem(); + + rank.addItem(topLevelItem); + return rank; + } + return topLevelItem; + } else if (isTheWord("near", item)) { + int distance = consumeNumericArgument(); + if (distance==0) + distance=NearItem.defaultDistance; + if (topLevelIsClosed || !(topLevelItem instanceof NearItem) || distance!=((NearItem)topLevelItem).getDistance()) { + NearItem near = new NearItem(distance); + + near.addItem(topLevelItem); + return near; + } + return topLevelItem; + } else if (isTheWord("onear", item)) { + int distance = consumeNumericArgument(); + if (distance==0) + distance=ONearItem.defaultDistance; + if (topLevelIsClosed || !(topLevelItem instanceof ONearItem) || distance!=((ONearItem)topLevelItem).getDistance()) { + ONearItem oNear = new ONearItem(distance); + + oNear.addItem(topLevelItem); + return oNear; + } + return topLevelItem; + } + + return null; + } + + /** Returns the argument to this operator or 0 if none */ + private int consumeNumericArgument() { + if (!tokens.currentIs(LBRACE)) return 0; + tokens.skip(LBRACE); + if (!tokens.currentIsNoIgnore(NUMBER)) throw new IllegalArgumentException("Expected an integer argument"); + int distance=Integer.valueOf(tokens.next().image); + if (!tokens.skip(Token.Kind.RBRACE)) throw new IllegalArgumentException("Expected a right brace following the argument"); + return distance; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AllParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AllParser.java new file mode 100644 index 00000000000..cb540a1f982 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AllParser.java @@ -0,0 +1,186 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.prelude.query.parser; + +import com.yahoo.prelude.query.*; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.query.parser.ParserEnvironment; + +import java.util.Iterator; + +import static com.yahoo.prelude.query.parser.Token.Kind.MINUS; +import static com.yahoo.prelude.query.parser.Token.Kind.SPACE; + +/** + * Parser for queries of type all. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class AllParser extends SimpleParser { + + public AllParser(ParserEnvironment environment) { + super(environment); + } + + protected Item parseItems() { + int position = tokens.getPosition(); + try { + return parseItemsBody(); + } finally { + tokens.setPosition(position); + } + } + + protected Item parseItemsBody() { + // Algorithm: Collect positive, negative, and and'ed items, then combine. + AndItem and=null; + NotItem not=null; // Store negatives here as we go + Item current; + + // Find all items + do { + current=negativeItem(); + if (current!=null) { + not=addNot(current,not); + continue; + } + + current=positiveItem(); + if (current==null) + current = indexableItem(); + if (current == null) + current = compositeItem(); + + if (current!=null) + and=addAnd(current,and); + + if (current == null) + tokens.skip(); + } while (tokens.hasNext()); + + // Combine the items + Item topLevel=and; + + if (not!=null && topLevel!=null) { + not.setPositiveItem(topLevel); + topLevel=not; + } + + return simplifyUnnecessaryComposites(topLevel); + } + + // Simplify if there are unnecessary composites due to single elements + protected final Item simplifyUnnecessaryComposites(Item item) { + if (item == null) return null; + + QueryTree root = new QueryTree(item); + QueryCanonicalizer.canonicalize(root); + + return root.getRoot() instanceof NullItem ? 
null : root.getRoot(); + } + + protected AndItem addAnd(Item item,AndItem and) { + if (and==null) + and=new AndItem(); + and.addItem(item); + return and; + } + + protected OrItem addOr(Item item,OrItem or) { + if (or==null) + or=new OrItem(); + or.addItem(item); + return or; + } + + protected NotItem addNot(Item item,NotItem not) { + if (not==null) + not=new NotItem(); + not.addNegativeItem(item); + return not; + } + + protected Item negativeItem() { + int position = tokens.getPosition(); + Item item = null; + try { + if (!tokens.skipMultiple(MINUS)) return null; + + if (tokens.currentIsNoIgnore(SPACE)) return null; + + item = indexableItem(); + if (item == null) { + item = compositeItem(); + + if (item != null) { + if (item instanceof OrItem) { // Turn into And + AndItem and = new AndItem(); + + for (Iterator<Item> i = ((OrItem) item).getItemIterator(); i.hasNext();) { + and.addItem(i.next()); + } + item = and; + } + } + } + if (item!=null) + item.setProtected(true); + return item; + } finally { + if (item == null) { + tokens.setPosition(position); + } + } + } + + /** + * Returns the top level item resulting from combining the given top + * level item and the new item. This implements most of the weird transformation + * rules of the parser. 
+ */ + protected Item combineItems(Item topLevelItem, Item item) { + if (topLevelItem == null) { + return item; + } else if (topLevelItem instanceof OrItem && item instanceof OrItem) { + OrItem newTopOr = new OrItem(); + + newTopOr.addItem(topLevelItem); + newTopOr.addItem(item); + return newTopOr; + } else if (item instanceof OrItem && topLevelItem instanceof RankItem) { + for (Iterator<Item> i = ((RankItem) topLevelItem).getItemIterator(); i.hasNext();) { + ((OrItem) item).addItem(0, i.next()); + } + return item; + } else if (item instanceof OrItem && topLevelItem instanceof PhraseItem) { + OrItem newTopOr = new OrItem(); + + newTopOr.addItem(topLevelItem); + newTopOr.addItem(item); + return newTopOr; + } else if (!(topLevelItem instanceof RankItem)) { + RankItem rank = new RankItem(); + + if (topLevelItem instanceof NotItem) { // Strange rule, but that's how it is + rank.addItem(topLevelItem); + rank.addItem(item); + } else { + rank.addItem(item); + rank.addItem(topLevelItem); + } + return rank; + } else if ((item instanceof RankItem) && (((RankItem)item).getItem(0) instanceof OrItem)) { + RankItem itemAsRank = (RankItem) item; + OrItem or = (OrItem) itemAsRank.getItem(0); + + ((RankItem) topLevelItem).addItem(0, or); + for (int i = 1; i < itemAsRank.getItemCount(); i++) { + or.addItem(0, itemAsRank.getItem(i)); + } + return topLevelItem; + } else { + ((RankItem) topLevelItem).addItem(0, item); + return topLevelItem; + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java new file mode 100644 index 00000000000..3043cb27247 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java @@ -0,0 +1,266 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.prelude.query.parser;

import com.yahoo.language.Language;
import com.yahoo.prelude.IndexFacts;
import com.yahoo.prelude.query.*;
import com.yahoo.search.query.parser.ParserEnvironment;

import java.util.Collections;
import java.util.Iterator;
import java.util.Set;

import static com.yahoo.prelude.query.parser.Token.Kind.*;

/**
 * Parser for queries of type any.
 *
 * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
 */
public class AnyParser extends SimpleParser {

    public AnyParser(ParserEnvironment environment) {
        super(environment);
    }

    /** Parses the tokens as a top level collection of any-items */
    protected Item parseItems() {
        return anyItems(true);
    }

    /**
     * Parses a filter string on its own, using a new session over the given search definitions
     * created from the index facts of this parser's environment.
     */
    Item parseFilter(String filter, Language queryLanguage, Set<String> searchDefinitions) {
        return parseFilter(filter, queryLanguage, environment.getIndexFacts().newSession(searchDefinitions, Collections.emptySet()));
    }

    /**
     * Parses a filter string on its own and marks all the resulting terms as filters.
     *
     * @return the root of the parsed filter, or null if nothing was parsed
     */
    Item parseFilter(String filter, Language queryLanguage, IndexFacts.Session indexFacts) {
        setState(queryLanguage, indexFacts);
        tokenize(filter, null, indexFacts);

        Item filterRoot = anyItems(true);

        if (filterRoot == null) {
            return null;
        }

        markAllTermsAsFilters(filterRoot);
        return filterRoot;
    }

    /**
     * Parses an item preceded by any number of plus signs and then one or more minus signs.
     * An or composite found after the minus is turned into an and. The returned item is
     * marked as protected.
     *
     * @return the negative item, or null (with the token position restored) if there is none here
     */
    protected Item negativeItem() {
        int position = tokens.getPosition();
        Item item = null;

        try {
            tokens.skipMultiple(PLUS);

            if (!tokens.skipMultiple(MINUS)) {
                return null;
            }

            if (tokens.currentIsNoIgnore(SPACE)) {
                return null;
            }

            item = indexableItem();

            if (item == null) {
                item = compositeItem();

                if (item instanceof OrItem) { // Turn into And
                    AndItem and = new AndItem();

                    for (Iterator<Item> i = ((OrItem) item).getItemIterator(); i.hasNext();) {
                        and.addItem(i.next());
                    }
                    item = and;
                }
            }
            if (item != null) {
                item.setProtected(true);
            }
            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    /**
     * Returns the top level item resulting from combining the given top
     * level item and the new item. This implements most of the weird transformation
     * rules of the parser.
     */
    protected Item combineItems(Item topLevelItem, Item item) {
        if (topLevelItem == null) {
            return item;
        } else if (topLevelItem instanceof OrItem && item instanceof OrItem) {
            OrItem newTopOr = new OrItem();

            newTopOr.addItem(topLevelItem);
            newTopOr.addItem(item);
            return newTopOr;
        } else if (!(topLevelItem instanceof RankItem)) {
            RankItem rank = new RankItem();

            if (topLevelItem instanceof NotItem) { // Strange rule, but that's how it is
                rank.addItem(topLevelItem);
                rank.addItem(item);
            } else {
                rank.addItem(item);
                rank.addItem(topLevelItem);
            }
            return rank;
        } else if ((item instanceof RankItem)
                   && (((RankItem) item).getItem(0) instanceof OrItem)) {
            // The top level item is known to be a rank here, the branch above took all other cases
            RankItem itemAsRank = (RankItem) item;
            OrItem or = (OrItem) itemAsRank.getItem(0);

            ((RankItem) topLevelItem).addItem(0, or);
            for (int i = 1; i < itemAsRank.getItemCount(); i++) {
                or.addItem(0, itemAsRank.getItem(i));
            }
            return topLevelItem;
        } else {
            ((RankItem) topLevelItem).addItem(0, item);
            return topLevelItem;
        }
    }

    /**
     * Parses a filter string and merges the resulting items into an already parsed query tree.
     *
     * @param root the root of the already parsed query
     * @return the root of the combined tree
     */
    Item applyFilter(Item root, String filter, Language queryLanguage, IndexFacts.Session indexFacts) {
        setState(queryLanguage, indexFacts);
        tokenize(filter, null, indexFacts);
        return filterItems(root);
    }

    /**
     * Marks block items, terms and phrases in the tree rooted at the given item as filters.
     * Composites are traversed recursively. Items which are neither terms nor composites
     * (including null) have no children to visit and are left alone rather than being cast
     * to a composite.
     */
    private void markAllTermsAsFilters(Item root) {
        if (root instanceof BlockItem) {
            root.setFilter(true);
        }

        if (root instanceof TermItem) {
            root.setFilter(true);
        } else if (root instanceof CompositeItem) {
            if (root instanceof PhraseItem) {
                root.setFilter(true);
            }
            for (Iterator<Item> i = ((CompositeItem) root).getItemIterator(); i.hasNext();) {
                markAllTermsAsFilters(i.next());
            }
        }
    }

    /**
     * Consumes the remaining tokens as filter items and merges each into the given root:
     * positive items are and'ed, negative items are added as negatives, and other indexable
     * items are added as rank items. Tokens which yield no item are skipped.
     */
    private Item filterItems(Item root) {
        while (tokens.hasNext()) {
            Item item = positiveItem();
            root = addAndFilter(root, item);
            if (item == null) {
                item = negativeItem();
                root = addNotFilter(root, item);
            }
            if (item == null) {
                item = indexableItem();
                root = addRankFilter(root, item);
            }

            if (item != null) {
                markAllTermsAsFilters(item);
            } else {
                tokens.skip();
            }
        }
        return root;
    }

    /** Adds the item as a mandatory term of the root. Returns the root unchanged if the item is null. */
    private Item addAndFilter(Item root, Item item) {
        if (item == null) {
            return root;
        }

        if (root instanceof AndItem) {
            ((AndItem) root).addItem(item);
            return root;
        }

        if (root instanceof RankItem) {
            Item firstChild = ((RankItem) root).getItem(0);

            if (firstChild instanceof AndItem) {
                ((AndItem) firstChild).addItem(item);
                return root;
            } else if (firstChild instanceof NotItem) {
                ((NotItem) firstChild).addPositiveItem(item);
                return root;
            }
        }

        AndItem and = new AndItem();

        and.addItem(root);
        and.addItem(item);
        return and;
    }

    /** Adds the item as a negative term of the root. Returns the root unchanged if the item is null. */
    private Item addNotFilter(Item root, Item item) {
        if (item == null) {
            return root;
        }

        if (root instanceof NotItem) {
            ((NotItem) root).addNegativeItem(item);
            return root;
        }

        if (root instanceof RankItem) {
            RankItem rootAsRank = (RankItem) root;
            Item firstChild = rootAsRank.getItem(0);

            if (firstChild instanceof NotItem) {
                ((NotItem) firstChild).addNegativeItem(item);
                return root;
            } else {
                // Wrap the first child of the rank in a not, and put the not back in its place
                NotItem not = new NotItem();

                not.addPositiveItem(rootAsRank.removeItem(0));
                not.addNegativeItem(item);
                if (rootAsRank.getItemCount() == 0) {
                    return not;
                } else {
                    rootAsRank.addItem(0, not);
                    return root;
                }
            }
        }

        NotItem not = new NotItem();

        not.addPositiveItem(root);
        not.addNegativeItem(item);
        return not;
    }

    /** Adds the item as a rank term of the root. Returns the root unchanged if the item is null. */
    private Item addRankFilter(Item root, Item item) {
        if (item == null) {
            return root;
        }

        if (root instanceof RankItem) {
            ((RankItem) root).addItem(item);
            return root;
        }

        RankItem rank = new RankItem();

        rank.addItem(root);
        rank.addItem(item);
        return rank;
    }

}
diff --git
a/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java new file mode 100644 index 00000000000..a658d35e6de --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/CustomParser.java @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query.parser; + +import com.yahoo.language.Language; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.Item; +import com.yahoo.search.query.parser.Parser; + +import java.util.Collections; +import java.util.Objects; +import java.util.Set; + +/** + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + * @since 5.1.4 + */ +public interface CustomParser extends Parser { + + /** + * Returns the raw result from parsing, <i>not</i> wrapped in a QueryTree + * instance. This may also be null, as opposed to using + * {@link Parser#parse(com.yahoo.search.query.parser.Parsable)}. + */ + default Item parse(String queryToParse, String filterToParse, Language parsingLanguage, + Set<String> toSearch, IndexFacts indexFacts, String defaultIndexName) { + if (indexFacts == null) + indexFacts = new IndexFacts(); + return parse(queryToParse, filterToParse, parsingLanguage, indexFacts.newSession(toSearch, Collections.emptySet()), defaultIndexName); + } + + Item parse(String queryToParse, String filterToParse, Language parsingLanguage, + IndexFacts.Session indexFacts, String defaultIndexName); + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/ParseException.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/ParseException.java new file mode 100644 index 00000000000..17ee905400e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/ParseException.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query.parser; + + +/** + * Parser exceptions. JavaCC legacy, never thrown. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +@SuppressWarnings("serial") +public class ParseException extends RuntimeException { + + public ParseException(String message) { + super(message); + } + + public ParseException(String message, Throwable cause) { + super(message, cause); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java new file mode 100644 index 00000000000..ba10b7b6ee1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/PhraseParser.java @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query.parser; + +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.PhraseItem; +import com.yahoo.search.query.parser.ParserEnvironment; + +/** + * Parser for queries of type phrase. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class PhraseParser extends AbstractParser { + + public PhraseParser(ParserEnvironment environment) { + super(environment); + } + + protected Item parseItems() { + return forcedPhrase(); + } + + /** + * Ignores everything but words and numbers + * + * @return a phrase item if several words/numbers was found, + * a word item if only one was found + */ + private Item forcedPhrase() { + Item firstWord = null; + PhraseItem phrase = null; + + while (tokens.hasNext()) { + Token token = tokens.next(); + + if (token.kind != Token.Kind.WORD && token.kind != Token.Kind.NUMBER) { + continue; + } + // Note, this depends on segment never creating AndItems when quoted + // (the second argument) is true. 
+ Item newWord = segment(token); + + if (firstWord == null) { // First pass + firstWord = newWord; + } else if (phrase == null) { // Second pass + phrase = new PhraseItem(); + phrase.addItem(firstWord); + phrase.addItem(newWord); + } else { // Following passes + phrase.addItem(newWord); + } + } + if (phrase != null) { + return phrase; + } else { + return firstWord; + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/ProgrammaticParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/ProgrammaticParser.java new file mode 100644 index 00000000000..f509825d14c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/ProgrammaticParser.java @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query.parser; + +import com.yahoo.language.Language; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.NullItem; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.query.parser.Parsable; +import com.yahoo.search.query.textserialize.TextSerialize; + +import java.util.Set; + +/** + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + * @since 5.1.4 + */ +public final class ProgrammaticParser implements CustomParser { + + @Override + public QueryTree parse(Parsable query) { + Item root = parse(query.getQuery(), null, null, null, null, null); + if (root == null) { + root = new NullItem(); + } + return new QueryTree(root); + + } + + @Override + public Item parse(String queryToParse, String filterToParse, Language parsingLanguage, + IndexFacts.Session indexFacts, String defaultIndexName) { + if (queryToParse == null) return null; + return TextSerialize.parse(queryToParse); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/SimpleParser.java 
b/container-search/src/main/java/com/yahoo/prelude/query/parser/SimpleParser.java new file mode 100644 index 00000000000..6117e8e29ed --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/SimpleParser.java @@ -0,0 +1,250 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.query.parser;

import com.yahoo.prelude.query.*;
import com.yahoo.search.query.parser.ParserEnvironment;

import java.util.Iterator;

import static com.yahoo.prelude.query.parser.Token.Kind.PLUS;
import static com.yahoo.prelude.query.parser.Token.Kind.SPACE;

/**
 * Base class for parsers of the "simple" query languages (query types
 * ANY and ALL).
 *
 * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
 */
abstract class SimpleParser extends StructuredParser {

    protected SimpleParser(ParserEnvironment environment) {
        super(environment);
    }

    /** Nested composites are always parsed as "any", also when the top level is "all" */
    protected Item handleComposite(boolean topLevel) {
        return anyItems(false);
    }


    /** Parses a negative item at the current position, as defined by the subclass */
    protected abstract Item negativeItem();

    /**
     * Parses a collection of one or more items.
     * Multiple items are collected in the default composite - or.
     * If there is an explicit composite and some other terms,
     * a rank item combines them.
     * The token position is restored if nothing was produced.
     */
    protected Item anyItems(boolean topLevel) {
        int start = tokens.getPosition();
        Item result = null;

        try {
            result = anyItemsBody(topLevel);
            return result;
        } finally {
            // Also reached when anyItemsBody throws: result is still null then
            if (result == null) {
                tokens.setPosition(start);
            }
        }
    }

    /**
     * Does the work of {@link #anyItems}: repeatedly tries, in this order, a positive item,
     * a negative item, a composite item and an indexable item, and folds each result into
     * the tree built so far. Afterwards an incomplete not (only positives, or only negatives)
     * is rewritten.
     */
    private Item anyItemsBody(boolean topLevel) {
        Item root = null;
        NotItem not = null;
        Item parsed;

        do {
            // 1. An explicit positive goes into the (lazily created) not item
            parsed = positiveItem();
            if (parsed != null) {
                boolean createdNot = (not == null);
                if (createdNot) {
                    not = new NotItem();
                }
                not.addPositiveItem(parsed);
                if (createdNot) {
                    root = combineItems(root, not);
                }
            }

            // 2. A negative goes into the same not item
            if (parsed == null) {
                parsed = negativeItem();
                if (parsed != null) {
                    boolean createdNot = (not == null);
                    if (createdNot) {
                        not = new NotItem();
                    }
                    not.addNegativeItem(parsed);
                    if (createdNot) {
                        root = combineItems(root, not);
                    }
                }
            }

            // 3. An explicit composite is combined with whatever is there already
            if (parsed == null) {
                parsed = compositeItem();
                if (parsed != null) {
                    root = (root == null) ? parsed : combineItems(root, parsed);
                }
            }

            // 4. A plain indexable item
            if (parsed == null) {
                parsed = indexableItem();
                if (parsed != null) {
                    if (root == null) {
                        root = parsed;
                    } else if (needNewTopLevel(root, parsed)) {
                        CompositeItem disjunction = new OrItem();

                        disjunction.addItem(root);
                        disjunction.addItem(parsed);
                        root = disjunction;
                    } else if (root instanceof NotItem) {
                        root = combineItems(root, parsed);
                    } else {
                        ((CompositeItem) root).addItem(parsed);
                    }
                }
            }

            // Nothing matched: at top level, step past the offending token and go on
            if (topLevel && parsed == null) {
                tokens.skip();
            }
        } while (tokens.hasNext() && (topLevel || parsed != null));

        if (not != null) {
            boolean notIsAllThereIs = (root == null || root == not);

            if (not.getItemCount() == 1) {
                // Incomplete not, only positive
                // => pass the positive upwards instead, drop the not
                if (notIsAllThereIs) {
                    return not.removeItem(0); // The positive
                }
                if (root instanceof RankItem) {
                    removeNot((RankItem) root);
                    return combineItems(root, not.getPositiveItem());
                }
            }

            if (not.getPositiveItem() == null) {
                // Incomplete not, only negatives
                if (notIsAllThereIs) {
                    return null; // Only negatives - ignore them
                }
                // => neutral rank items become implicit positives
                not.addPositiveItem(getItemAsPositiveItem(root, not));
                return not;
            }
        }
        return (root != null) ? root : not;
    }


    /** Says whether we need a new top level item given the new item */
    private boolean needNewTopLevel(Item topLevelItem, Item item) {
        if (item == null) {
            return false;
        }
        return topLevelItem instanceof TermItem
               || topLevelItem instanceof PhraseItem
               || topLevelItem instanceof BlockItem;
    }


    /**
     * Removes and returns the first <i>not</i> found in the composite,
     * or returns null if there's none
     */
    private NotItem removeNot(CompositeItem composite) {
        int index = 0;
        while (index < composite.getItemCount()) {
            if (composite.getItem(index) instanceof NotItem) {
                return (NotItem) composite.removeItem(index);
            }
            index++;
        }
        return null;
    }

    /** Combines the tree built so far with a new item, as defined by the subclass */
    protected abstract Item combineItems(Item topLevelItem, Item item);

    /**
     * Parses an explicit positive: one or more PLUS tokens not followed by a SPACE,
     * followed by an indexable item or a composite item. The result is marked as protected.
     * Returns null and restores the token position if that does not match.
     */
    protected Item positiveItem() {
        int start = tokens.getPosition();
        Item positive = null;

        try {
            if (!tokens.skipMultiple(PLUS) || tokens.currentIsNoIgnore(SPACE)) {
                return null;
            }

            positive = indexableItem();
            if (positive == null) {
                positive = compositeItem();
            }
            if (positive != null) {
                positive.setProtected(true);
            }
            return positive;
        } finally {
            if (positive == null) {
                tokens.setPosition(start);
            }
        }
    }

    /**
     * Returns the content of the given item as an item to be added as a positive item.
     * Used to turn a top level item into implicit positives when explicit positives
     * (+ items) are not found, but negatives are.
     */
    private Item getItemAsPositiveItem(Item item, NotItem not) {
        if (!(item instanceof RankItem)) {
            return item;
        }

        RankItem rank = (RankItem) item;

        // Remove the not from the rank item, the rank should generally
        // be the first, but this is not always the case.
        // The count is read once, before anything is removed.
        for (int index = 0, count = rank.getItemCount(); index < count; index++) {
            if (rank.getItem(index) == not) {
                rank.removeItem(index);
                break;
            }
        }

        if (rank.getItemCount() == 1) {
            return rank.getItem(0);
        }

        // Several items - or together
        OrItem disjunction = new OrItem();
        Iterator<Item> remaining = rank.getItemIterator();
        while (remaining.hasNext()) {
            disjunction.addItem(remaining.next());
        }
        return disjunction;
    }

}
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/SpecialTokenRegistry.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/SpecialTokenRegistry.java new file mode 100644 index 00000000000..d2640e64821 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/SpecialTokenRegistry.java @@ -0,0 +1,137 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.query.parser;

import com.yahoo.config.subscription.ConfigGetter;
import com.yahoo.config.subscription.ConfigSubscriber;
import com.yahoo.vespa.configdefinition.SpecialtokensConfig;
import com.yahoo.vespa.configdefinition.SpecialtokensConfig.Tokenlist;
import com.yahoo.vespa.configdefinition.SpecialtokensConfig.Tokenlist.Tokens;

import java.util.*;
import java.util.logging.Logger;


/**
 * A <i>registry</i> which is responsible for knowing the current
 * set of special tokens. The default registry returns empty token lists
 * for all names. Usage of this registry is multithread safe.
+ * + * @author bratseth + */ +public class SpecialTokenRegistry { + + /** The log of this */ + private static Logger log = Logger.getLogger(SpecialTokens.class.getName()); + + private static final SpecialTokens nullSpecialTokens = new SpecialTokens(); + + /** + * The current authorative special token lists, indexed on name. + * These lists are unmodifiable and used directly by clients of this + */ + private Map<String,SpecialTokens> specialTokenMap = new HashMap<>(); + + private boolean frozen = false; + + /** + * Creates an empty special token registry which + * does not subscribe to any configuration + */ + public SpecialTokenRegistry() {} + + /** + * Create a special token registry which subscribes to the specialtokens + * configuration. Only used for testing. + */ + public SpecialTokenRegistry(String configId) { + try { + build(new ConfigGetter<>(SpecialtokensConfig.class).getConfig(configId)); + } catch (Exception e) { + log.config( + "No special tokens are configured (" + e.getMessage() + ")"); + } + } + + /** + * Create a special token registry from a configuration object. This is the production code path. + */ + public SpecialTokenRegistry(SpecialtokensConfig config) { + if (config != null) { + build(config); + } + freeze(); + } + + private void freeze() { + frozen = true; + } + + private void build(SpecialtokensConfig config) { + List<SpecialTokens> list = new ArrayList<>(); + for (Iterator<Tokenlist> i = config.tokenlist().iterator(); i.hasNext();) { + Tokenlist tokenList = i.next(); + SpecialTokens tokens = new SpecialTokens(tokenList.name()); + + for (Iterator<Tokens> j = tokenList.tokens().iterator(); j.hasNext();) { + Tokens token = j.next(); + tokens.addSpecialToken(token.token(), token.replace()); + } + tokens.freeze(); + list.add(tokens); + } + addSpecialTokens(list); + } + + /** + * Adds a SpecialTokens instance to the registry. That is, add the + * tokens contained for the name of the SpecialTokens instance + * given. 
+ * + * @param specialTokens the SpecialTokens object to add + */ + public void addSpecialTokens(SpecialTokens specialTokens) { + ensureNotFrozen(); + List<SpecialTokens> list = new ArrayList<>(); + list.add(specialTokens); + addSpecialTokens(list); + + } + + private void ensureNotFrozen() { + if (frozen) { + throw new IllegalStateException("Tried to modify a frozen SpecialTokenRegistry instance."); + } + } + + private void addSpecialTokens(List<SpecialTokens> list) { + HashMap<String,SpecialTokens> tokens = new HashMap<>(specialTokenMap); + for(SpecialTokens t: list) { + tokens.put(t.getName(),t); + } + specialTokenMap = tokens; + } + + + /** + * Returns the currently authorative list of special tokens for + * a given name. + * + * @param name the name of the special tokens to return + * null, the empth string or the string "default" returns + * the default ones + * @return a read-only list of SpecialToken instances, an empty list if this name + * has no special tokens + */ + public SpecialTokens getSpecialTokens(String name) { + if (name == null || name.trim().equals("")) { + name = "default"; + } + SpecialTokens specialTokens = specialTokenMap.get(name); + + if (specialTokens == null) { + return nullSpecialTokens; + } + return specialTokens; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/SpecialTokens.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/SpecialTokens.java new file mode 100644 index 00000000000..2db7afc36a1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/SpecialTokens.java @@ -0,0 +1,161 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query.parser; + +import com.yahoo.log.LogLevel; +import com.yahoo.prelude.query.Substring; + +import java.util.*; +import java.util.logging.Logger; + +import static com.yahoo.language.LinguisticsCase.toLowerCase; + +/** + * A list of special tokens - string that should be threated as word + * no matter what they contain. Special tokens are case insensitive. + * + * @author bratseth + */ +public class SpecialTokens { + + private static final Logger log = Logger.getLogger(SpecialTokens.class.getName()); + + private String name; + + private List<SpecialToken> specialTokens = new ArrayList<>(); + + private boolean frozen = false; + + private int currentMaximumLength = 0; + + /** Creates a null list of special tokens */ + public SpecialTokens() { + this.name = "(null)"; + } + + public SpecialTokens(String name) { + this.name = name; + } + + /** Returns the name of this special tokens list */ + public String getName() { + return name; + } + + /** + * Adds a special token to this + * + * @param token the special token string to add + * @param replace the token to replace instances of the special token with, + * or null to keep the token + */ + public void addSpecialToken(String token, String replace) { + ensureNotFrozen(); + if (!caseIndependentLength(token)) { + return; + } + // TODO are special tokens correctly unicode normalized in reagards to query parsing? + final SpecialToken specialTokenToAdd = new SpecialToken(token, replace); + currentMaximumLength = Math.max(currentMaximumLength, specialTokenToAdd.token.length()); + specialTokens.add(specialTokenToAdd); + Collections.sort(specialTokens); + } + + private boolean caseIndependentLength(String token) { + // XXX not fool proof length test, should test codepoint by codepoint for mixed case user input? not even that will necessarily be 100% robust... 
+ String asLow = toLowerCase(token); + // TODO put along with the global toLowerCase + String asHigh = token.toUpperCase(Locale.ENGLISH); + if (asLow.length() != token.length() || asHigh.length() != token.length()) { + log.log(LogLevel.ERROR, "Special token '" + token + "' has case sensitive length. Ignoring the token." + + " Please report this message in a bug to the Vespa team."); + return false; + } else { + return true; + } + } + + /** + * Returns the special token starting at the start of the given string, or null if no + * special token starts at this string + * + * @param string the string to search for a special token at the start position + * @param substring true to allow the special token to be followed by a character which does not + * mark the end of a token + */ + public SpecialToken tokenize(String string, boolean substring) { + // XXX detonator pattern token.length may be != the length of the + // matching data in string, ref caseIndependentLength(String) + final String input = toLowerCase(string.substring(0, Math.min(string.length(), currentMaximumLength))); + for (Iterator<SpecialToken> i = specialTokens.iterator(); i.hasNext();) { + SpecialTokens.SpecialToken special = i.next(); + + if (input.startsWith(special.token())) { + if (string.length() == special.token().length() || substring || tokenEndsAt(special.token().length(), string)) + return special; + } + } + return null; + } + + private boolean tokenEndsAt(int position,String string) { + return !Character.isLetterOrDigit(string.charAt(position)); + } + + /** Returns the number of special tokens in this */ + public int size() { + return specialTokens.size(); + } + + private void ensureNotFrozen() { + if (frozen) { + throw new IllegalStateException("Tried to modify a frozen SpecialTokens instance."); + } + } + + public void freeze() { + frozen = true; + } + + /** An immutable special token */ + public final static class SpecialToken implements Comparable<SpecialToken> { + + private String token; 
+ + private String replace; + + public SpecialToken(String token, String replace) { + this.token = toLowerCase(token); + if (replace == null || replace.trim().equals("")) { + this.replace = this.token; + } else { + this.replace = toLowerCase(replace); + } + } + + /** Returns the special token */ + public String token() { + return token; + } + + /** Returns the right replace value, never null or an empty string */ + public String replace() { + return replace; + } + + public int compareTo(SpecialToken other) { + if (this.token().length() < other.token().length()) { + return 1; + } else if (this.token().length() == other.token().length()) { + return 0; + } else { + return -1; + } + } + + public Token toToken(int start,String rawSource) { + return new Token(Token.Kind.WORD, replace(), true, new Substring(start, start + token.length(), rawSource)); // XXX: Unsafe? + } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java new file mode 100644 index 00000000000..eb35655e4ca --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java @@ -0,0 +1,782 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query.parser; + +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.*; +import com.yahoo.search.query.parser.ParserEnvironment; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import static com.yahoo.prelude.query.parser.Token.Kind.*; + +/** + * Base class for parsers of the query languages which can be used + * for structured queries (types ANY, ALL and ADVANCED). 
*
* @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
*/
abstract class StructuredParser extends AbstractParser {

    protected StructuredParser(ParserEnvironment environment) {
        super(environment);
    }

    /** Parses the content of a brace-delimited composite; called by {@link #compositeItem()} with false */
    protected abstract Item handleComposite(boolean topLevel);

    /**
     * Parses a composite: skips any PLUS tokens, requires an LBRACE token, delegates the
     * content to {@link #handleComposite} and then skips an RBRACE token if one follows.
     *
     * @return the item returned by handleComposite, or null (with the token position restored)
     *         if there is no LBRACE or handleComposite returned null
     */
    protected Item compositeItem() {
        int position = tokens.getPosition();
        Item item = null;

        try {
            tokens.skipMultiple(PLUS);
            if (!tokens.skip(LBRACE)) {
                return null;
            }

            item = handleComposite(false);

            tokens.skip(RBRACE);
            return item;
        } finally {
            // Backtrack whenever nothing was produced
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    /** Sets the submodes used for url parsing. Override this to influence when such submodes are used. */
    protected void setSubmodeFromIndex(String indexName, IndexFacts.Session indexFacts) {
        submodes.setFromIndex(indexName, indexFacts);
    }

    /**
     * Parses one indexable item: an optional index prefix, followed by a number or a
     * word/phrase, followed by an optional weight suffix.
     * The index name and weight are set on the item when present.
     *
     * @return the parsed item, or null (with the token position restored) if none was found
     */
    protected Item indexableItem() {
        int position = tokens.getPosition();
        Item item = null;

        try {
            String indexName = indexPrefix();
            setSubmodeFromIndex(indexName, indexFacts);

            item = number(indexName != null);

            if (item == null) {
                item = phrase();
            }

            // With an index prefix, try once more after scanning past noise to the next term
            if (item == null && indexName != null) {
                if (wordsAhead()) {
                    item = phrase();
                }
            }

            submodes.reset();

            int weight = -1; // -1 means "no weight suffix", see weightSuffix()

            if (item != null) {
                weight = weightSuffix();
            }

            if (indexName != null && item != null) {
                item.setIndexName(indexName);
            }

            if (weight != -1 && item != null) {
                item.setWeight(weight);
            }

            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    // scan forward for terms while ignoring noise
    // Returns true when positioned at a NUMBER or WORD token, false when a SPACE token or the end
    // of the tokens is reached first. The token position is NOT restored by this method.
    private boolean wordsAhead() {
        while (tokens.hasNext()) {
            if (tokens.currentIsNoIgnore(SPACE)) {
                return false;
            }
            if (tokens.currentIsNoIgnore(NUMBER)
                    || tokens.currentIsNoIgnore(WORD)) {
                return true;
            }
            tokens.skipNoIgnore();
        }
        return false;
    }

    // wordsAhead and nothingAhead... uhm... so similar...
    /**
     * Scans forward and returns true if a SPACE token outside quotes, or the end of the
     * tokens, is reached before any NUMBER or WORD token; false if a NUMBER or WORD comes first.
     *
     * @param skip if true the tokens scanned past stay consumed,
     *             if false the token position is restored before returning
     */
    private boolean nothingAhead(boolean skip) {
        int position = tokens.getPosition();
        try {
            boolean quoted = false;
            while (tokens.hasNext()) {
                if (tokens.currentIsNoIgnore(QUOTE)) {
                    tokens.skipMultiple(QUOTE);
                    quoted = !quoted;
                } else {
                    if (!quoted && tokens.currentIsNoIgnore(SPACE)) {
                        return true;
                    }
                    if (tokens.currentIsNoIgnore(NUMBER)
                            || tokens.currentIsNoIgnore(WORD)) {
                        return false;
                    }
                    tokens.skipNoIgnore();
                }
            }
            return true;
        } finally {
            if (!skip) {
                tokens.setPosition(position);
            }
        }
    }

    /**
     * Parses an index prefix: a name made of WORD, NUMBER and UNDERSCORE tokens, optionally
     * followed by a DOT token and a second such name, followed by a COLON token.
     * The name is canonicalized through the index facts and must be a known index.
     * If the prefix is followed by nothing but noise, the noise is skipped and a new
     * prefix is attempted recursively from there.
     *
     * @return the canonical index name, or null (with the token position restored) if
     *         there is no valid index prefix at this position
     */
    private String indexPrefix() {
        int position = tokens.getPosition();
        String item = null;

        try {
            List<Token> firstWord = new ArrayList<>();
            List<Token> secondWord = new ArrayList<>();

            tokens.skip(LSQUAREBRACKET); // For test 93 and 60

            if (!tokens.currentIs(WORD) && !tokens.currentIs(NUMBER)
                    && !tokens.currentIs(UNDERSCORE)) {
                return null;
            }

            firstWord.add(tokens.next());

            while (tokens.currentIsNoIgnore(UNDERSCORE)
                    || tokens.currentIsNoIgnore(WORD)
                    || tokens.currentIsNoIgnore(NUMBER)) {
                firstWord.add(tokens.next());
            }

            if (tokens.currentIsNoIgnore(DOT)) {
                tokens.skip();
                // Unlike the first part, the part after the dot may not start with an underscore
                if (tokens.currentIsNoIgnore(WORD)
                        || tokens.currentIsNoIgnore(NUMBER)) {
                    secondWord.add(tokens.next());
                } else {
                    return null;
                }
                while (tokens.currentIsNoIgnore(UNDERSCORE)
                        || tokens.currentIsNoIgnore(WORD)
                        || tokens.currentIsNoIgnore(NUMBER)) {
                    secondWord.add(tokens.next());
                }
            }

            if (!tokens.skipNoIgnore(COLON)) {
                return null;
            }

            if (secondWord.size() == 0) {
                item = concatenate(firstWord);
            } else {
                item = concatenate(firstWord) + "." + concatenate(secondWord);
            }

            item = indexFacts.getCanonicName(item);

            if ( ! indexFacts.isIndex(item)) { // Only if this really is an index
                // Marker for the finally block
                item = null;
                return null;
            } else {
                if (nothingAhead(false)) {
                    // correct index syntax, correct name, but followed
                    // by noise. Let's skip this.
                    nothingAhead(true);
                    // Move the backtrack point past the noise before retrying, so that a failed
                    // retry restores to here rather than to before the discarded prefix
                    position = tokens.getPosition();
                    item = indexPrefix();
                }
            }

            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    /** Returns the string values of the given tokens appended to each other, with no separator */
    private String concatenate(List<Token> tokens) {
        StringBuilder s = new StringBuilder();
        for (Token t : tokens) {
            s.append(t.toString());
        }
        return s.toString();
    }

    /**
     * Returns the specified term weight, or -1 if there is no weight suffix.
     * A single EXCLAMATION token gives 150. If it is directly followed by a NUMBER token that
     * number is the weight, otherwise each further EXCLAMATION token adds 50.
     * The token position is restored when -1 is returned.
     */
    private int weightSuffix() {
        int position = tokens.getPosition();
        int item = -1;

        try {
            if (!tokens.skipNoIgnore(EXCLAMATION)) {
                return -1;
            }
            item = 150;

            if (tokens.currentIsNoIgnore(NUMBER)) {
                try {
                    item = Integer.parseInt(tokens.next().toString());
                } catch (NumberFormatException e) {
                    // Not parseable as an int: treated as no weight suffix, finally backtracks
                    item = -1;
                }
            } else {
                while (tokens.currentIsNoIgnore(EXCLAMATION)) {
                    item += 50;
                    tokens.skipNoIgnore();
                }
            }
            return item;

        } finally {
            if (item == -1) {
                tokens.setPosition(position);
            }
        }
    }

    /** Returns whether the current token is one of the kinds accepted directly after a numeric item */
    private boolean endOfNumber() {
        return tokens.currentIsNoIgnore(SPACE)
                || tokens.currentIsNoIgnore(RSQUAREBRACKET)
                || tokens.currentIsNoIgnore(SEMICOLON)
                || tokens.currentIsNoIgnore(RBRACE)
                || tokens.currentIsNoIgnore(EOF)
                || tokens.currentIsNoIgnore(EXCLAMATION);
    }

    /**
     * Consumes a DOT token directly followed by a NUMBER token.
     *
     * @return "." followed by the number, or the empty string (with the token position
     *         restored) if that is not what follows
     */
    private String decimalPart() {
        int position = tokens.getPosition();
        boolean consumed = false;

        try {
            if (!tokens.skipNoIgnore(DOT)) return "";
            if (tokens.currentIsNoIgnore(NUMBER)) {
                consumed = true;
                return "." + tokens.next().toString();
            }
            return "";
        } finally {
            if ( ! consumed)
                tokens.setPosition(position);
        }
    }

    /**
     * Parses a numeric item: a range, a plain (optionally decimal) number, a "smaller than"
     * or a "greater than" expression. A leading MINUS is only accepted on a plain number
     * when there is an index prefix.
     *
     * @param hasIndex whether an index prefix was parsed in front of this
     * @return the numeric item, or null (with the token position restored) if there is none
     *         or if it is not followed by a token accepted by {@link #endOfNumber()}
     */
    private IntItem number(boolean hasIndex) {
        int position = tokens.getPosition();
        IntItem item = null;

        try {
            if (item == null) {
                item = numberRange();
            }

            tokens.skip(LSQUAREBRACKET); // For test 93 and 60

            // TODO: Better definition of start and end of numeric items
            if (item == null && hasIndex && tokens.currentIsNoIgnore(MINUS) && (tokens.currentNoIgnore(1).kind == NUMBER)) {
                tokens.skipNoIgnore();
                Token t = tokens.next();
                item = new IntItem("-" + t.toString() + decimalPart(), true);
                item.setOrigin(t.substring);
            } else if (item == null && tokens.currentIs(NUMBER)) {
                Token t = tokens.next();
                item = new IntItem(t.toString() + decimalPart(), true);
                item.setOrigin(t.substring);
            }

            if (item == null) {
                item = numberSmaller();
            }

            if (item == null) {
                item = numberGreater();
            }
            // Reject (and backtrack) when the number runs straight into something else
            if (item != null && !endOfNumber()) {
                item = null;
            }
            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    /**
     * Parses a range: an LSQUAREBRACKET token, an optional (possibly negative, possibly decimal)
     * start, a SEMICOLON token, an optional end, optionally another SEMICOLON and a
     * (possibly negative, non-decimal) limit, and finally an optional RSQUAREBRACKET token.
     * The item value is rebuilt as "[start;end]" or "[start;end;limit]".
     *
     * @return the range item, or null (with the token position restored) if this is not a range
     */
    private IntItem numberRange() {
        int position = tokens.getPosition();
        IntItem item = null;
        boolean negative = false;

        try {
            Token initial = tokens.next();
            if (initial.kind != LSQUAREBRACKET) {
                return null;
            }

            String rangeStart = "";

            negative = tokens.skip(MINUS);

            if (tokens.currentIs(NUMBER)) {
                rangeStart = (negative ? "-" : "") + tokens.next().toString() + decimalPart();
            }

            if (!tokens.skip(SEMICOLON)) {
                return null;
            }

            String rangeEnd = "";

            negative = tokens.skip(MINUS);

            if (tokens.currentIs(NUMBER)) {
                rangeEnd = (negative ? "-" : "") + tokens.next().toString() + decimalPart();
            }


            String range = "[" + rangeStart + ";" + rangeEnd;
            if (tokens.skip(SEMICOLON)) {
                negative = tokens.skip(MINUS);
                if (tokens.currentIs(NUMBER)) {
                    String rangeLimit = (negative ? "-" : "") + tokens.next().toString();
                    range += ";" + rangeLimit;
                }
            }
            tokens.skip(RSQUAREBRACKET);

            item = new IntItem(range + "]", true);
            // The origin runs from the opening bracket to the start of the token following the range
            item.setOrigin(new Substring(initial.substring.start, tokens.currentNoIgnore().substring.start,
                                         initial.getSubstring().getSuperstring())); // XXX: Unsafe end?

            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    /**
     * Parses a SMALLER token followed by an optional MINUS and a (possibly decimal) number.
     *
     * @return an item with the value "&lt;" followed by the number,
     *         or null (with the token position restored) if this does not match
     */
    private IntItem numberSmaller() {
        int position = tokens.getPosition();
        IntItem item = null;
        boolean negative = false;

        try {
            Token initial = tokens.next();
            if (initial.kind != SMALLER) {
                return null;
            }
            negative = tokens.skipNoIgnore(MINUS);
            if (!tokens.currentIs(NUMBER)) {
                return null;
            }

            item = new IntItem("<" + (negative ? "-" : "") + tokens.next() + decimalPart(), true);
            item.setOrigin(new Substring(initial.substring.start, tokens.currentNoIgnore().substring.start,
                                         initial.getSubstring().getSuperstring())); // XXX: Unsafe end?
            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    /**
     * Parses a GREATER token followed by an optional MINUS and a (possibly decimal) number.
     *
     * @return an item with the value "&gt;" followed by the number,
     *         or null (with the token position restored) if this does not match
     */
    private IntItem numberGreater() {
        int position = tokens.getPosition();
        IntItem item = null;
        boolean negative = false;

        try {
            Token t = tokens.next();
            if (t.kind != GREATER) {
                return null;
            }

            negative = tokens.skipNoIgnore(MINUS);
            if (!tokens.currentIs(NUMBER)) {
                return null;
            }

            Token number = tokens.next();
            item = new IntItem(">" + (negative ? "-" : "") + number + decimalPart(), true);
            item.setOrigin(new Substring(t.substring.start, tokens.currentNoIgnore().substring.start, t.getSubstring().getSuperstring())); // XXX: Unsafe end?
            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    /**
     * Words for phrases also permits numerals as words
     *
     * @param insidePhrase if true a NUMBER token becomes a WordItem, otherwise an IntItem
     * @return the word item, or null (with the token position restored) if the current
     *         token is neither accepted by {@link #word()} nor a NUMBER
     */
    private Item phraseWord(boolean insidePhrase) {
        int position = tokens.getPosition();
        Item item = null;

        try {
            if (item == null) {
                item = word();
            }

            if (item == null && tokens.currentIs(NUMBER)) {
                Token t = tokens.next();
                if (insidePhrase) {
                    item = new WordItem(t, true);
                } else {
                    item = new IntItem(t.toString(), true);
                    ((TermItem) item).setOrigin(t.substring);
                }
            }

            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    /**
     * Returns a WordItem if this is a non CJK query,
     * a WordItem or PhraseSegmentItem if this is a CJK query,
     * null if the current item is not a word
     * <p>
     * In the url and site submodes NUMBER, MINUS and UNDERSCORE tokens are also accepted as
     * the start of a word, and directly following WORD, NUMBER, MINUS and UNDERSCORE tokens
     * are glued onto it to form a single WordItem.
     */
    private Item word() {
        int position = tokens.getPosition();
        Item item = null;

        try {
            if (!tokens.currentIs(WORD)
                    && ((!tokens.currentIs(NUMBER) && !tokens.currentIs(MINUS)
                    && !tokens.currentIs(UNDERSCORE)) || (!submodes.url && !submodes.site))) {
                return null;
            }
            Token word = tokens.next();

            if (submodes.url) {
                item = new WordItem(word, true);
            } else {
                item = segment(word);
            }

            if (submodes.url || submodes.site) {
                StringBuilder buffer = null; // created lazily, only if there is something to glue on
                Token token = tokens.currentNoIgnore();

                while (token.kind == WORD || token.kind == NUMBER || token.kind == MINUS || token.kind == UNDERSCORE) {
                    if (buffer == null) {
                        buffer = getStringContents(item);
                    }
                    buffer.append(token.toString());
                    tokens.skipNoIgnore();
                    token = tokens.currentNoIgnore();
                }
                if (buffer != null) {
                    // Replace the item by one word spanning everything consumed
                    Substring termSubstring = ((BlockItem) item).getOrigin();
                    Substring substring = new Substring(termSubstring.start, token.substring.start, termSubstring.getSuperstring()); // XXX: Unsafe end?
                    String str = buffer.toString();
                    item = new WordItem(str, "", true, substring);
                }
            }
            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    /**
     * Returns a new StringBuilder holding the string value of a TermItem or the raw word
     * of a SegmentItem.
     *
     * @throws RuntimeException if the item is of neither type
     */
    private StringBuilder getStringContents(Item item) {
        if (item instanceof TermItem) {
            return new StringBuilder(
                    ((TermItem) item).stringValue());
        } else if (item instanceof SegmentItem) {
            return new StringBuilder(
                    ((SegmentItem) item).getRawWord());
        } else {
            throw new RuntimeException("Parser bug. Unexpected item type, send stack trace in a bug ticket to the Vespa team.");
        }
    }


    /**
     * A phrase or word, either marked by quotes or by non-spaces between
     * words or by a combination.
     *
     * @return a word if there's only one word, a phrase if there is
     *         several quoted or non-space-separated words, or null otherwise
     *         (in which case the token position is restored)
     */
    private Item phrase() {
        int position = tokens.getPosition();
        Item item = null;

        try {
            item = phraseBody();
            return item;
        } finally {
            if (item == null) {
                tokens.setPosition(position);
            }
        }
    }

    /**
     * Returns a word, a phrase or another composite
     * <p>
     * Collects words one at a time: the first is held in firstWord, and a PhraseItem is
     * created when a second one arrives. STAR tokens around a single term turn it into a
     * prefix, suffix or substring item, and in site submode host markers are added.
     */
    private Item phraseBody() {
        boolean quoted = false;
        PhraseItem phrase = null;
        Item firstWord = null;
        boolean starAfterFirst = false;
        boolean starBeforeFirst = false;

        if (tokens.skipMultiple(QUOTE)) {
            quoted = !quoted;
        }
        boolean addStartOfHostMarker = addStartMarking();

        braceLevelURL = 0;

        do {
            // NOTE(review): this is reassigned on every iteration, so after the loop it reflects
            // the STAR check of the last iteration, not necessarily the one before the first
            // word - confirm this is intended.
            starBeforeFirst = tokens.skip(STAR);

            if (tokens.skipMultiple(QUOTE)) {
                quoted = !quoted;
            }

            Item word = phraseWord((firstWord != null) || (phrase != null));

            if (word == null) {
                if (tokens.skipMultiple(QUOTE)) {
                    quoted = !quoted;
                }
                // Inside quotes, non-word tokens are skipped; outside quotes they end the phrase
                if (quoted && tokens.hasNext()) {
                    tokens.skipNoIgnore();
                    continue;
                } else {
                    break;
                }
            } else if (quoted && word instanceof PhraseSegmentItem) {
                ((PhraseSegmentItem) word).setExplicit(true);
            }

            if (phrase != null) {
                phrase.addItem(word);
            } else if (firstWord != null) {
                // Second word: create the phrase and move the first word into it
                phrase = new PhraseItem();
                if (quoted || submodes.site || submodes.url) {
                    phrase.setExplicit(true);
                }
                if (addStartOfHostMarker) {
                    phrase.addItem(MarkerWordItem.createStartOfHost());
                }
                if (firstWord instanceof IntItem) {
                    // A number parsed before we knew this was a phrase is converted to a word
                    IntItem asInt = (IntItem) firstWord;
                    firstWord = new WordItem(asInt.stringValue(), asInt.getIndexName(),
                                             true, asInt.getOrigin());
                }
                phrase.addItem(firstWord);
                phrase.addItem(word);
            } else if (word instanceof PhraseItem) {
                phrase = (PhraseItem) word;
            } else {
                firstWord = word;
                starAfterFirst = tokens.skipNoIgnore(STAR);
            }
            if (!quoted && tokens.currentIs(QUOTE)) {
                break;
            }

            boolean atWord = skipToNextPhraseWord(quoted);

            if (!atWord && tokens.skipMultipleNoIgnore(QUOTE)) {
                quoted = !quoted;
            }

            if (!atWord && !quoted) {
                break;
            }

            if (quoted && tokens.skipMultiple(QUOTE)) {
                break;
            }

        } while (tokens.hasNext());

        braceLevelURL = 0;

        if (phrase != null) {
            if (addEndMarking()) {
                phrase.addItem(MarkerWordItem.createEndOfHost());
            }
            return phrase;
        } else if (firstWord != null && submodes.site) {
            // A single word in site submode
            if (starAfterFirst && !addStartOfHostMarker) {
                return firstWord;
            } else {
                phrase = new PhraseItem();
                if (addStartOfHostMarker) {
                    phrase.addItem(MarkerWordItem.createStartOfHost());
                }
                if (firstWord instanceof IntItem) {
                    IntItem asInt = (IntItem) firstWord;
                    firstWord = new WordItem(asInt.stringValue(), asInt.getIndexName(), true, asInt.getOrigin());
                }
                phrase.addItem(firstWord);
                if (!starAfterFirst) {
                    phrase.addItem(MarkerWordItem.createEndOfHost());
                }
                phrase.setExplicit(true);
                return phrase;
            }
        } else {
            if (firstWord != null && firstWord instanceof TermItem && (starAfterFirst || starBeforeFirst)) {
                // prefix, suffix or substring
                TermItem firstTerm = (TermItem) firstWord;
                if (starAfterFirst) {
                    if (starBeforeFirst) {
                        return new SubstringItem(firstTerm.stringValue(), true);
                    } else {
                        return new PrefixItem(firstTerm.stringValue(), true);
                    }
                } else {
                    return new SuffixItem(firstTerm.stringValue(), true);
                }
            }
            return firstWord;
        }
    }

    /** Consumes a HAT token and returns true if explicit anchoring is on and the current token is a HAT */
    private boolean addStartMarking() {
        if (submodes.explicitAnchoring() && tokens.currentIs(HAT)) {
            tokens.skip();
            return true;
        }
        return false;
    }

    /**
     * Returns whether an end of host marker should be added to the phrase just parsed.
     * Consumes a DOLLAR token (explicit anchoring, returns true) or, in site submode,
     * a STAR token (returns false). In site submode with neither, an end marker is added
     * unless the current token is a DOT.
     */
    private boolean addEndMarking() {
        if (submodes.explicitAnchoring() && tokens.currentIs(DOLLAR)) {
            tokens.skip();
            return true;
        } else if (submodes.site && tokens.currentIs(STAR)) {
            tokens.skip();
            return false;
        } else if (submodes.site && !tokens.currentIs(DOT)) {
            return true;
        }
        return false;
    }

    /**
     * Skips one or multiple phrase separators
     * <p>
     * What counts as a separator depends on the submode: in url submode any non-SPACE token
     * while the brace level is not negative, in site submode any token except SPACE, STAR,
     * HAT, DOLLAR and RBRACE, and otherwise a fixed set of punctuation token kinds
     * (plus braces when quoted).
     *
     * @return true if the item we land at after skipping zero or more is
     *         a phrase word
     */
    private boolean skipToNextPhraseWord(boolean quoted) {
        boolean skipped = false;

        do {
            skipped = false;
            if (submodes.url) {
                // Track brace nesting so an unbalanced RBRACE ends the url
                if (tokens.currentIsNoIgnore(RBRACE)) {
                    braceLevelURL--;
                }
                if (tokens.currentIsNoIgnore(LBRACE)) {
                    braceLevelURL++;
                }
                if (tokens.hasNext() && !tokens.currentIsNoIgnore(SPACE)
                        && braceLevelURL >= 0) {
                    tokens.skip();
                    skipped = true;
                }
            } else if (submodes.site) {
                if (tokens.hasNext() && !tokens.currentIsNoIgnore(SPACE)
                        && !tokens.currentIsNoIgnore(STAR)
                        && !tokens.currentIsNoIgnore(HAT)
                        && !tokens.currentIsNoIgnore(DOLLAR)
                        && !tokens.currentIsNoIgnore(RBRACE)) {
                    tokens.skip();
                    skipped = true;
                }
            } else {
                if (tokens.skipMultipleNoIgnore(DOT)) {
                    skipped = true;
                }
                if (tokens.skipMultipleNoIgnore(COMMA)) {
                    skipped = true;
                }
                if (tokens.skipMultipleNoIgnore(PLUS)) {
                    skipped = true;
                }
                if (tokens.skipMultipleNoIgnore(MINUS)) {
                    skipped = true;
                }
                if (tokens.skipMultipleNoIgnore(UNDERSCORE)) {
                    skipped = true;
                }
                if (tokens.skipMultipleNoIgnore(HAT)) {
                    skipped = true;
                }
                if (tokens.skipMultipleNoIgnore(DOLLAR)) {
                    skipped = true;
                }
                ;
                if (tokens.skipMultipleNoIgnore(STAR)) {
                    skipped = true;
                }
                if (tokens.skipMultipleNoIgnore(COLON)) {
                    skipped = true;
                }
                if (quoted) {
                    if (tokens.skipMultipleNoIgnore(RBRACE)) {
                        skipped = true;
                    }
                    if (tokens.skipMultipleNoIgnore(LBRACE)) {
                        skipped = true;
                    }
                }
                if (tokens.skipMultipleNoIgnore(NOISE)) {
                    skipped = true;
                }
            }
            // Keep going as long as something was skipped and we have not reached a word
        } while (skipped && !tokens.currentIsNoIgnore(WORD)
                && !tokens.currentIsNoIgnore(NUMBER) && !URLModeWordChar());

        return tokens.currentIsNoIgnore(WORD)
                || tokens.currentIsNoIgnore(NUMBER) || URLModePhraseChar();
    }

    /** Returns whether we are in url submode and the current token is an UNDERSCORE or MINUS */
    private boolean URLModeWordChar() {
        if (!submodes.url) {
            return false;
        }
        return tokens.currentIsNoIgnore(UNDERSCORE)
                || tokens.currentIsNoIgnore(MINUS);
    }

    /** Returns whether we are in url submode and the current token is neither an RBRACE nor a SPACE */
    private boolean URLModePhraseChar() {
        if (!submodes.url) {
            return false;
        }
        return !(tokens.currentIsNoIgnore(RBRACE)
                || tokens.currentIsNoIgnore(SPACE));
    }


}
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/Token.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/Token.java new file mode 100644 index 00000000000..27ad26279e7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/Token.java @@ -0,0 +1,117 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.query.parser;


import com.yahoo.prelude.query.Substring;

/**
 * A query token.
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class Token { + + public static enum Kind { + EOF("<EOF>"), + NUMBER("<NUMBER>"), + WORD("<WORD>"), + LETTER("<LETTER>"), + DIGIT("<DIGIT>"), + SPACE("\" \""), + NOISE("<NOISE>"), + LATINSIGN("<LATINSIGN>"), + QUOTE("\"\\\"\""), + MINUS("\"-\""), + PLUS("\"+\""), + DOT("\".\""), + COMMA("\",\""), + COLON("\":\""), + LBRACE("\"(\""), + RBRACE("\")\""), + LSQUAREBRACKET("\"[\""), + RSQUAREBRACKET("\"]\""), + SEMICOLON("\";\""), + GREATER("\">\""), + SMALLER("\"<\""), + EXCLAMATION("\"!\""), + UNDERSCORE("\"_\""), + HAT("\"^\""), + STAR("\"*\""), + DOLLAR("\"$\""), + DEFAULT(""); + + public final String image; + + private Kind(String image) { + this.image = image; + } + } + + /** The raw substring causing this token, never null */ + public final Substring substring; + + public final Token.Kind kind; + + /** Lowercase image */ + public final String image; + + /** True if this is a <i>special token</i> */ + private final boolean special; + + /** Crates a token which fails to know its origin (as a substring). Do not use, except for testing. 
*/ + public Token(Token.Kind kind, String image) { + this(kind,image,false,null); + } + + public Token(Token.Kind kind, String image, Substring substring) { + this(kind,image,false,substring); + } + + public Token(Token.Kind kind, String image, boolean special, Substring substring) { + this.kind = kind; + this.image = image; + this.special = special; + this.substring = substring; + } + + /** Returns whether this is a <i>special token</i> */ + public boolean isSpecial() { return special; } + + public String toString() { return image; } + + public boolean equals(Object object) { + if (this == object) { + return true; + } + if (object == null) { + return false; + } + if (object.getClass() != this.getClass()) { + return false; + } + + Token other = (Token) object; + + if (this.kind != other.kind) { + return false; + } + if (!(this.image.equals(other.image))) { + return false; + } + + return true; + } + + /** + * Returns the substring containing the image ins original form (including casing), + * as well as all the text surrounding the token + * + * @return the image in original casing, never null + */ + public Substring getSubstring() { return substring; } + + public int hashCode() { + return image.hashCode() ^ kind.hashCode(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/TokenPosition.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/TokenPosition.java new file mode 100644 index 00000000000..a1ad4983f34 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/TokenPosition.java @@ -0,0 +1,218 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.query.parser; + + +import java.util.List; + + +/** + * An iterator-like view of a list, but typed, random-accessible + * and with more convenience methods + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +final class TokenPosition { + + private List<Token> tokenList; + + private int position = 0; + + /** + * Creates an empty token position which must be {@link #initialize initialized} + * before use + */ + public TokenPosition() {} + + /** + * Initializes this token position. Must be done once or more before use + * + * @param tokens a list of tokens, which is not modified, and not used + * outside the calling thread + */ + public void initialize(List<Token> tokens) { + this.tokenList = tokens; + position = 0; + } + + /** + * Returns the current token without changing the position. + * Returns null (no exception) if there are no more tokens. + */ + public Token current() { + Token token = current(0); + + return token; + } + + /** + * Returns the current token without changing the position, + * and without ignoring spaces. + * Returns null (no exception) if there are no more tokens. + */ + public Token currentNoIgnore() { + return currentNoIgnore(0); + } + + /** + * Returns the token at <code>offset</code> steps from here. + * Null (no exception) if there is no token at that position + */ + public Token current(int offset) { + int i = position + offset; + + while (i < tokenList.size()) { + Token token = tokenList.get(i++); + + if (token.kind != Token.Kind.SPACE) { + return token; + } + } + return null; + } + + /** + * Returns the token at <code>offset</code> steps from here, + * without ignoring spaces. + * Null (no exception) if there is no token at that position + */ + public Token currentNoIgnore(int offset) { + if (tokenList.size() <= position + offset) { + return null; + } + return tokenList.get(position + offset); + } + + /** + * Returns whether the current token is of the given kind. 
+ * False also if there is no token at the current position + */ + public boolean currentIs(Token.Kind kind) { + Token current = current(); + + if (current == null) { + return false; + } + return current.kind == kind; + } + + /** + * Returns whether the current token is of the given kind, + * without skipping spaces. + * False also if there is no token at the current position + */ + public boolean currentIsNoIgnore(Token.Kind kind) { + Token current = currentNoIgnore(); + + if (current == null) { + return false; + } + return current.kind == kind; + } + + /** Returns whether more tokens are available */ + public boolean hasNext() { + return tokenList.size() > (position + 1); + } + + /** + * Returns the current token and increases the position by one. + * Returns null (no exception) if there are no more tokens + */ + public Token next() { + // Go to the next-non-space. Then set token, then increase position by one + while (position < tokenList.size()) { + Token current = tokenList.get(position++); + + if (current.kind != Token.Kind.SPACE) { + return current; + } + } + return null; + } + + /** Skips past the current token */ + public void skip() { + next(); + } + + /** Skips to the next token, even if the next is a space */ + public void skipNoIgnore() { + position++; + } + + /** Sets the position */ + public void setPosition(int position) { + this.position = position; + } + + /** Returns the current position */ + public int getPosition() { + return position; + } + + /** + * Skips one or more tokens of the given kind + * + * @return true if at least one was skipped, false if there was none + */ + public boolean skipMultiple(Token.Kind kind) { + boolean skipped = false; + + while (hasNext() && current().kind == kind) { + skipped = true; + skip(); + } + return skipped; + } + + /** + * Skips one or more tokens of the given kind, without ignoring spaces + * + * @return true if at least one was skipped, false if there was none + */ + public boolean 
skipMultipleNoIgnore(Token.Kind kind) { + boolean skipped = false; + + while (hasNext() && currentNoIgnore().kind == kind) { + skipped = true; + skip(); + } + return skipped; + } + + /** + * Skips one or zero items of the given kind. + * + * @return true if one item was skipped, false if none was, + * or if there are no more tokens + */ + public boolean skip(Token.Kind kind) { + Token current = current(); + + if (current == null || current.kind != kind) { + return false; + } + + skip(); + return true; + } + + /** + * Skips one or zero items of the given kind, without ignoring + * spaces + * + * @return true if one item was skipped, false if none was, + * or if there are no more tokens + */ + public boolean skipNoIgnore(Token.Kind kind) { + Token current = currentNoIgnore(); + + if (current == null || current.kind != kind) { + return false; + } + + skip(); + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java new file mode 100644 index 00000000000..e52a0347834 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java @@ -0,0 +1,550 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query.parser; + +import com.yahoo.language.Linguistics; +import com.yahoo.language.process.CharacterClasses; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.Substring; + +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import static com.yahoo.prelude.query.parser.Token.Kind.*; + +/** + * Query tokenizer. Singlethreaded. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public final class Tokenizer { + + private List<Token> tokens = new java.util.ArrayList<>(); + + private String source; + + /** Tokens which should be words, regardless of which characters they contain */ + private SpecialTokens specialTokens = null; + + /** Whether to recognize tokens also as substrings of other tokens, needed for cjk */ + private boolean substringSpecialTokens=false; + + private final CharacterClasses characterClasses; + + private int parensToEat = 0; + + private int indexLastExplicitlyChangedAt = 0; + + /** Creates a tokenizer which initializes from a given Linguistics */ + public Tokenizer(Linguistics linguistics) { + this.characterClasses=linguistics.getCharacterClasses(); + } + + /** + * Sets a list of tokens (Strings) which should be returned as WORD tokens regardless + * of their content. This list is used directly by the Tokenizer and should not be changed + * after calling this. The tokenizer will not change it. Special tokens are case + * sensitive. + */ + public void setSpecialTokens(SpecialTokens specialTokens) { + this.specialTokens = specialTokens; + } + + /** Sets whether to recognize tokens also as substrings of other tokens, needed for cjk. Default false. */ + public void setSubstringSpecialTokens(boolean substringSpecialTokens) { + this.substringSpecialTokens=substringSpecialTokens; + } + + /** + * Resets this tokenizer and create tokens from the given string, using + * "default" as the default index, and using no index information. + * + * @return a read-only list of tokens. This list can only be used by this thread + */ + public List<Token> tokenize(String string) { + return tokenize(string, new IndexFacts().newSession(Collections.emptySet(), Collections.emptySet())); + } + + /** + * Resets this tokenizer and create tokens from the given string, using + * "default" as the default index + * + * @return a read-only list of tokens. 
This list can only be used by this thread + */ + public List<Token> tokenize(String string, IndexFacts.Session indexFacts) { + return tokenize(string, "default", indexFacts); + } + + /** + * Resets this tokenizer and create tokens from the given string. + * + * @param string the string to tokenize + * @param defaultIndexName the name of the index to use as default + * @param indexFacts information about the indexes we will search + * @return a read-only list of tokens. This list can only be used by this thread + */ + @SuppressWarnings({"deprecation"}) + // To avoid this we need to pass an IndexFacts.session down instead - easily done but not without breaking API's + public List<Token> tokenize(String string, String defaultIndexName, IndexFacts.Session indexFacts) { + this.source = string; + tokens.clear(); + parensToEat = 0; + Index topLevelIndex = Index.nullIndex; + Index defaultIndex = indexFacts.getIndex(defaultIndexName); + if (defaultIndexName != null) { + topLevelIndex = defaultIndex; + } + Index currentIndex = topLevelIndex; + for (int i = 0; i < source.length(); i++) { + if (currentIndex.isExact()) { + i = consumeExact(i, currentIndex); // currentIndex may change after seeing a colon below + currentIndex = topLevelIndex; + } + else { + i = consumeSpecialToken(i); + } + + if (i >= source.length()) break; + + int c = source.codePointAt(i); + if (characterClasses.isLetterOrDigit(c) + || (c == '\'' && acceptApostropheAsWordCharacter(currentIndex))) { + i = consumeWordOrNumber(i, currentIndex); + } else if (Character.isWhitespace(c)) { + addToken(SPACE, " ", i, i + 1); + } else if (c == '"' || c == '\u201C' || c == '\u201D' + || c == '\u201E' || c == '\u201F' || c == '\u2039' + || c == '\u203A' || c == '\u00AB' || c == '\u00BB' + || c == '\u301D' || c == '\u301E' || c == '\u301F' + || c == '\uFF02') { + addToken(QUOTE, "\"", i, i + 1); + } else if (c == '-' || c == '\uFF0D') { + addToken(MINUS, "-", i, i + 1); + } else if (c == '+' || c == '\uFF0B') { + 
addToken(PLUS, "+", i, i + 1); + } else if (c == '.' || c == '\uFF0E') { + addToken(DOT, ".", i, i + 1); + } else if (c == ',' || c == '\uFF0C') { + addToken(COMMA, ",", i, i + 1); + } else if (c == ':' || c == '\uFF1A') { + currentIndex = determineCurrentIndex(defaultIndex, indexFacts); + addToken(COLON, ":", i, i + 1); + } else if (c == '(' || c == '\uFF08') { + addToken(LBRACE, "(", i, i + 1); + parensToEat++; + } else if (c == ')' || c == '\uFF09') { + addToken(RBRACE, ")", i, i + 1); + parensToEat--; + if (parensToEat < 0) parensToEat = 0; + } else if (c == '[' || c == '\uFF3B') { + addToken(LSQUAREBRACKET, "[", i, i + 1); + } else if (c == ']' || c == '\uFF3D') { + addToken(RSQUAREBRACKET, "]", i, i + 1); + } else if (c == ';' || c == '\uFF1B') { + addToken(SEMICOLON, ";", i, i + 1); + } else if (c == '>' || c == '\uFF1E') { + addToken(GREATER, ">", i, i + 1); + } else if (c == '<' || c == '\uFF1C') { + addToken(SMALLER, "<", i, i + 1); + } else if (c == '!' || c == '\uFF01') { + addToken(EXCLAMATION, "!", i, i + 1); + } else if (c == '_' || c == '\uFF3F') { + addToken(UNDERSCORE, "_", i, i + 1); + } else if (c == '^' || c == '\uFF3E') { + addToken(HAT, "^", i, i + 1); + } else if (c == '*' || c == '\uFF0A') { + addToken(STAR, "*", i, i + 1); + } else if (c == '$' || c == '\uFF04') { + addToken(DOLLAR, "$", i, i + 1); + } else { + addToken(NOISE, "<NOISE>", i, i + 1); + } + } + addToken(EOF, "<EOF>", source.length(), source.length()); + source = null; + return tokens; + } + + private boolean acceptApostropheAsWordCharacter(Index currentIndex) { + if (!(currentIndex.isUriIndex() || currentIndex.isHostIndex())) { + return true; + } + // this is a heuristic to check whether we probably have reached the end of an URL element + for (int i = tokens.size() - 1; i >= 0; --i) { + Token lookAt = tokens.get(i); + switch (lookAt.kind) { + case COLON: + if (i == indexLastExplicitlyChangedAt) { + return false; + } + case SPACE: + return true; + default: + // do nothing + } 
+ } + // really not sure whether we should choose false instead, on cause of the guard at + // the start, but this seems like the conservative choice + return true; + } + + @SuppressWarnings({"deprecation"}) + private Index determineCurrentIndex(Index defaultIndex, IndexFacts.Session indexFacts) { + int backtrack = tokens.size(); + int tokencnt = 0; + for (int i = 1; i <= tokens.size(); i++) { + backtrack = tokens.size() - i; + Token lookAt = tokens.get(backtrack); + if (lookAt.kind != WORD && lookAt.kind != UNDERSCORE && lookAt.kind != NUMBER && lookAt.kind != DOT) { + // do not use this token + backtrack++; + break; + } + tokencnt++; + } + StringBuilder tmp = new StringBuilder(); + for (int i = 0; i < tokencnt; i++) { + Token useToken = tokens.get(backtrack+i); + tmp.append(useToken.image); + } + String indexName = tmp.toString(); + if (indexName.length() > 0) { + String canonicIndexName = indexFacts.getCanonicName(indexName); + Index index = indexFacts.getIndex(canonicIndexName); + if (! 
index.isNull()) { + indexLastExplicitlyChangedAt = tokens.size(); + return index; + } + } + return defaultIndex; + } + + private int consumeSpecialToken(int start) { + SpecialTokens.SpecialToken specialToken=getSpecialToken(start); + if (specialToken==null) return start; + tokens.add(specialToken.toToken(start,source)); + return start + specialToken.token().length(); + } + + private SpecialTokens.SpecialToken getSpecialToken(int start) { + if (specialTokens == null) { + return null; + } + return specialTokens.tokenize(source.substring(start), substringSpecialTokens); + } + + private int consumeExact(int start,Index index) { + if (index.getExactTerminator() == null) return consumeHeuristicExact(start); + return consumeToTerminator(start,index.getExactTerminator()); + } + + private boolean looksLikeExactEnd(int end) { + int parens = parensToEat; + boolean wantStar = true; + boolean wantBang = true; + boolean eatDigit = false; + + int endLimit = source.length(); + + while (end < endLimit) { + char c = source.charAt(end++); + + if (Character.isWhitespace(c)) { + // ends in whitespace + return true; + } + // handle digits (after a ! sign) + if (eatDigit && Character.isDigit(c)) { + continue; + } + eatDigit = false; + + // ! digits or any number of ! signs: + if (wantBang && c == '!') { + eatDigit = true; + while (end < endLimit) { + c = source.charAt(end); + if (c == '!') { + end++; + // more than one ! 
-> do not eat digits + eatDigit = false; + } else { + break; + } + } + wantBang = false; + continue; + } + + // star meaning prefix after a string: + if (wantStar && (c == '*' || c == '\uFF0A')) { + wantStar = false; + continue; + } + + // parens ending a group: + if (parens > 0 && c == ')') { + parens--; + continue; + } + + // something else + return false; + } + // end of field + return true; + } + + private int consumeHeuristicExact(int start) { + int curPos = -1; + int actualStart = -1; + int starPos = -1; + int endLimit = source.length(); + + boolean suffStar = false; + boolean isQuoted = false; + boolean seenSome = false; + + boolean wantStartQuote = true; + boolean wantEndQuote = false; + boolean wantStartStar = true; + + // ignore whitespace at start until we something else + boolean ignWS = true; + + for (curPos = start; curPos < endLimit; curPos++) { + char c = source.charAt(curPos); + + if (Character.isWhitespace(c)) { + if (ignWS) continue; + // ends exact token unless quoted + if (!wantEndQuote) break; + } + ignWS = false; + + if (c == '"') { + if (wantStartQuote) { + // starts actual token + wantStartQuote = false; + wantEndQuote = true; + actualStart = curPos+1; + } else if (wantEndQuote && looksLikeExactEnd(curPos+1)) { + // System.err.println("seen quoted token from "+actualStart+" to "+curPos); + seenSome = true; + wantEndQuote = false; + isQuoted = true; + // ends token + break; + } + // else: part of exact token + continue; + } + // no processing of non-quotes inside quotes + if (wantEndQuote) continue; + + if (c == '*' || c == '\uFF0A') { + if (wantStartStar) { + suffStar = true; + wantStartStar = false; + starPos = curPos; + continue; + } + } + + if (c == '!' 
|| c == '*' || c == '\uFF0A') { + // ends token if non-empty + if (seenSome && looksLikeExactEnd(curPos)) break; + } + + if (c == ')' && seenSome && looksLikeExactEnd(curPos)) { + break; + } + if (!seenSome) { + // everything else: something that starts the actual token + actualStart = curPos; + seenSome = true; + wantStartQuote = false; + wantStartStar = false; + } + } + + int end = curPos; + + // handle some ill-formed inputs: + + if (wantEndQuote) { + // missing end quote: reprocess without quote handling + isQuoted = false; + actualStart = -1; + starPos = -1; + suffStar = false; + seenSome = false; + wantStartStar = true; + + // ignore whitespace at start until we something else + ignWS = true; + + for (curPos = start; curPos < endLimit; curPos++) { + char c = source.charAt(curPos); + + if (Character.isWhitespace(c)) { + if (ignWS) continue; + // ends exact token + break; + } + ignWS = false; + + if (c == '*' || c == '\uFF0A') { + if (wantStartStar) { + suffStar = true; + wantStartStar = false; + starPos = curPos; + continue; + } + } + + if (c == '!' || c == '*' || c == '\uFF0A') { + // ends token if non-empty + if (seenSome) break; + } + + if (c == ')' && seenSome && parensToEat > 0) { + break; + } + if (!seenSome) { + // everything else: something that starts the actual token + actualStart = curPos; + seenSome = true; + wantStartStar = false; + } + } + end = curPos; + } + + if (! seenSome) { + // no token content: may need to include stars or whitespace + if (suffStar) { + // use the star as token: + suffStar = false; + actualStart = starPos; + } else { + // just include all we have (possibly whitespace or an empty string): + actualStart = start; + } + } + + if (suffStar) { + addToken(STAR, "*", starPos, starPos + 1); + } + tokens.add(new Token(WORD, source.substring(actualStart, end), true, new Substring(actualStart, end, source))); // XXX: Unsafe? 
+ + // skip terminating quote + if (isQuoted) { + end++; + } + return end; + } + + private int consumeToTerminator(int start,String terminator) { + int end = start; + while (end < source.length()) { + if (terminatorStartsAt(end,terminator)) + break; + end++; + } + tokens.add(new Token(WORD, source.substring(start, end), true, new Substring(start, end, source))); // XXX: Unsafe start? + if (end>=source.length()) + return end; + else + return end+terminator.length(); // Don't create a token for the terminator + } + + private boolean terminatorStartsAt(int start,String terminator) { + int terminatorPosition=0; + while ((terminatorPosition+start)<source.length()) { + if (source.charAt(start+terminatorPosition)!=terminator.charAt(terminatorPosition)) + return false; + terminatorPosition++; + if (terminatorPosition >= terminator.length()) + return true; // Reached end of terminator + } + return false; // Reached end of source before reaching end of terminator + } + + /** Consumes a word or number <i>and/or possibly</i> a special token starting within this word or number */ + private int consumeWordOrNumber(int start, Index currentIndex) { + int tokenEnd = start; + SpecialTokens.SpecialToken substringSpecialToken = null; + boolean digitsOnly = true; + // int underscores = 0; + // boolean underscoresOnly = true; + boolean quotesOnly = true; + + while (tokenEnd < source.length()) { + if (substringSpecialTokens) { + substringSpecialToken=getSpecialToken(tokenEnd); + if (substringSpecialToken!=null) break; + } + + int c = source.codePointAt(tokenEnd); + + if (characterClasses.isLetter(c)) { + digitsOnly = false; + // if (c != '_') { + // if (underscores > 3) { + // break; + // } else { + // underscores = 0; + // } + // underscoresOnly = false; + // } else { + // underscores += 1; + // } + quotesOnly = false; + } else if (characterClasses.isLatinDigit(c)) { + // Yes, do nothing as long as the underscore logic + // is deactivated. 
+ // underscoresOnly = false; + quotesOnly = false; + } else if (c == '\'') { + if (!acceptApostropheAsWordCharacter(currentIndex)) { + break; + } + // Otherwise consume apostrophes... + digitsOnly = false; + } else { + break; + } + tokenEnd += Character.charCount(c); + } + // if (underscores > 3 && !underscoresOnly) { + // tokenEnd -= underscores; + // } + if (tokenEnd>start) { + // if (underscoresOnly) { + // addToken(NOISE, source.substring(start, tokenEnd), start, tokenEnd); + // } else + if (quotesOnly) { + addToken(NOISE, source.substring(start, tokenEnd), start, tokenEnd); + } else { + addToken(digitsOnly ? NUMBER : WORD, source.substring(start, tokenEnd), start, tokenEnd); + } + } + + if (substringSpecialToken==null) + return --tokenEnd; + // TODO: test the logic around tokenEnd with friends + addToken(substringSpecialToken.toToken(tokenEnd,source)); + return --tokenEnd+substringSpecialToken.token().length(); + } + + private void addToken(Token.Kind kind, String word, int start, int end) { + addToken(new Token(kind, word, false, new Substring(start, end, source))); // XXX: Unsafe? + } + + private void addToken(Token token) { + tokens.add(token); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/UnicodePropertyDump.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/UnicodePropertyDump.java new file mode 100644 index 00000000000..6c48e980aff --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/UnicodePropertyDump.java @@ -0,0 +1,111 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
/**
 * Dump properties of unicode characters in a format compatible
 * with fastlib/text/unicode_propertydump
 *
 * <p>Arguments:</p>
 *
 * <ol>
 * <li>start-char-number (inclusive, default 0)</li>
 * <li>end-char-number (exclusive, default 0xffff)</li>
 * <li>debug true|false</li>
 * </ol>
 *
 * @author <a href="mailto:vlarsen@yahoo-inc.com">Vidar Larsen</a>
 */
class UnicodePropertyDump {

    public static void main(String[] arg) {
        int start = 0;
        int end = 0xffff;
        boolean debug = false;

        if (arg.length > 0) {
            start = Integer.parseInt(arg[0]);
        }
        if (arg.length > 1) {
            end = Integer.parseInt(arg[1]);
        }
        if (arg.length > 2) {
            debug = Boolean.parseBoolean(arg[2]);
        }
        dumpProperties(start, end, debug, System.out);
    }

    /**
     * Writes one line per code point in [start, end): the code point as 8 hex
     * digits, a space, and the property bitmap as 4 hex digits. With debug on,
     * the {@link Character#getType(int)} value is appended after a space.
     *
     * @param start the first code point to dump
     * @param end the code point to stop before
     * @param debug whether to append the general category value to each line
     * @param out where to write the lines
     */
    static void dumpProperties(int start, int end, boolean debug, PrintStream out) {
        for (int codePoint = start; codePoint < end; codePoint++) {
            int charType = Character.getType(codePoint);

            out.print(zeroPaddedHex(codePoint, 8) + " ");
            out.print(zeroPaddedHex(propertyMap(codePoint, charType), 4));
            if (debug) {
                out.print(" " + charType);
            }
            out.println();
        }
    }

    /**
     * Computes the property bitmap fastlib-style:
     *
     *   bit 0 = white space
     *   bit 1 = word char
     *   bit 2 = ideographic
     *   bit 3 = decimal digit
     *   bit 4 = ignorable control
     *
     *   White_Space                  = 0x01
     *   Alphabetic                   = 0x02
     *   Diacritic                    = 0x02
     *   Extender                     = 0x02
     *   Custom_word_char             = 0x02
     *   Ideographic                  = 0x04
     *   Nd                           = 0x0A  (both digit and alphabetic)
     *   Default_Ignorable_Code_Point = 0x10
     *   Custom_Non_Word_Char         = ~0x02
     *
     * The int overloads of the Character methods are used so that code points
     * above 0xffff are classified as themselves, not as the char their low
     * 16 bits happen to form.
     */
    private static int propertyMap(int codePoint, int charType) {
        int map = 0;
        boolean whitespace = Character.isWhitespace(codePoint);

        if (whitespace) {
            map |= 0x01;
        }
        if (Character.isLetter(codePoint)) {
            map |= 0x02;
        }
        if (charType == Character.OTHER_LETTER) {
            map |= 0x04;
        }
        if (Character.isDigit(codePoint)) {
            map |= 0x0A;
        }
        if ((charType == Character.CONTROL || charType == Character.FORMAT
                || charType == Character.SURROGATE
                || charType == Character.UNASSIGNED)
            && !whitespace) {
            map |= 0x10;
        }
        return map;
    }

    /** Returns the value as lowercase hex, left-padded with zeros to at least the given width */
    private static String zeroPaddedHex(int value, int width) {
        String hex = Integer.toHexString(value);
        StringBuilder padded = new StringBuilder(width);

        for (int i = hex.length(); i < width; i++) {
            padded.append('0');
        }
        return padded.append(hex).toString();
    }
}
+ AndItem and=null; + OrItem or=null; + NotItem not=null; // Store negatives here as we go + Item current; + + // Find all items + do { + current=negativeItem(); + if (current!=null) { + not=addNot(current,not); + continue; + } + + current=positiveItem(); + if (current==null) + current = indexableItem(); + + if (current!=null) { + if (and!=null && (current instanceof WordItem) && "OR".equals(((WordItem)current).getRawWord())) { + if (or==null) + or=addOr(and,or); + and=new AndItem(); + or.addItem(and); + } + else { + and=addAnd(current,and); + } + } + + if (current == null) // Change + tokens.skip(); + } while (tokens.hasNext()); + + // Combine the items + Item topLevel=and; + + if (or!=null) + topLevel=or; + + if (not!=null && topLevel!=null) { + not.setPositiveItem(topLevel); + topLevel=not; + } + + return simplifyUnnecessaryComposites(topLevel); + } + + protected void setSubmodeFromIndex(String indexName, Set<String> searchDefinitions) { + // No submodes in this language + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/package-info.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/package-info.java new file mode 100644 index 00000000000..e75e7aad9dc --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.prelude.query.parser; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/query/textualrepresentation/Discloser.java b/container-search/src/main/java/com/yahoo/prelude/query/textualrepresentation/Discloser.java new file mode 100644 index 00000000000..dc49979bec7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/textualrepresentation/Discloser.java @@ -0,0 +1,17 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query.textualrepresentation; + +import com.yahoo.prelude.query.Item; + +/** + * Allows an item to disclose its properties and children/value. + * + * @author tonytv + */ +public interface Discloser { + void addProperty(String key, Object value); + + //A given item should either call setValue or addChild, not both. + void setValue(Object value); + void addChild(Item item); +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/textualrepresentation/TextualQueryRepresentation.java b/container-search/src/main/java/com/yahoo/prelude/query/textualrepresentation/TextualQueryRepresentation.java new file mode 100644 index 00000000000..418636d39db --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/textualrepresentation/TextualQueryRepresentation.java @@ -0,0 +1,210 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query.textualrepresentation; + +import com.yahoo.prelude.query.Item; + +import java.lang.reflect.Array; +import java.util.*; +import java.util.regex.Pattern; + +/** + * Creates a detailed, QED inspired representation of a query tree. + * + * @author tonytv + */ +public class TextualQueryRepresentation { + private Map<Item, Integer> itemReferences = new IdentityHashMap<>(); + private int nextItemReference = 0; + + final private ItemDiscloser rootDiscloser; + + /** Creates the textual representation for a single Item. 
*/ + private class ItemDiscloser implements Discloser { + private final Item item; + + final Map<String, Object> properties = new TreeMap<>(); + final String name; + + Object value; + final List<ItemDiscloser> children = new ArrayList<>(); + + ItemDiscloser(Item item) { + this.item = item; + name = item.getName(); + } + + public void addProperty(String key, Object value) { + assert(key.indexOf(' ') == -1); + properties.put(key, value); + + if (value instanceof Item) + setItemReference((Item)value); + } + + public void setValue(Object value) { + assert(children.isEmpty()); + this.value = value; + } + + public void addChild(Item child) { + assert(value == null); + children.add(expose(child)); + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append(name); + + if (!properties.isEmpty() || itemReferences.get(item) != null) { + builder.append('['); + addPropertiesString(builder); + builder.append(']'); + } + + if (value != null || !children.isEmpty()) { + builder.append("{\n"); + addBody(builder); + builder.append("}\n"); + } + return builder.toString(); + } + + private void addBody(StringBuilder builder) { + if (value != null) { + addIndented(builder, valueString(value)); + } else { + for (ItemDiscloser child : children) { + addIndented(builder, child.toString()); + } + } + } + + //for each line: add "<indentation><line><newline>" + private void addIndented(StringBuilder builder, String toAdd) { + String indent = " "; + for (String line : toAdd.split(Pattern.quote("\n"))) + builder.append(indent).append(line).append('\n'); + } + + private void addPropertiesString(StringBuilder s) { + boolean firstTime = true; + + Integer itemReference = itemReferences.get(item); + if (itemReference != null) { + addPropertyString(s, "%id", itemReference); + firstTime = false; + } + + for (Map.Entry<String,Object> entry : properties.entrySet()) { + if (!firstTime) { + s.append(' '); + } + addPropertyString(s, entry.getKey(), 
entry.getValue()); + firstTime = false; + } + } + + private void addPropertyString(StringBuilder s, String key, Object value) { + s.append(key).append('=').append(valueString(value)); + } + + private void setItemReference(Item item) { + if (itemReferences.get(item) == null) + itemReferences.put(item, nextItemReference++); + } + + } + + + @SuppressWarnings("rawtypes") + private String valueString(Object value) { + if (value == null) + return null; + else if (value instanceof String) + return '"' + quote((String)value) + '"'; + else if (value instanceof Number || value instanceof Boolean || value instanceof Enum) + return value.toString(); + else if (value instanceof Item) + return itemReference((Item)value); + else if (value.getClass().isArray()) + return listString(arrayToList(value).iterator()); + else if ( value instanceof List ) + return listString(((List)value).iterator()); + else if ( value instanceof Set ) + return listString( ((Set)value).iterator()); + else if ( value instanceof Map ) + return mapString((Map)value); + else + return '"' + quote(value.toString()) + '"'; + } + + //handles both primitive and object arrays. 
+ @SuppressWarnings({ "rawtypes", "unchecked" }) + private List arrayToList(Object array) { + int length = Array.getLength(array); + List list = new ArrayList(); + for (int i = 0; i<length; ++i) + list.add(Array.get(array, i)); + return list; + } + + private String mapString(Map<?, ?> map) { + StringBuilder result = new StringBuilder(); + final String mapBegin = "map("; + result.append(mapBegin); + + boolean firstTime = true; + for (Map.Entry<?,?> entry: map.entrySet()) { + if (!firstTime) + result.append(' '); + firstTime = false; + + result.append(valueString(entry.getKey())).append("=>").append(valueString(entry.getValue())); + } + + result.append(')'); + return result.toString(); + } + + private String listString(Iterator<?> iterator) { + StringBuilder result = new StringBuilder(); + result.append('('); + + boolean firstTime = true; + while (iterator.hasNext()) { + if (!firstTime) + result.append(' '); + firstTime = false; + + result.append(valueString(iterator.next())); + } + + result.append(')'); + return result.toString(); + } + + private String itemReference(Item item) { + Integer reference = itemReferences.get(item); + return reference != null ? 
reference.toString() : "Unknown item: '" + System.identityHashCode(item) + "'"; + } + + private static String quote(String s) { + return s.replaceAll("\"", "\\\\\"" ); + } + + private ItemDiscloser expose(Item item) { + ItemDiscloser itemDiscloser = new ItemDiscloser(item); + item.disclose(itemDiscloser); + return itemDiscloser; + } + + public TextualQueryRepresentation(Item root) { + rootDiscloser = expose(root); + } + + @Override + public String toString() { + return rootDiscloser.toString(); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java new file mode 100644 index 00000000000..009c11ab1fd --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/CJKSearcher.java @@ -0,0 +1,111 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.querytransform; + +import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; + +import java.util.Iterator; +import java.util.ListIterator; + +import com.yahoo.language.Language; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.prelude.query.AndItem; +import com.yahoo.prelude.query.AndSegmentItem; +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.PhraseItem; +import com.yahoo.prelude.query.PhraseSegmentItem; +import com.yahoo.prelude.query.SegmentItem; +import com.yahoo.prelude.query.WordItem; +import com.yahoo.search.Searcher; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +/** + * Search to do necessary transforms if 
the query is in segmented in + * a "CJK language". + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@After(PhaseNames.UNBLENDED_RESULT) +@Before(STEMMING) +@Provides(CJKSearcher.TERM_ORDER_RELAXATION) +public class CJKSearcher extends Searcher { + public static final String TERM_ORDER_RELAXATION = "TermOrderRelaxation"; + + @Override + public Result search(Query query, Execution execution) { + Language l = query.getModel().getParsingLanguage(); + if (!l.isCjk()) return execution.search(query); + + QueryTree tree = query.getModel().getQueryTree(); + tree.setRoot(transform(tree.getRoot())); + query.trace("Rewriting for CJK behavior for implicit phrases", true, 2); + return execution.search(query); + } + + private Item transform(Item root) { + if (root instanceof PhraseItem) { + PhraseItem asPhrase = (PhraseItem) root; + if (asPhrase.isExplicit() || hasOverlappingTokens(asPhrase)) return root; + + AndItem replacement = new AndItem(); + for (ListIterator<Item> i = ((CompositeItem) root).getItemIterator(); i.hasNext();) { + Item item = i.next(); + if (item instanceof WordItem) replacement.addItem(item); + else if (item instanceof PhraseSegmentItem) { + replacement.addItem(new AndSegmentItem((PhraseSegmentItem) item)); + } + else replacement.addItem(item); // should never run, but hey... 
just convert and hope it's OK :) + } + return replacement; + } else if (root instanceof PhraseSegmentItem) { + PhraseSegmentItem asSegment = (PhraseSegmentItem) root; + if (asSegment.isExplicit() || hasOverlappingTokens(asSegment)) return root; + else return new AndSegmentItem(asSegment); + } else if (root instanceof SegmentItem) { + return root; // avoid descending into AndSegmentItems and similar + } else if (root instanceof CompositeItem) { + for (ListIterator<Item> i = ((CompositeItem) root).getItemIterator(); i.hasNext();) { + Item item = i.next(); + Item transformedItem = transform(item); + if (item != transformedItem) { + i.set(transformedItem); + } + } + return root; + } + return root; + } + + + private boolean hasOverlappingTokens(PhraseItem phrase) { + boolean has = false; + for (Iterator<Item> i = phrase.getItemIterator(); i.hasNext(); ) { + Item segment = i.next(); + if (segment instanceof PhraseSegmentItem) has = hasOverlappingTokens((PhraseSegmentItem) segment); + if (has) return true; + } + return has; + } + + /* + * We have overlapping tokens (see + * com.yahoo.prelude.querytransform.test.CJKSearcherTestCase + * .testCjkQueryWithOverlappingTokens and ParseTestCase for an explanation) + * if the sum of length of tokens is greater than the lenght of the original + * word + */ + private boolean hasOverlappingTokens(PhraseSegmentItem segments) { + int segmentsLength=0; + for (Iterator<Item> i = segments.getItemIterator(); i.hasNext(); ) { + WordItem segment = (WordItem) i.next(); + segmentsLength += segment.getWord().length(); + } + return segmentsLength > segments.getRawWord().length(); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/CollapsePhraseSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/CollapsePhraseSearcher.java new file mode 100644 index 00000000000..0bddaf5ff51 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/CollapsePhraseSearcher.java @@ 
-0,0 +1,67 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.querytransform; + +import java.util.ListIterator; + +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.PhraseItem; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.searchchain.Execution; + +/** + * Make single item phrases in query into single word items. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class CollapsePhraseSearcher extends Searcher { + public Result search(Query query, Execution execution) { + QueryTree tree = query.getModel().getQueryTree(); + Item root = tree.getRoot(); + if (root != null) { + Item newRoot = root.clone(); + newRoot = simplifyPhrases(newRoot); + // Sets new root instead of transforming the query tree + // to make code nicer if the root is a single term phrase + if (!root.equals(newRoot)) { + tree.setRoot(newRoot); + query.trace("Collapsing single term phrases to single terms", + true, 2); + } + } + return execution.search(query); + } + + + private Item simplifyPhrases(Item root) { + if (root == null) { + return root; + } + else if (root instanceof PhraseItem) { + return collapsePhrase((PhraseItem)root); + } + else if (root instanceof CompositeItem) { + CompositeItem composite = (CompositeItem)root; + ListIterator<Item> i = composite.getItemIterator(); + while (i.hasNext()) { + Item original = i.next(); + Item transformed = simplifyPhrases(original); + if (original != transformed) + i.set(transformed); + } + return root; + } + else { + return root; + } + } + private Item collapsePhrase(PhraseItem root) { + if (root.getItemCount() == 1) + return root.getItem(0); + else + return root; + } +} diff --git 
a/container-search/src/main/java/com/yahoo/prelude/querytransform/IndexCombinatorSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/IndexCombinatorSearcher.java new file mode 100644 index 00000000000..e56303e60f8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/IndexCombinatorSearcher.java @@ -0,0 +1,358 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.querytransform; + +import static com.yahoo.prelude.querytransform.PhrasingSearcher.PHRASE_REPLACEMENT; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.log.LogLevel; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.Index.Attribute; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.*; +import com.yahoo.search.Query; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.*; + +/** + * Searcher to rewrite queries to achieve mixed recall between indices and + * memory attributes. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@After({PhaseNames.RAW_QUERY, PHRASE_REPLACEMENT}) +@Before(PhaseNames.TRANSFORMED_QUERY) +@Provides(IndexCombinatorSearcher.MIXED_RECALL_REWRITE) +// TODO: This is not necessary on Vespa 6, we should probably remove it from the default chain but keep it +// around until Vespa 6 to avoid breaking those who refer to it. +public class IndexCombinatorSearcher extends Searcher { + public static final String MIXED_RECALL_REWRITE = "MixedRecallRewrite"; + + private static class ArrayComparator implements Comparator<Attribute[]> { + /** + * Note, this ignores if there is a difference in whether to + * attributes have tokenized content. 
(If this is the case, + * we are having worse problems anyway.) + */ + public int compare(Attribute[] o1, Attribute[] o2 ) { + if (o1.length < o2.length) { + return -1; + } else if (o1.length > o2.length) { + return 1; + } + int limit = o1.length; + for (int i = 0; i < limit; ++i) { + int r = o1[i].name.compareTo(o2[i].name); + if (r != 0) { + return r; + } + } + return 0; + } + } + + private final ArrayComparator comparator = new ArrayComparator(); + + private enum RewriteStrategies { + NONE, CHEAP_AND, EXPENSIVE_AND, FLAT + } + + @Override + public com.yahoo.search.Result search(Query query, Execution execution) { + Item root = query.getModel().getQueryTree().getRoot(); + IndexFacts.Session session = execution.context().getIndexFacts().newSession(query); + String oldQuery = (query.getTraceLevel() >= 2) ? root.toString() : ""; + + if (root instanceof BlockItem || root instanceof PhraseItem) { + root = convertSinglePhraseOrBlock(root, session); + } else if (root instanceof CompositeItem) { + root = rewrite((CompositeItem) root, session); + } + query.getModel().getQueryTree().setRoot(root); + + if ((query.getTraceLevel() >= 2) && !(oldQuery.equals(root.toString()))) { + query.trace("Rewriting for mixed recall between indices and attributes", true, 2); + } + return execution.search(query); + } + + private RewriteStrategies chooseRewriteStrategy(CompositeItem c, IndexFacts.Session session) { + if (c instanceof OrItem) { + return RewriteStrategies.FLAT; + } else if (!(c instanceof AndItem)) { + return RewriteStrategies.NONE; + } + Map<Attribute[], Integer> m = new TreeMap<>(comparator); + for (Iterator<Item> i = c.getItemIterator(); i.hasNext();) { + Item j = i.next(); + if (j instanceof BlockItem || j instanceof PhraseItem) { + Attribute[] attributes= getIndices((HasIndexItem) j, session); + if (attributes == null) { + continue; + } + Integer count = m.get(attributes); + if (count == null) { + count = 1; + } else { + count = count.intValue() + 1; + } + 
m.put(attributes, count); + } + } + + if (m.size() == 0) { + return RewriteStrategies.NONE; + } + + int singles = 0; + int pairs = 0; + int higher = 0; + // count the number of sets being associated with 1, 2 or more terms + for (Integer i : m.values()) { + switch (i.intValue()) { + case 1: + ++singles; + break; + case 2: + pairs += 2; + break; + default: + ++higher; + break; + } + } + if (higher == 0 && pairs + singles <= 2) { + return RewriteStrategies.EXPENSIVE_AND; + } else { + return RewriteStrategies.CHEAP_AND; + } + } + + private CompositeItem rewriteNot(NotItem not, IndexFacts.Session session) { + Item positive = not.getItem(0); + if (positive instanceof BlockItem || positive instanceof PhraseItem) { + positive = convertSinglePhraseOrBlock(positive, session); + not.setItem(0, positive); + } else if (positive instanceof CompositeItem) { + CompositeItem c = (CompositeItem) positive; + positive = rewrite(c, session); + not.setItem(0, positive); + } + + int length = not.getItemCount(); + // no need for keeping proximity in the negative branches, so we + // convert them one by one, _and_ always uses cheap transform + for (int i = 1; i < length; ++i) { + Item exclusion = not.getItem(i); + if (exclusion instanceof BlockItem || exclusion instanceof PhraseItem) { + exclusion = convertSinglePhraseOrBlock(exclusion, session); + not.setItem(i, exclusion); + } else if (exclusion instanceof CompositeItem) { + CompositeItem c = (CompositeItem) exclusion; + switch (chooseRewriteStrategy(c, session)) { + case NONE: + c = traverse(c, session); + break; + case CHEAP_AND: + case EXPENSIVE_AND: + c = cheapTransform(c, session); + break; + default: + c = flatTransform(c, session); + break; + } + not.setItem(i, c); + } + } + return not; + } + + private Item rewrite(CompositeItem c, IndexFacts.Session session) { + if (c instanceof NotItem) { + c = rewriteNot((NotItem) c, session); + return c; + } else if (c instanceof CompositeItem) { + switch (chooseRewriteStrategy(c, session)) { 
+ case NONE: + c = traverse(c, session); + break; + case CHEAP_AND: + c = cheapTransform(c, session); + break; + case EXPENSIVE_AND: + c = expensiveTransform((AndItem) c, session); + break; + case FLAT: + c = flatTransform(c, session); + default: + break; + } + } + return c; + } + + private CompositeItem traverse(CompositeItem c, IndexFacts.Session session) { + int length = c.getItemCount(); + for (int i = 0; i < length; ++i) { + Item word = c.getItem(i); + if (word instanceof CompositeItem && !(word instanceof PhraseItem) + && !(word instanceof BlockItem)) { + c.setItem(i, rewrite((CompositeItem) word, session)); + } + } + return c; + } + + private CompositeItem expensiveTransform(AndItem c, IndexFacts.Session session) { + int[] indices = new int[2]; + int items = 0; + int length = c.getItemCount(); + Attribute[][] names = new Attribute[2][]; + CompositeItem result = null; + for (int i = 0; i < length; ++i) { + Item word = c.getItem(i); + if (word instanceof BlockItem || word instanceof PhraseItem) { + Attribute[] attributes = getIndices((HasIndexItem) word, session); + if (attributes == null) { + continue; + } + // this throwing an out of bounds if more than two candidates is intentional + names[items] = attributes; + indices[items++] = i; + } else if (word instanceof CompositeItem) { + c.setItem(i, rewrite((CompositeItem) word, session)); + } + } + switch (items) { + case 1: + result = linearAnd(c, names[0], indices[0]); + break; + case 2: + result = quadraticAnd(c, names[0], names[1], indices[0], indices[1]); + break; + default: + // should never happen + getLogger().log( + LogLevel.WARNING, + "Unexpected number of items for mixed recall, got " + items + + ", expected 1 or 2."); + break; + } + return result; + } + + private Attribute[] getIndices(HasIndexItem block, IndexFacts.Session session) { + return session.getIndex(block.getIndexName()).getMatchGroup(); + } + + private OrItem linearAnd(AndItem c, Attribute[] names, int brancherIndex) { + OrItem or = new 
OrItem(); + for (int i = 0; i < names.length; ++i) { + AndItem duck = (AndItem) c.clone(); + Item b = retarget(duck.getItem(brancherIndex), names[i]); + duck.setItem(brancherIndex, b); + or.addItem(duck); + } + return or; + } + + private OrItem quadraticAnd(AndItem c, Attribute[] firstNames, Attribute[] secondNames, int firstBrancher, int secondBrancher) { + OrItem or = new OrItem(); + for (int i = 0; i < firstNames.length; ++i) { + for (int j = 0; j < secondNames.length; ++j) { + AndItem duck = (AndItem) c.clone(); + Item b = retarget(duck.getItem(firstBrancher), firstNames[i]); + duck.setItem(firstBrancher, b); + b = retarget(duck.getItem(secondBrancher), secondNames[j]); + duck.setItem(secondBrancher, b); + or.addItem(duck); + } + } + return or; + } + + private CompositeItem flatTransform(CompositeItem c, IndexFacts.Session session) { + int maxIndex = c.getItemCount() - 1; + for (int i = maxIndex; i >= 0; --i) { + Item word = c.getItem(i); + if (word instanceof BlockItem || word instanceof PhraseItem) { + Attribute[] attributes = getIndices((HasIndexItem) word, session); + if (attributes == null) { + continue; + } + c.removeItem(i); + for (Attribute name : attributes) { + Item term = word.clone(); + Item forNewIndex = retarget(term, name); + c.addItem(forNewIndex); + } + } else if (word instanceof CompositeItem) { + c.setItem(i, rewrite((CompositeItem) word, session)); + } + } + return c; + } + + private CompositeItem cheapTransform(CompositeItem c, IndexFacts.Session session) { + if (c instanceof OrItem) { + return flatTransform(c, session); + } + int length = c.getItemCount(); + for (int i = 0; i < length; ++i) { + Item j = c.getItem(i); + if (j instanceof BlockItem || j instanceof PhraseItem) { + Attribute[] attributes = getIndices((HasIndexItem) j, session); + if (attributes == null) { + continue; + } + CompositeItem or = searchAllForItem(j, attributes); + c.setItem(i, or); + } else if (j instanceof CompositeItem) { + c.setItem(i, rewrite((CompositeItem) j, 
session)); + } + } + return c; + } + + private OrItem searchAllForItem(Item word, Attribute[] attributes) { + OrItem or = new OrItem(); + for (Attribute name : attributes) { + Item term = word.clone(); + term = retarget(term, name); + or.addItem(term); + } + return or; + } + + private Item retarget(Item word, Attribute newIndex) { + if (word instanceof PhraseItem && !newIndex.isTokenizedContent()) { + PhraseItem asPhrase = (PhraseItem) word; + WordItem newWord = new WordItem(asPhrase.getIndexedString(), newIndex.name, false); + return newWord; + } else if (word instanceof IndexedItem) { + word.setIndexName(newIndex.name); + } else if (word instanceof CompositeItem) { + CompositeItem asComposite = (CompositeItem) word; + for (Iterator<Item> i = asComposite.getItemIterator(); i.hasNext();) { + Item segment = i.next(); + segment.setIndexName(newIndex.name); + } + } + return word; + } + + private Item convertSinglePhraseOrBlock(Item item, IndexFacts.Session session) { + Item newItem; + Attribute[] attributes = getIndices((HasIndexItem) item, session); + if (attributes == null) { + return item; + } + newItem = searchAllForItem(item, attributes); + return newItem; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/LiteralBoostSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/LiteralBoostSearcher.java new file mode 100644 index 00000000000..152a7565cb9 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/LiteralBoostSearcher.java @@ -0,0 +1,91 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.querytransform; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.NotItem; +import com.yahoo.prelude.query.RankItem; +import com.yahoo.prelude.query.TermItem; +import com.yahoo.prelude.query.WordItem; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.Iterator; + +import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; +import static com.yahoo.language.LinguisticsCase.toLowerCase; + +/** + * Adds rank terms to boost hits matching exact literals fields using info + * from indexing commands. + * + * @author bratseth + */ +@Before(STEMMING) +@After(PhaseNames.UNBLENDED_RESULT) +public class LiteralBoostSearcher extends Searcher { + + @Override + public Result search(Query query, Execution execution) { + addRankTerms(query, execution.context().getIndexFacts().newSession(query)); + return execution.search(query); + } + + private void addRankTerms(Query query, IndexFacts.Session indexFacts) { + RankItem newRankTerms = new RankItem(); + addLiterals(newRankTerms, query.getModel().getQueryTree().getRoot(), indexFacts); + if (newRankTerms.getItemCount() > 0) + addTopLevelRankTerms(newRankTerms, query); + + if (query.getTraceLevel() >= 2 && newRankTerms.getItemCount() > 0) + query.trace("Added rank terms for possible literal field matches.", true, 2); + } + + /** + * Adds a RankItem at the root of a query, but only if there is + * at least one rank term in the specified RankItem. + * If the root is already a RankItem, just append the new rank terms. + * + * @param rankTerms the new rank item to add. 
+ * @param query the query to add to + */ + private void addTopLevelRankTerms(RankItem rankTerms, Query query) { + Item root = query.getModel().getQueryTree().getRoot(); + if (root instanceof RankItem) { + for (Iterator<Item> i = rankTerms.getItemIterator(); i.hasNext(); ) { + ((RankItem)root).addItem(i.next()); + } + } + else { + rankTerms.addItem(0, root); + query.getModel().getQueryTree().setRoot(rankTerms); + + } + } + + private void addLiterals(RankItem rankTerms, Item item, IndexFacts.Session indexFacts) { + if (item == null) return; + + if (item instanceof NotItem) { + addLiterals(rankTerms, ((NotItem) item).getPositiveItem(), indexFacts); + } + else if (item instanceof CompositeItem) { + for (Iterator<Item> i = ((CompositeItem)item).getItemIterator(); i.hasNext(); ) + addLiterals(rankTerms, i.next(), indexFacts); + } + else if (item instanceof TermItem) { + TermItem termItem = (TermItem)item; + Index index = indexFacts.getIndex(termItem.getIndexName()); + if (index.getLiteralBoost()) + rankTerms.addItem(new WordItem(toLowerCase(termItem.getRawWord()), index.getName() + "_literal")); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/NoRankingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/NoRankingSearcher.java new file mode 100644 index 00000000000..7df98fdd093 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/NoRankingSearcher.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.querytransform; + + +import java.util.List; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.search.Searcher; +import com.yahoo.search.query.Sorting.FieldOrder; +import com.yahoo.search.searchchain.Execution; + + +/** + * Avoid doing relevance calculations if sorting only + * on attributes. 
+ * + * @author Steinar Knutsen + */ +@After("rawQuery") +@Before("transformedQuery") +public class NoRankingSearcher extends Searcher { + + private static final String RANK = "[rank]"; + private static final String UNRANKED = "unranked"; + + @Override + public com.yahoo.search.Result search(com.yahoo.search.Query query, Execution execution) { + List<FieldOrder> s = (query.getRanking().getSorting() != null) ? query.getRanking().getSorting().fieldOrders() : null; + if (s == null) { + return execution.search(query); + } + for (FieldOrder f : s) { + if (RANK.equals(f.getFieldName())) { + return execution.search(query); + } + } + query.getRanking().setProfile(UNRANKED); + return execution.search(query); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/NonPhrasingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/NonPhrasingSearcher.java new file mode 100644 index 00000000000..c79933dbbd0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/NonPhrasingSearcher.java @@ -0,0 +1,74 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.querytransform; + +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.searchchain.Execution; + +import java.util.List; + +/** + * <p>Detects and removes certain phrases from the query.</p> + * + * @author bratseth + */ +@After("rawQuery") +@Before("transformedQuery") +public class NonPhrasingSearcher extends Searcher { + + private static final CompoundName suggestonly=new CompoundName("suggestonly"); + + private PhraseMatcher phraseMatcher; + + public NonPhrasingSearcher(ComponentId id, QrSearchersConfig config) { + super(id); + setupAutomatonFile(config.com().yahoo().prelude().querytransform().NonPhrasingSearcher().automatonfile()); + } + + /** + * Creates a nonphrasing searcher + * + * @param phraseAutomatonFile the file containing phrases which should be removed + * @throws IllegalStateException if the automata component is unavailable + * in the current environment + * @throws IllegalArgumentException if the file is not found + */ + public NonPhrasingSearcher(String phraseAutomatonFile) { + setupAutomatonFile(phraseAutomatonFile); + } + + private void setupAutomatonFile(String phraseAutomatonFile) { + if (phraseAutomatonFile == null || phraseAutomatonFile.trim().equals("")) { + //no file, just use dummy matcher + phraseMatcher = PhraseMatcher.getNullMatcher(); + } else { + //use real matcher + phraseMatcher = new PhraseMatcher(phraseAutomatonFile); + } + } + + @Override + public com.yahoo.search.Result search(com.yahoo.search.Query query, Execution execution) { + List<PhraseMatcher.Phrase> phrases=phraseMatcher.matchPhrases(query.getModel().getQueryTree().getRoot()); + if (phrases!=null && !query.properties().getBoolean(suggestonly, false)) { + remove(phrases); + 
query.trace("Removing stop words",true,2); + } + return execution.search(query); + } + + private void remove(List<PhraseMatcher.Phrase> phrases) { + // Removing the leaf replace phrases first to preserve + // the start index of each replace phrase until removing + for (int i=phrases.size()-1; i>=0; i-- ) { + PhraseMatcher.Phrase phrase= phrases.get(i); + if (phrase.getLength()<phrase.getOwner().getItemCount()) // Don't removeField all + phrase.remove(); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java new file mode 100644 index 00000000000..1d77b9184a3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/NormalizingSearcher.java @@ -0,0 +1,167 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.querytransform; + +import java.util.*; + +import com.google.inject.Inject; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.IndexFacts.Session; +import com.yahoo.prelude.query.*; +import com.yahoo.prelude.query.WordAlternativesItem.Alternative; +import com.yahoo.search.Searcher; +import com.yahoo.language.Language; +import com.yahoo.language.Linguistics; +import com.yahoo.search.Query; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; + +/** + * Normalizes accents + * + * @author bratseth + */ +@After({ PhaseNames.UNBLENDED_RESULT, STEMMING }) +@Provides(NormalizingSearcher.ACCENT_REMOVAL) +public class NormalizingSearcher extends Searcher { + + public static final String ACCENT_REMOVAL = "AccentRemoval"; + private final 
Linguistics linguistics; + + @Inject + public NormalizingSearcher(Linguistics linguistics) { + this.linguistics = linguistics; + } + + protected boolean handles(String command) { + return "normalize".equals(command); + } + + public String getFunctionName() { + return "Normalizing accents"; + } + + @Override + public com.yahoo.search.Result search(com.yahoo.search.Query query, Execution execution) { + normalize(query, execution.context().getIndexFacts().newSession(query)); + return execution.search(query); + } + + protected void normalize(Query query, IndexFacts.Session indexFacts) { + String oldQuery = (query.getTraceLevel() >= 2) ? query.getModel().getQueryTree().getRoot().toString() : ""; + normalizeBody(query, indexFacts); + if (query.getTraceLevel() >= 2) + if (!(oldQuery.equals(query.getModel().getQueryTree().getRoot().toString()))) query.trace(getFunctionName(), true, 2); + } + + private Query normalizeBody(Query query, IndexFacts.Session indexFacts) { + Item root = query.getModel().getQueryTree().getRoot(); + Language language = query.getModel().getParsingLanguage(); + if (root instanceof BlockItem) { + List<Item> rootItems = new ArrayList<>(1); + + rootItems.add(root); + ListIterator<Item> i = rootItems.listIterator(); + + i.next(); + normalizeBlocks(language, indexFacts, (BlockItem) root, i); + query.getModel().getQueryTree().setRoot(rootItems.get(0)); + } else if (root instanceof CompositeItem) { + query.getModel().getQueryTree().setRoot(normalizeComposite(language, indexFacts, (CompositeItem) root)); + } + return query; + } + + private Item normalizeComposite(Language language, IndexFacts.Session indexFacts, CompositeItem item) { + if (item instanceof PhraseItem) { + return normalizePhrase(language, indexFacts, (PhraseItem) item); + } + else { + for (ListIterator<Item> i = item.getItemIterator(); i.hasNext(); ) { + Item current = i.next(); + + if (current instanceof BlockItem) { + normalizeBlocks(language, indexFacts, (BlockItem) current, i); + } else if 
(current instanceof CompositeItem) { + Item currentProcessed = normalizeComposite(language, indexFacts, (CompositeItem) current); + i.set(currentProcessed); + } + } + return item; + } + } + + private void normalizeBlocks(Language language, IndexFacts.Session indexFacts, BlockItem block, ListIterator<Item> i) { + if (block instanceof TermItem) { + if (block instanceof WordAlternativesItem) { + normalizeAlternatives(language, indexFacts, (WordAlternativesItem) block); + } else { + normalizeWord(language, indexFacts, (TermItem) block, i); + } + } else { + for (ListIterator<Item> j = ((SegmentItem) block).getItemIterator(); j.hasNext();) + normalizeWord(language, indexFacts, (TermItem) j.next(), j); + } + } + + private void normalizeAlternatives(Language language, Session indexFacts, WordAlternativesItem block) { + if (!block.isNormalizable()) { + return; + } + { + Index index = indexFacts.getIndex(block.getIndexName()); + if (index.isAttribute()) { + return; + } + if (!index.getNormalize()) { + return; + } + } + + List<Alternative> terms = block.getAlternatives(); + for (Alternative term : terms) { + String accentDropped = linguistics.getTransformer().accentDrop(term.word, language); + if (!term.word.equals(accentDropped) && accentDropped.length() > 0) { + block.addTerm(accentDropped, term.exactness * .7d); + } + } + } + + private Item normalizePhrase(Language language, IndexFacts.Session indexFacts, PhraseItem phrase) { + if ( ! 
indexFacts.getIndex(phrase.getIndexName()).getNormalize()) return phrase; + + for (ListIterator<Item> i = phrase.getItemIterator(); i.hasNext();) { + IndexedItem content = (IndexedItem) i.next(); + + if (content instanceof TermItem) { + normalizeWord(language, indexFacts, (TermItem) content, i); + } + else { + PhraseSegmentItem segment = (PhraseSegmentItem) content; + for (ListIterator<Item> j = segment.getItemIterator(); j.hasNext();) + normalizeWord(language, indexFacts, (TermItem) j.next(), j); + } + } + return phrase; + } + + private void normalizeWord(Language language, IndexFacts.Session indexFacts, TermItem term, ListIterator<Item> i) { + if ( ! (term instanceof WordItem)) return; + if ( ! term.isNormalizable()) return; + Index index = indexFacts.getIndex(term.getIndexName()); + if (index.isAttribute()) return; + if ( ! index.getNormalize()) return; + + WordItem word = (WordItem) term; + String accentDropped = linguistics.getTransformer().accentDrop(word.getWord(), language); + if (accentDropped.length() == 0) + i.remove(); + else + word.setWord(accentDropped); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/PhraseMatcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/PhraseMatcher.java new file mode 100644 index 00000000000..da969986394 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/PhraseMatcher.java @@ -0,0 +1,556 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.querytransform; + +import com.yahoo.fsa.FSA; +import com.yahoo.prelude.query.*; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +import static com.yahoo.language.LinguisticsCase.toLowerCase; + +/** + * <p>Detects query phrases using an automaton. 
This class is thread safe.</p> + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class PhraseMatcher { + + private FSA phraseFSA; + + private boolean matchPhraseItems=false; + + private boolean matchSingleItems=false; + + /** Whether this should ignore regular plural/singular form differences when matching */ + private boolean ignorePluralForm=false; + + /** False to matche the longest phrase, true to match <i>all</i> phrases */ + private boolean matchAll =false; + + /** For null subclass only */ + PhraseMatcher() { + } + + /** + * Creates a phrase matcher. This will not ignore plural/singular form differences when matching + * + * @param phraseAutomatonFile the file containing phrases to match + * @throws IllegalArgumentException if the file is not found + */ + public PhraseMatcher(String phraseAutomatonFile) { + this(phraseAutomatonFile,false); + } + + /** + * Creates a phrase matcher + * + * @param phraseAutomatonFile the file containing phrases to match + * @param ignorePluralForm whether we should ignore plural and singular forms as matches + * @throws IllegalArgumentException if the file is not found + */ + public PhraseMatcher(String phraseAutomatonFile,boolean ignorePluralForm) { + this.ignorePluralForm=ignorePluralForm; + phraseFSA=new FSA(phraseAutomatonFile); + } + + /** + * Creates a phrase matcher + * + * @param phraseAutomatonFSA the fsa containing phrases to match + * @param ignorePluralForm whether we should ignore plural and singular forms as matches + * @throws IllegalArgumentException if FSA is null + */ + public PhraseMatcher(FSA phraseAutomatonFSA,boolean ignorePluralForm) { + if(phraseAutomatonFSA==null) throw new IllegalArgumentException("FSA is null"); + this.ignorePluralForm=ignorePluralForm; + phraseFSA=phraseAutomatonFSA; + } + + /** + * Set whether to match words contained in phrase items as well. 
+ * Default is false - don't match words contained in phrase items + */ + public void setMatchPhraseItems(boolean matchPhraseItems) { + this.matchPhraseItems=matchPhraseItems; + } + + /** + * Sets whether single items should be matched and returned as phrase matches. + * Default is false. + */ + public void setMatchSingleItems(boolean matchSingleItems) { + this.matchSingleItems=matchSingleItems; + } + + /** Sets whether we should ignore plural/singular form when matching */ + public void setIgnorePluralForm(boolean ignorePluralForm) { this.ignorePluralForm=ignorePluralForm; } + + /** + * Sets whether to return the longest matching phrase when there are overlapping matches (default), + * or <i>all</i> matching phrases + */ + public void setMatchAll(boolean matchAll) { this.matchAll =matchAll; } + + /** + * Finds all phrases (word sequences of length 1 or higher) + * of the same index, not negative items of a notitem, + * which constitutes a complete entry in the automaton of this matcher + * + * @param queryItem the root query item in which to match phrases + * @return the matched phrases, or <b>null</b> if there was no matches + */ + public List<Phrase> matchPhrases(Item queryItem) { + if (matchSingleItems && (queryItem instanceof TermItem)) { + return matchSingleItem((TermItem)queryItem); + } + else { + MatchedPhrases phrases=new MatchedPhrases(); + recursivelyMatchPhrases(queryItem,phrases); + return phrases.toList(); + } + } + + /** Returns null if this word does not match the automaton, a single-item list if it does */ + private List<Phrase> matchSingleItem(TermItem termItem) { + String matchWord=toLowerCase(termItem.stringValue()); + String replaceWord=null; + FSA.State state = phraseFSA.getState(); + if (!matches(state,matchWord)) { + if (!ignorePluralForm) return null; + matchWord=switchForm(matchWord); + if (!matches(state,matchWord)) return null; + replaceWord=matchWord; + } + + List<Phrase> itemList=new java.util.ArrayList<>(1); + itemList.add(new 
Phrase(termItem,replaceWord,state.dataString())); + return itemList; + + } + + private boolean matches(FSA.State state,String word) { + state.start(); + state.delta(word); + return state.isFinal(); + } + + /** Find matches within a composite */ + private void recursivelyMatchPhrases(Item item,MatchedPhrases phrases) { + if (item==null) return; + if ( ! (item instanceof CompositeItem) ) return; + if ( !matchPhraseItems && item instanceof PhraseItem ) return; + + CompositeItem owner=(CompositeItem)item; + int i=0; + int checkItemCount=owner.getItemCount(); + if (owner instanceof NotItem) + checkItemCount=1; // Skip negatives + + while (i<checkItemCount) { + int largestFoundLength=findPhrasesAtStartpoint(i,owner,phrases); + + if (largestFoundLength==0 || matchAll) { + recursivelyMatchPhrases(owner.getItem(i),phrases); + i=i+1; + } + else { + i=i+largestFoundLength; + } + } + } + + /** + * If (!matchAll), finds longest possible phrase starting at the + * given index in the owner and adds it to phrases. + * + * If (matchAll), finds all possible phrases starting at the given index + * + * @return the length of the largest phrase found at this starting point, or 0 if none + */ + private int findPhrasesAtStartpoint(int startIndex,CompositeItem owner,MatchedPhrases phrases) { + FSA.State state = phraseFSA.getState(); + int currentIndex=startIndex; + Phrase phrase=null; + List<String> replaceList=null; + + String index=null; + state.start(); + + while (currentIndex<owner.getItemCount()) { // Loop until the largest possible phrase is passed + Item current=owner.getItem(currentIndex); + if (! 
(current instanceof TermItem) ) break; + + TermItem termItem=(TermItem)current; + + if (state.isStartState()) + index=termItem.getIndexName(); + else + if (!termItem.getIndexName().equals(index)) break; + + String lowercased = toLowerCase(termItem.stringValue()); + boolean matched=state.tryDeltaWord(lowercased); + if (!matched && ignorePluralForm) { + String invertedWord=switchForm(lowercased); + matched=state.tryDeltaWord(invertedWord); + if (matched) + replaceList=setReplace(replaceList,currentIndex-startIndex,invertedWord); + } + if (!matched) break; + + if (state.isFinal()) // Legal return point reached, but we'll look for longer ones too + phrase=new Phrase(owner,replaceList,startIndex,currentIndex-startIndex+1,state.dataString()); + if (matchAll) + phrases.add(phrase); + currentIndex++; + } + + if (phrase==null) return 0; + if (!matchAll) + phrases.add(phrase); + return phrase.getLength(); + } + + /** Adds a replace word at an index, and any required null's to get to this item. Creates the list if it is null */ + private List<String> setReplace(List<String> replaceList,int index,String invertedWord) { + if (replaceList==null) + replaceList=new ArrayList<>(); + while (replaceList.size()<index) + replaceList.add(null); + replaceList.add(invertedWord); + return replaceList; + } + + /** Makes this plural if it is singular and vice-versa */ + private String switchForm(String word) { + if (word.endsWith("s") && word.length()>2) + return word.substring(0,word.length()-1); + return word + "s"; + } + + /** Holder of a lazily created list of matched phrases */ + private static class MatchedPhrases { + + private List<Phrase> phrases=null; + + private void add(Phrase phrase) { + if (phrase==null) return; + if (phrases==null) + phrases=new java.util.ArrayList<>(5); + phrases.add(phrase); + } + + /** Returns the list of contained phrases, or null */ + public List<Phrase> toList() { return phrases; } + + } + + /** + * Points to a collection of word items (one or more) + * 
which is matches a complete listing in an automat + */ + public static class Phrase { + + /** Points to the single or multiple words matched by this phrase */ + private Matched matched; + + private String data; + + + private Phrase(Matched matched,String data) { + this.matched=matched; + this.data=data; + } + + + public Phrase(TermItem item,String replace,String data) { + this(new MatchedWord(item,replace),data); + } + + /** + * Creates a phrase match + * + * @param owner the composite we have matched within + * @param replace the list of string to replace the matched by, or null to not replace. + * This transfers ownership of this list to this class - it can not subsequently be accessed + * by the caller. If this list is set, it must have the same length as <code>length</code>. + * No replacement is represented by null items within the list. + * @param startIndex the first index in composite to match + * @param length the length of the matched terms + * @param data the data accompanying this match + */ + private Phrase(CompositeItem owner,List<String> replace,int startIndex,int length,String data) { + this(new MatchedComposite(owner,replace,startIndex,length),data); + } + + /** Returns the owner, or null if this is a single item phrase with no owner */ + public CompositeItem getOwner() { return matched.getOwner(); } + + public int getStartIndex() { return matched.getStartIndex(); } + + public int getLength() { return matched.getLength(); } + + /** Returns the data stored by the automaton for this phrase at this position, or null if none */ + public String getData() { return data; } + + /** Returns the n'th item in this, throws if index out of bounds */ + public TermItem getItem(int index) { + return matched.getItem(index); + } + + /** Returns true if this phrase contains all the words of the owner, or if there is no owner */ + public boolean isComplete() { + return matched.isComplete(); + } + + /** Replaces the words items of this phrase with a phrase item. 
Does nothing if this is not a composite match */ + public void replace() { + matched.replace(); + } + + /** Removes the word items of this phrase. Does nothing nuless this is a composite */ + public void remove() { + matched.remove(); + } + + /** Returns the length of the underlying phrase */ + public int getBackedLength() { + return matched.getBackedLength(); + } + + /** Returns the items of this phrase as a read-only iterator */ + public MatchIterator itemIterator() { + return new MatchIterator(this); + } + + public String toString() { + StringBuilder buffer=new StringBuilder("\""); + for (Iterator<Item> i=itemIterator(); i.hasNext(); ) { + buffer.append(i.next().toString()); + if (i.hasNext()) + buffer.append(" "); + } + buffer.append("\""); + return buffer.toString(); + } + + private abstract static class Matched { + + public abstract CompositeItem getOwner(); + + public abstract int getStartIndex(); + + public abstract int getLength(); + + public abstract boolean isComplete(); + + /** Returns whether there is an index at the current item */ + public abstract boolean hasItemAt(int index); + + public void replace() {} + + public void remove() {} + + public abstract TermItem getItem(int index); + + public abstract String getReplace(int index); + + /** Returns the length of the underlying item */ + public abstract int getBackedLength(); + + public abstract boolean hasReplaces(); + + } + + private static class MatchedWord extends Matched { + + /** The term matched by this */ + private TermItem item; + + /** The word to replace the matched word by, or null to not replace */ + private String replace; + + public MatchedWord(TermItem item,String replace) { + this.item=item; + this.replace=replace; + } + + public Item getItem() { return item; } + + public boolean hasItemAt(int index) { + return index==0; + } + + public CompositeItem getOwner() { return null; } + + public int getStartIndex() { return 0; } + + public int getLength() { return 1; } + + public @Override 
TermItem getItem(int index) { + if (index!=0) throw new IndexOutOfBoundsException("No word at " + index + " in " + this); + return item; + } + + public boolean isComplete() { return true; } + + public int getBackedLength() { return 1; } + + public String getReplace(int index) { return replace; } + + public boolean hasReplaces() { return replace!=null; } + + } + + private static class MatchedComposite extends Matched { + + /** The item having the phrase words as direct descendants */ + private CompositeItem owner; + + /** The number of phrase items */ + private int length; + + private int initialOwnerLength; + + /** The (0-base) index of the first phrase word item in the owner */ + private int startIndex; + + /** The first matched item */ + private Item startItem; + + /** + * The word to replace by at the given index, or null if none of the phrase words should be replaced + * This is either null, or of length <code>length</code>, with null values where nothing should be replaced + */ + private List<String> replace=null; + + public MatchedComposite(CompositeItem owner,List<String> replace,int startIndex,int length) { + this.owner=owner; + this.initialOwnerLength=owner.getItemCount(); + this.replace = replace; + this.startIndex=startIndex; + this.startItem=owner.getItem(startIndex); + this.length=length; + } + + public CompositeItem getOwner() { return owner; } + + public int getStartIndex() { return startIndex; } + + public int getLength() { return length; } + + public int getBackedLength() { return owner.getItemCount()-startIndex; } + + public boolean hasItemAt(int index) { + adjustIfBackingChanged(); + if (startIndex<0) return false; // Invalid state because of backing changes + if ( index >= length ) return false; + if ( index+startIndex >= owner.getItemCount() ) return false; + return true; + } + + public boolean isComplete() { + return startIndex==0 && length==owner.getItemCount(); + } + + public @Override TermItem getItem(int index) { + 
adjustIfBackingChanged(); + return (TermItem)owner.getItem(startIndex+index); + } + + public String getReplace(int index) { + if (replace==null) return null; + return replace.get(index); + } + + public void replace() { + PhraseItem phrase=new PhraseItem(); + TermItem firstWord=(TermItem)owner.setItem(startIndex,phrase); + replace(firstWord,0); + phrase.setIndexName(firstWord.getIndexName()); + phrase.addItem(firstWord); + for (int i=1; i<length; i++) { + TermItem followingWord=(TermItem)owner.removeItem(startIndex+1); + replace(followingWord,i); + phrase.addItem(followingWord); + } + } + + private void replace(TermItem item,int index) { + if (replace==null) return; + String replaceString=replace.get(index); + if (replaceString==null) return; + item.setValue(replaceString); + } + + public void remove() { + for (int i=startIndex+length-1; i>=startIndex; i--) + owner.removeItem(i); + } + + public boolean hasReplaces() { return replace!=null; } + + /** + * Detects and attemts to compensate for a changed backing. 
Stop-gap measure until we get a through + * design for this + */ + private void adjustIfBackingChanged() { + if (owner.getItemCount()==initialOwnerLength) return; + startIndex=owner.getItemIndex(startItem); + } + + } + + public static class MatchIterator implements Iterator<Item> { + + private Phrase phrase; + + private int currentIndex=0; + + public MatchIterator(Phrase phrase) { + this.phrase=phrase; + } + + public boolean hasNext() { + return phrase.matched.hasItemAt(currentIndex); + //return (currentIndex<phrase.getLength()); + //return phrase.matched.hasItemAt(currentIndex); + } + + /** Returns the value to replace the item last returned by next(), or null to keep it as-is */ + public String getReplace() { + return phrase.matched.getReplace(currentIndex-1); + } + + public Item next() { + if (!hasNext()) + throw new NoSuchElementException(this + " has no more elements"); + + currentIndex++; + if ((phrase.matched instanceof MatchedWord)) + return ((MatchedWord)phrase.matched).getItem(); + else + return phrase.getOwner().getItem(phrase.getStartIndex()+currentIndex-1); + } + + public void remove() { + throw new UnsupportedOperationException("Can not remove from a phrasematcher phrase"); + } + + } + + } + + /** Returns a phrase matcher which (quickly) never matches anything */ + public static PhraseMatcher getNullMatcher() { + return new PhraseMatcher() { + + public List<Phrase> matchPhrases(Item item) { + return null; + } + }; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/PhrasingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/PhrasingSearcher.java new file mode 100644 index 00000000000..f3d4b09c65c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/PhrasingSearcher.java @@ -0,0 +1,76 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.querytransform; + +import com.google.inject.Inject; +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + + +import java.util.List; + +/** + * <p>Detects query phrases. When a phrase is detected in the query, + * the query is mutated to reflect this fact.</p> + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + * @author <a href="mailto:einarmr@yahoo-inc.com">Einar M R Rosenvinge</a> + */ +@After(PhaseNames.RAW_QUERY) +@Before(PhaseNames.TRANSFORMED_QUERY) +@Provides(PhrasingSearcher.PHRASE_REPLACEMENT) +public class PhrasingSearcher extends Searcher { + + private static final CompoundName suggestonly=new CompoundName("suggestonly"); + + public static final String PHRASE_REPLACEMENT = "PhraseReplacement"; + + private PhraseMatcher phraseMatcher; + + @Inject + public PhrasingSearcher(ComponentId id, QrSearchersConfig config) { + super(id); + setupAutomatonFile(config.com().yahoo().prelude().querytransform().PhrasingSearcher().automatonfile()); + } + + public PhrasingSearcher(String phraseAutomatonFile) { + setupAutomatonFile(phraseAutomatonFile); + } + + private void setupAutomatonFile(String phraseAutomatonFile) { + if (phraseAutomatonFile == null || phraseAutomatonFile.trim().equals("")) { + //no file, just use dummy matcher + phraseMatcher = PhraseMatcher.getNullMatcher(); + } else { + //use real matcher + phraseMatcher = new PhraseMatcher(phraseAutomatonFile,true); + } + } + + @Override + public com.yahoo.search.Result search(com.yahoo.search.Query query, Execution execution) { + List<PhraseMatcher.Phrase> replacePhrases = 
phraseMatcher.matchPhrases(query.getModel().getQueryTree().getRoot()); + if (replacePhrases != null && !query.properties().getBoolean(suggestonly, false)) { + replace(replacePhrases); + query.trace("Replacing phrases", true, 2); + } + return execution.search(query); + } + + /** Replaces all phrases longer than one word with a PhraseItem */ + private void replace(List<PhraseMatcher.Phrase> phrases) { + // Replacing the leaf replace phrases first to preserve + // the start index of each replace phrase until replacement + for (int i = phrases.size()-1; i >= 0; i--) { + PhraseMatcher.Phrase phrase = phrases.get(i); + if (phrase.getLength() > 1) + phrase.replace(); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/QueryRewrite.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/QueryRewrite.java new file mode 100644 index 00000000000..fe680bd5ad0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/QueryRewrite.java @@ -0,0 +1,241 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.querytransform; + +import com.yahoo.prelude.query.AndItem; +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.EquivItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.NearItem; +import com.yahoo.prelude.query.NotItem; +import com.yahoo.prelude.query.NullItem; +import com.yahoo.prelude.query.OrItem; +import com.yahoo.prelude.query.RankItem; +import com.yahoo.prelude.query.SimpleIndexedItem; +import com.yahoo.prelude.query.SubstringItem; +import com.yahoo.search.Query; +import com.yahoo.search.query.Model; +import com.yahoo.search.result.Hit; + +/** + * @author balder + */ +public class QueryRewrite { + + private static enum Recall { + RECALLS_EVERYTHING, + RECALLS_NOTHING, + UNKNOWN_RECALL + } + + /** + * Optimize multiple NotItems under and or by collapsing them in to one and leaving + * the positive ones behind in its place and moving itself with the original and as its positive item + * and the union of all the negative items of all the original NotItems as its negative items. 
+ * + * @param query to optimize + */ + public static void optimizeAndNot(Query query) { + Item root = query.getModel().getQueryTree().getRoot(); + Item possibleNewRoot = optimizeAndNot(root); + if (root != possibleNewRoot) { + query.getModel().getQueryTree().setRoot(possibleNewRoot); + } + } + private static Item optimizeAndNot(Item node) { + if (node instanceof CompositeItem) { + return extractAndNotRecursively((CompositeItem) node); + } + return node; + } + private static CompositeItem extractAndNotRecursively(CompositeItem parent) { + for (int i = 0; i < parent.getItemCount(); i++) { + Item child = parent.getItem(i); + Item possibleNewChild = optimizeAndNot(child); + if (child != possibleNewChild) { + parent.setItem(i, possibleNewChild); + } + } + if (parent instanceof AndItem) { + return extractAndNot((AndItem) parent); + } + return parent; + } + private static CompositeItem extractAndNot(AndItem parent) { + NotItem theOnlyNot = null; + for (int i = 0; i < parent.getItemCount(); i++) { + Item child = parent.getItem(i); + if (child instanceof NotItem) { + NotItem thisNot = (NotItem) child; + parent.setItem(i, thisNot.getPositiveItem()); + if (theOnlyNot == null) { + theOnlyNot = thisNot; + theOnlyNot.setPositiveItem(parent); + } else { + for (int j=1; j < thisNot.getItemCount(); j++) { + theOnlyNot.addNegativeItem(thisNot.getItem(j)); + } + } + } + } + return (theOnlyNot != null) ? theOnlyNot : parent; + } + /** + * Optimizes the given query tree based on its {@link Model#getRestrict()} parameter, if any. + * + * @param query to optimize. 
+ */ + public static void optimizeByRestrict(Query query) { + if (query.getModel().getRestrict().size() != 1) { + return; + } + Item root = query.getModel().getQueryTree().getRoot(); + if (optimizeByRestrict(root, query.getModel().getRestrict().iterator().next()) == Recall.RECALLS_NOTHING) { + query.getModel().getQueryTree().setRoot(new NullItem()); + } + } + + private static Recall optimizeByRestrict(Item item, String restrictParam) { + if (item instanceof SimpleIndexedItem) { + return optimizeIndexedItemByRestrict((SimpleIndexedItem)item, restrictParam); + } else if (item instanceof NotItem) { + return optimizeNotItemByRestrict((NotItem)item, restrictParam); + } else if (item instanceof CompositeItem) { + return optimizeCompositeItemByRestrict((CompositeItem)item, restrictParam); + } else { + return Recall.UNKNOWN_RECALL; + } + } + + private static Recall optimizeIndexedItemByRestrict(SimpleIndexedItem item, String restrictParam) { + if (!Hit.SDDOCNAME_FIELD.equals(item.getIndexName())) { + return Recall.UNKNOWN_RECALL; + } + // a query term searching for sddocname will either recall everything or nothing, depending on whether + // the term matches the restrict parameter or not + return restrictParam.equals(item.getIndexedString()) + ? 
Recall.RECALLS_EVERYTHING + : Recall.RECALLS_NOTHING; + } + + private static Recall optimizeNotItemByRestrict(NotItem item, String restrictParam) { + // first item is the positive one + if (optimizeByRestrict(item.getItem(0), restrictParam) == Recall.RECALLS_NOTHING) { + return Recall.RECALLS_NOTHING; + } + // all the remaining items are negative ones + for (int i = item.getItemCount(); --i >= 1; ) { + Item child = item.getItem(i); + switch (optimizeByRestrict(child, restrictParam)) { + case RECALLS_EVERYTHING: + return Recall.RECALLS_NOTHING; + case RECALLS_NOTHING: + item.removeItem(i); + break; + } + } + return Recall.UNKNOWN_RECALL; + } + + private static Recall optimizeCompositeItemByRestrict(CompositeItem item, String restrictParam) { + for (int i = item.getItemCount(); --i >= 0; ) { + switch (optimizeByRestrict(item.getItem(i), restrictParam)) { + case RECALLS_EVERYTHING: + if ((item instanceof OrItem) || (item instanceof EquivItem)) { + retainChild(item, i); + return Recall.RECALLS_EVERYTHING; + } else if ((item instanceof AndItem) || (item instanceof NearItem)) { + item.removeItem(i); + } else if (item instanceof RankItem) { + // empty + } else { + throw new UnsupportedOperationException(item.getClass().getName()); + } + break; + case RECALLS_NOTHING: + if ((item instanceof OrItem) || (item instanceof EquivItem)) { + item.removeItem(i); + } else if ((item instanceof AndItem) || (item instanceof NearItem)) { + return Recall.RECALLS_NOTHING; + } else if (item instanceof RankItem) { + item.removeItem(i); + } else { + throw new UnsupportedOperationException(item.getClass().getName()); + } + break; + } + } + return Recall.UNKNOWN_RECALL; + } + + private static void retainChild(CompositeItem item, int childIdx) { + Item child = item.removeItem(childIdx); + for (int i = item.getItemCount(); --i >= 0; ) { + item.removeItem(i); + } + item.addItem(child); + } + + /** + * Collapses all single-child {@link CompositeItem}s into their parent item. 
+ * + * @param query The query whose composites to collapse. + */ + public static void collapseSingleComposites(Query query) { + Item oldRoot = query.getModel().getQueryTree().getRoot(); + Item newRoot = collapseSingleComposites(oldRoot); + if (oldRoot != newRoot) { + query.getModel().getQueryTree().setRoot(newRoot); + } + } + + private static Item collapseSingleComposites(Item item) { + if (!(item instanceof CompositeItem)) { + return item; + } + CompositeItem parent = (CompositeItem)item; + int numChildren = parent.getItemCount(); + for (int i = 0; i < numChildren; ++i) { + Item oldChild = parent.getItem(i); + Item newChild = collapseSingleComposites(oldChild); + if (oldChild != newChild) { + parent.setItem(i, newChild); + } + } + return numChildren == 1 ? parent.getItem(0) : item; + } + + /** + * Replaces and {@link SimpleIndexedItem} searching in the {@link Hit#SDDOCNAME_FIELD} with an item + * appropriate for the search node. + * + * @param query The query to rewrite. + */ + public static void rewriteSddocname(Query query) { + Item oldRoot = query.getModel().getQueryTree().getRoot(); + Item newRoot = rewriteSddocname(oldRoot); + if (oldRoot != newRoot) { + query.getModel().getQueryTree().setRoot(newRoot); + } + } + + private static Item rewriteSddocname(Item item) { + if (item instanceof CompositeItem) { + CompositeItem parent = (CompositeItem)item; + for (int i = 0, len = parent.getItemCount(); i < len; ++i) { + Item oldChild = parent.getItem(i); + Item newChild = rewriteSddocname(oldChild); + if (oldChild != newChild) { + parent.setItem(i, newChild); + } + } + } else if (item instanceof SimpleIndexedItem) { + SimpleIndexedItem oldItem = (SimpleIndexedItem)item; + if (Hit.SDDOCNAME_FIELD.equals(oldItem.getIndexName())) { + SubstringItem newItem = new SubstringItem(oldItem.getIndexedString()); + newItem.setIndexName("[documentmetastore]"); + return newItem; + } + } + return item; + } +} diff --git 
a/container-search/src/main/java/com/yahoo/prelude/querytransform/RecallSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/RecallSearcher.java new file mode 100644 index 00000000000..4a47b23d30d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/RecallSearcher.java @@ -0,0 +1,156 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.querytransform; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.prelude.query.*; +import com.yahoo.prelude.query.parser.AnyParser; +import com.yahoo.search.Query; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.query.parser.Parsable; +import com.yahoo.search.query.parser.ParserEnvironment; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.Iterator; +import java.util.Stack; + +import static com.yahoo.prelude.querytransform.NormalizingSearcher.ACCENT_REMOVAL; +import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; + +/** + * This searcher parses the content of the "recall" query property as a filter expression alongside a placeholder + * query string. The node corresponding to the placeholder query is then swapped with the current query tree. This allows + * us to parse "recall" using the same rules as "filter" without modifying the parser. + * + * If the "recall" property is unset, this searcher does nothing. 
+ * + * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + */ +@After("com.yahoo.search.querytransform.WandSearcher") +@Before({STEMMING, ACCENT_REMOVAL}) +public class RecallSearcher extends Searcher { + + private static final CompoundName recallName=new CompoundName("recall"); + + @Override + public com.yahoo.search.Result search(Query query, Execution execution) { + String recall = query.properties().getString(recallName); + if (recall == null) { + return execution.search(query); + } + AnyParser parser = new AnyParser( + ParserEnvironment.fromExecutionContext(execution.context())); + QueryTree root = parser.parse(Parsable.fromQueryModel(query.getModel()) + .setQuery("foo").setFilter(recall)); + String err; + if (root.getRoot() instanceof NullItem) { + err = "Failed to parse recall parameter."; + } else if (!(root.getRoot() instanceof CompositeItem)) { + err = "Expected CompositeItem root node, got " + + root.getClass().getSimpleName() + "."; + } else if (hasRankItem(root.getRoot())) { + query.getModel().getQueryTree().setRoot(root.getRoot()); + + err = "Recall contains at least one rank item."; + } else { + WordItem placeholder = findOrigWordItem(root.getRoot(), "foo"); + if (placeholder == null) { + err = "Could not find placeholder workQuery root."; + } else { + updateFilterTerms(root); + CompositeItem parent = placeholder.getParent(); + parent.setItem(parent.getItemIndex(placeholder), query + .getModel().getQueryTree().getRoot()); + query.getModel().getQueryTree().setRoot(root.getRoot()); + + query.trace("ANDed recall tree with root workQuery node.", + true, 3); + return execution.search(query); + } + } + com.yahoo.search.Result ret = new com.yahoo.search.Result(query); + ret.hits().addError(ErrorMessage.createInvalidQueryParameter(err)); + return ret; + } + + /** + * Returns true if the given item tree contains at least one instance of {@link RankItem}. + * + * @param root The root of the tree to check. 
+ * @return True if a rank item was found. + */ + private static boolean hasRankItem(Item root) { + Stack<Item> stack = new Stack<>(); + stack.push(root); + while (!stack.isEmpty()) { + Item item = stack.pop(); + if (item instanceof RankItem) { + return true; + } + if (item instanceof CompositeItem) { + CompositeItem lst = (CompositeItem)item; + for (Iterator<Item> it = lst.getItemIterator(); it.hasNext();) { + stack.push(it.next()); + } + } + } + return false; + } + + /** + * Returns the first word item contained in the given item tree that is an instance of {@link WordItem} with the + * given word value. + * + * @param root The root of the tree to check. + * @param value The word to look for. + * @return The first node found. + */ + private static WordItem findOrigWordItem(Item root, String value) { + Stack<Item> stack = new Stack<>(); + stack.push(root); + while (!stack.isEmpty()) { + Item item = stack.pop(); + if (item.getCreator() == Item.ItemCreator.ORIG && + item instanceof WordItem) + { + WordItem word = (WordItem)item; + if (word.getWord().equals(value)) { + return word; + } + } + if (item instanceof CompositeItem) { + CompositeItem lst = (CompositeItem)item; + for (Iterator<Item> it = lst.getItemIterator(); it.hasNext();) { + stack.push(it.next()); + } + } + } + return null; + } + + /** + * Marks all filter terms in the given query tree as unranked. + * + * @param root The root of the tree to update. 
+ */ + private static void updateFilterTerms(Item root) { + Stack<Item> stack = new Stack<>(); + stack.push(root); + while (!stack.isEmpty()) { + Item item = stack.pop(); + if (item.getCreator() == Item.ItemCreator.FILTER) { + item.setRanked(false); + } + if (item instanceof CompositeItem) { + CompositeItem lst = (CompositeItem)item; + for (Iterator<Item> it = lst.getItemIterator(); it.hasNext();) { + stack.push(it.next()); + } + } + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java new file mode 100644 index 00000000000..dfa7a024224 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/querytransform/StemmingSearcher.java @@ -0,0 +1,431 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.querytransform; + +import com.google.inject.Inject; +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.language.Language; +import com.yahoo.language.Linguistics; +import com.yahoo.language.process.StemMode; +import com.yahoo.language.process.StemList; +import com.yahoo.log.LogLevel; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.*; +import com.yahoo.prelude.query.WordAlternativesItem.Alternative; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.*; + +import static com.yahoo.prelude.querytransform.CJKSearcher.TERM_ORDER_RELAXATION; + + +/** + * Replaces query terms with their stems + * + * @author <a href="mailto:mathiasm@yahoo-inc.com">Mathias 
Lidal</a> + * @author bratseth + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@After({PhaseNames.UNBLENDED_RESULT, TERM_ORDER_RELAXATION}) +@Provides(StemmingSearcher.STEMMING) +public class StemmingSearcher extends Searcher { + + public static final String STEMMING = "Stemming"; + public static final CompoundName DISABLE = new CompoundName("nostemming"); + private final Linguistics linguistics; + + public StemmingSearcher(Linguistics linguistics) { + this.linguistics = linguistics; + } + + @Inject + public StemmingSearcher(ComponentId id, Linguistics linguistics) { + super(id); + this.linguistics = linguistics; + } + + @Override + public Result search(Query query, Execution execution) { + if (query.properties().getBoolean(DISABLE)) return execution.search(query); + + IndexFacts.Session indexFacts = execution.context().getIndexFacts().newSession(query); + Item newRoot = replaceTerms(query, indexFacts); + query.getModel().getQueryTree().setRoot(newRoot); + + query.trace(getFunctionName(), true, 2); + + Highlight highlight = query.getPresentation().getHighlight(); + if (highlight != null) { + Set<String> highlightFields = highlight.getHighlightItems().keySet(); + for (String field : highlightFields) { + StemMode stemMode = indexFacts.getIndex(field).getStemMode(); + if (stemMode != StemMode.NONE) { + Item newHighlight = scan(highlight.getHighlightItems().get(field), false, Language.ENGLISH, indexFacts, null); + highlight.getHighlightItems().put(field, (AndItem)newHighlight); + } + } + } + return execution.search(query); + } + + public String getFunctionName() { return "Stemming"; } + + private Item replaceTerms(Query q, IndexFacts.Session indexFacts) { + Language l = q.getModel().getParsingLanguage(); + if (l == Language.UNKNOWN) { + return q.getModel().getQueryTree().getRoot(); + } + return scan(q.getModel().getQueryTree().getRoot(), l.isCjk(), l, indexFacts, + createReverseConnectivities(q.getModel().getQueryTree().getRoot())); + } + + 
private Map<Item, TaggableItem> createReverseConnectivities(Item root) { + return populateReverseConnectivityMap(root, new IdentityHashMap<>()); + } + + private Map<Item, TaggableItem> populateReverseConnectivityMap(Item root, Map<Item, TaggableItem> reverseConnectivity) { + if (root instanceof TaggableItem) { + TaggableItem asTaggable = (TaggableItem) root; + Item connectsTo = asTaggable.getConnectedItem(); + if (connectsTo != null) { + reverseConnectivity.put(connectsTo, asTaggable); + } + } + if (root instanceof CompositeItem && !(root instanceof BlockItem)) { + CompositeItem c = (CompositeItem) root; + for (Iterator<Item> i = c.getItemIterator(); i.hasNext();) { + Item item = i.next(); + populateReverseConnectivityMap(item, reverseConnectivity); + } + } + return reverseConnectivity; + } + + private Item scan(Item item, + boolean isCJK, + Language l, + IndexFacts.Session indexFacts, + Map<Item, TaggableItem> reverseConnectivity) { + if (item == null) { + return null; + } else if (item instanceof BlockItem) { + return checkBlock((BlockItem) item, isCJK, l, indexFacts, reverseConnectivity); + } else if (item instanceof CompositeItem) { + CompositeItem comp = (CompositeItem) item; + ListIterator<Item> i = comp.getItemIterator(); + + while (i.hasNext()) { + Item original = i.next(); + Item transformed = scan(original, isCJK, l, indexFacts, reverseConnectivity); + if (original != transformed) + i.set(transformed); + } + return item; + } else { + return item; + } + } + + private Item checkBlock(BlockItem b, boolean isCJK, Language language, + IndexFacts.Session indexFacts, Map<Item, TaggableItem> reverseConnectivity) { + if (b instanceof PrefixItem || !b.isWords()) return (Item) b; + + if (b.isFromQuery() && !b.isStemmed()) { + final Index index = indexFacts.getIndex(b.getIndexName()); + StemMode stemMode = index.getStemMode(); + if (stemMode != StemMode.NONE) return stem(b, isCJK, language, reverseConnectivity, index); + } + return (Item) b; + } + + private Substring 
getOffsets(BlockItem b) { + if (b instanceof TermItem) { + return b.getOrigin(); + } else if (b instanceof CompositeItem) { + Item i = ((CompositeItem) b).getItem(0); + if (i instanceof TermItem) { + return ((TermItem) i).getOrigin(); // this should always be the case + } else { + getLogger().log(LogLevel.WARNING, + "Weird, BlockItem '" + b + "' was a composite containing " + i.getClass().getName() + + ", expected TermItem."); + } + } + return null; + } + + // The rewriting logic is here + private Item stem(BlockItem current, boolean isCJK, + Language language, Map<Item, TaggableItem> reverseConnectivity, Index index) { + Item blockAsItem = (Item)current; + CompositeItem composite; + List<StemList> segments = linguistics.getStemmer().stem(current.stringValue(), index.getStemMode(), language); + String indexName = current.getIndexName(); + Substring substring = getOffsets(current); + + if (segments.size() == 1) { + TaggableItem w = singleWordSegment(current, segments.get(0), index, substring); + setMetaData(current, reverseConnectivity, w); + return (Item) w; + } + + if (isCJK) { + composite = chooseCompositeForCJK(current, + ((Item) current).getParent(), + indexName); + } else { + composite = phraseSegment(current, indexName); + } + + for (StemList segment : segments) { + TaggableItem w = singleWordSegment(current, segment, index, substring); + + if (composite instanceof AndSegmentItem) { + setSignificance(w, current); + } + composite.addItem((Item) w); + } + if (composite instanceof AndSegmentItem) { + andSegmentConnectivity(current, reverseConnectivity, composite); + } + copyAttributes(blockAsItem, composite); + composite.lock(); + + if (composite instanceof PhraseSegmentItem) { + PhraseSegmentItem replacement = (PhraseSegmentItem) composite; + setSignificance(replacement, current); + phraseSegmentConnectivity(current, reverseConnectivity, replacement); + } + + return composite; + } + + private void phraseSegmentConnectivity(BlockItem current, + Map<Item, 
TaggableItem> reverseConnectivity, + PhraseSegmentItem replacement) { + Connectivity c = getConnectivity(current); + if (c != null) { + replacement.setConnectivity(c.word, c.value); + reverseConnectivity.put(c.word, replacement); + } + setConnectivity(current, reverseConnectivity, replacement); + } + + private void andSegmentConnectivity(BlockItem current, + Map<Item, TaggableItem> reverseConnectivity, CompositeItem composite) { + // if the original has connectivity to something, add to last word + Connectivity connectivity = getConnectivity(current); + if (connectivity != null) { + TaggableItem w = lastWord(composite); + if (w != null) { + w.setConnectivity(connectivity.word, connectivity.value); + reverseConnectivity.put(connectivity.word, w); + } + } + // If we create an AND from something taggable, add connectivity to the first word + TaggableItem w = firstWord(composite); + if (w != null) { + setConnectivity(current, reverseConnectivity, (Item) w); + } + } + + private Connectivity getConnectivity(BlockItem current) { + if (!(current instanceof TaggableItem)) { + return null; + } + TaggableItem t = (TaggableItem) current; + if (t.getConnectedItem() == null) { + return null; + } + return new Connectivity(t.getConnectedItem(), t.getConnectivity()); + } + + private TaggableItem firstWord(CompositeItem composite) { + // yes, this assumes only WordItem instances in the CompositeItem + int l = composite.getItemCount(); + if (l == 0) { + return null; + } else { + return (TaggableItem) composite.getItem(0); + } + } + + private TaggableItem lastWord(CompositeItem composite) { + // yes, this assumes only WordItem instances in the CompositeItem + int l = composite.getItemCount(); + if (l == 0) { + return null; + } else { + return (TaggableItem) composite.getItem(l - 1); + } + } + + private TaggableItem singleWordSegment(BlockItem current, + StemList segment, + Index index, + Substring substring) + { + String indexName = current.getIndexName(); + if 
(index.getLiteralBoost() || index.getStemMode() == StemMode.ALL) { + // Yes, this will create a new WordAlternativesItem even if stemmed + // and original form are identical. This is to decrease complexity + // in accent removal and lowercasing. + List<Alternative> terms = new ArrayList<>(segment.size() + 1); + terms.add(new Alternative(current.stringValue(), 1.0d)); + for (String term : segment) { + terms.add(new Alternative(term, 0.7d)); + } + WordAlternativesItem alternatives = new WordAlternativesItem(indexName, current.isFromQuery(), substring, terms); + return alternatives; + } else { + WordItem first = singleStemSegment((Item) current, segment.get(0), indexName, substring); + return first; + } + } + + private void setMetaData(BlockItem current, Map<Item, TaggableItem> reverseConnectivity, TaggableItem replacement) { + copyAttributes((Item) current, (Item) replacement); + setSignificance(replacement, current); + Connectivity c = getConnectivity(current); + if (c != null) { + replacement.setConnectivity(c.word, c.value); + reverseConnectivity.put(c.word, replacement); + } + setConnectivity(current, reverseConnectivity, (Item) replacement); + } + + private WordItem singleStemSegment(Item blockAsItem, String stem, String indexName, + Substring substring) + { + WordItem replacement = new WordItem(stem, indexName, true, substring); + replacement.setStemmed(true); + copyAttributes(blockAsItem, replacement); + return replacement; + } + + private void setConnectivity(BlockItem current, + Map<Item, TaggableItem> reverseConnectivity, + Item replacement) + { + if (reverseConnectivity != null && !reverseConnectivity.isEmpty()) { + // This Map<Item, TaggableItem>.get(BlockItem) is technically wrong, but the Item API ensures its correctness + TaggableItem connectedTo = reverseConnectivity.get(current); + if (connectedTo != null) { + double connectivity = connectedTo.getConnectivity(); + connectedTo.setConnectivity(replacement, connectivity); + } + } + } + + private 
CompositeItem chooseCompositeForCJK(BlockItem current, + CompositeItem parent, String indexName) { + CompositeItem composite; + if (current.getSegmentingRule() == SegmentingRule.LANGUAGE_DEFAULT) { + if (parent instanceof PhraseItem + || current instanceof PhraseSegmentItem) { + composite = phraseSegment(current, indexName); + } else + composite = createAndSegment(current); + } else { + switch (current.getSegmentingRule()) { + case PHRASE: + composite = phraseSegment(current, indexName); + break; + case BOOLEAN_AND: + composite = createAndSegment(current); + break; + default: + throw new IllegalArgumentException( + "Unknown segmenting rule: " + + current.getSegmentingRule() + + ". This is a bug in Vespa, as the implementation has gotten out of sync." + + " Please create a ticket as soon as possible."); + } + } + return composite; + } + + private AndSegmentItem createAndSegment(BlockItem current) { + return new AndSegmentItem(current.stringValue(), true, true); + } + + private CompositeItem phraseSegment(BlockItem current, String indexName) { + CompositeItem composite; + composite = new PhraseSegmentItem(current.getRawWord(), current.stringValue(), true, true); + composite.setIndexName(indexName); + return composite; + } + + private void copyAttributes(Item blockAsItem, Item replacement) { + copyWeight(blockAsItem, replacement); + replacement.setCreator(blockAsItem.getCreator()); + replacement.setRanked(blockAsItem.isRanked()); + replacement.setPositionData(blockAsItem.usePositionData()); + } + + private void copyWeight(Item block, Item replacement) { + int weight = getWeight(block); + setWeight(replacement, weight); + } + + private int getWeight(Item block) { + if (block instanceof AndSegmentItem + && ((AndSegmentItem) block).getItemCount() > 0) { + return ((AndSegmentItem) block).getItem(0).getWeight(); + } else { + return block.getWeight(); + } + } + + // this smells like an extension of AndSegmentItem... 
+ private void setWeight(Item replacement, int weight) { + if (replacement instanceof AndSegmentItem) { + for (Iterator<Item> i = ((AndSegmentItem) replacement).getItemIterator(); + i.hasNext();) { + i.next().setWeight(weight); + } + } else { + replacement.setWeight(weight); + } + } + + // TODO: Next four methods indicate Significance should be bubbled up the class hierarchy + // TODO: Perhaps Significance should bubble up, but the real problem is the class/interface hierarchy for queries is in dire need of restructuring + private void setSignificance(PhraseSegmentItem target, BlockItem original) { + if (hasExplicitSignificance(original)) target.setSignificance(getSignificance(original)); + } + + private void setSignificance(TaggableItem target, BlockItem original) { + if (hasExplicitSignificance(original)) target.setSignificance(getSignificance(original)); //copy + } + + private boolean hasExplicitSignificance(BlockItem blockItem) { + if (blockItem instanceof TermItem ) return ((TermItem)blockItem).hasExplicitSignificance(); + if (blockItem instanceof PhraseSegmentItem ) return ((PhraseSegmentItem)blockItem).hasExplicitSignificance(); + return false; + } + + //assumes blockItem instanceof TermItem or PhraseSegmentItem + private double getSignificance(BlockItem blockItem) { + if (blockItem instanceof TermItem) return ((TermItem)blockItem).getSignificance(); + else return ((PhraseSegmentItem)blockItem).getSignificance(); + } + + private static class Connectivity { + public final Item word; + public final double value; + + public Connectivity(Item connectedItem, double connectivity) { + this.word = connectedItem; + this.value = connectivity; + } + + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/package-info.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/package-info.java new file mode 100644 index 00000000000..1d7dbb946d9 --- /dev/null +++ 
b/container-search/src/main/java/com/yahoo/prelude/querytransform/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.prelude.querytransform; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/BlendingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/BlendingSearcher.java new file mode 100644 index 00000000000..268fe5f4ea5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/BlendingSearcher.java @@ -0,0 +1,276 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + + +import com.google.inject.Inject; +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.*; + + +/** + * Flattens a result consisting of multiple hit groups containing hits + * into a single flat list of hits. 
+ * + * @author Bob Travis + * @author Steinar Knutsen + * @author Arne Fossaa + */ +@After(PhaseNames.BLENDED_RESULT) +@Before(PhaseNames.UNBLENDED_RESULT) +@Provides(BlendingSearcher.BLENDING) +public class BlendingSearcher extends Searcher { + + public static final String BLENDING = "Blending"; + + private final String documentId; + + @Inject + public BlendingSearcher(ComponentId id, QrSearchersConfig cfg) { + super(id); + QrSearchersConfig.Com.Yahoo.Prelude.Searcher.BlendingSearcher s = cfg.com().yahoo().prelude().searcher().BlendingSearcher(); + documentId = s.docid().length() > 0 ? s.docid() : null; + + } + + /** + * Only for legacy tests. + */ + public BlendingSearcher(String blendingDocumentId) { + this.documentId = blendingDocumentId; + } + + @Override + public com.yahoo.search.Result search(com.yahoo.search.Query query, Execution execution) { + Result result = execution.search(query); + + Result blended = blendResults(result, query, query.getOffset(), query.getHits(), execution); + blended.trace("Blended result"); + return blended; + } + + /** + * Fills this result by forwarding to the right chained searchers + */ + @Override + public void fill(com.yahoo.search.Result result, String summaryClass, Execution execution) { + execution.fill(result, summaryClass); + result.analyzeHits(); + } + + /** + * Produce a single blended result list from a group of hitgroups. + * + * It is assumed that the results are ordered in hitgroups. 
If not, the blend will not be performed + */ + protected Result blendResults(Result result, Query q, int offset, int hits, Execution execution) { + + //Assert that there are more than one hitgroup and that there are only hitgroups on the lowest level + + boolean foundNonGroup = false; + Iterator<Hit> hitIterator = result.hits().iterator(); + List<HitGroup> groups = new ArrayList<>(); + while (hitIterator.hasNext()) { + Hit hit = hitIterator.next(); + if (hit instanceof HitGroup) { + groups.add((HitGroup)hit); + hitIterator.remove(); + } else if(!hit.isMeta()) { + foundNonGroup = true; + } + } + + if(foundNonGroup) { + result.hits().addError(ErrorMessage.createUnspecifiedError("Blendingsearcher could not blend - there are toplevel hits" + + " that are not hitgroups")); + return result; + } + if (groups.size() == 0) { + return result; + } else if (groups.size() == 1) { + result.hits().addAll(groups.get(0).asUnorderedHits()); + result.hits().setOrderer(groups.get(0).getOrderer()); + return result; + } else { + if (documentId != null) { + return blendResultsUniquely(result, q, offset, hits, groups, execution); + } else { + return blendResultsDirectly(result, q, offset, hits, groups, execution); + } + } + } + + private Result sortAndTrimResults(Result result, Query q, int offset, int hits, Execution execution) { + if (q.getRanking().getSorting() != null) { + execution.fillAttributes(result); // Always correct as we can only sort on attributes + result.hits().sort(); + } + result.hits().trim(offset, hits); + return result; + } + + private abstract class DocumentMerger { + protected Set<String> documentsToStrip; + protected Result result; + protected HitGroup group; + + abstract void put(HitGroup source, Hit hit, Execution execution); + + abstract void scan(Hit hit, int i, Execution execution); + + Result getResult() { + return result; + } + + //Since we cannot use prelude.hit#getProperty, we'll have to improvise + private String getProperty(Hit hit, String field) { + 
Object o = hit.getField(field); + return o == null ? null : o.toString(); + } + + + protected void storeID(Hit hit, Execution execution) { + String id = getProperty(hit, documentId); + + if (id != null) { + documentsToStrip.add(id); + } else { + if (!result.isFilled(result.getQuery().getPresentation().getSummary())) { + fill(result, result.getQuery().getPresentation().getSummary(), execution); + id = getProperty(hit, documentId); + if (id != null) { + documentsToStrip.add(id); + } + } + } + } + + protected boolean known(HitGroup source, Hit hit, Execution execution) { + String stripID = getProperty(hit, documentId); + + if (stripID == null) { + if (!source.isFilled(result.getQuery().getPresentation().getSummary())) { + Result nResult = new Result(result.getQuery()); + nResult.hits().add(source); + fill(nResult, nResult.getQuery().getPresentation().getSummary(), execution); + stripID = getProperty(hit, documentId); + if (stripID == null) { + return false; + } + } else { + return false; + } + } + + if (documentsToStrip.contains(stripID)) { + return true; + } + + documentsToStrip.add(stripID); + return false; + } + + void scanResult(Execution execution) { + List<Hit> hits = group.asUnorderedHits(); + for (int i = hits.size()-1; i >= 0; i--) { + Hit sniffHit = hits.get(i); + if (!sniffHit.isMeta()) { + scan(sniffHit, i, execution); + } else { + result.hits().add(sniffHit); + } + } + } + + void mergeResults(List<HitGroup> groups, Execution execution) { + // note, different loop direction from scanResult() + for(HitGroup group : groups.subList(1, groups.size())) { + for(Hit hit : group.asList()) { + if(hit.isMeta()) { + result.hits().add(hit); + } else { + put(group, hit, execution); + } + } + } + } + } + + + private class BasicMerger extends DocumentMerger { + BasicMerger(Result result, HitGroup group) { + this.result = result; + this.group = group; + } + + void put(HitGroup source, Hit hit, Execution execution) { + result.hits().add(hit); + } + + void scan(Hit hit, int 
i, Execution execution) { + result.hits().add(hit); + } + } + + + private class UniqueMerger extends DocumentMerger { + UniqueMerger(Result result, HitGroup group, Set<String> documentsToStrip) { + this.documentsToStrip = documentsToStrip; + this.result = result; + this.group = group; + } + + void scan(Hit hit, int i, Execution execution) { + result.hits().add(hit); + if (!hit.isMeta()) { + storeID(hit, execution); + } + } + + void put(HitGroup source, Hit hit, Execution execution) { + if (!hit.isMeta()) { + if (!known(source, hit, execution)) { + addHit(hit); + } + } else { + result.hits().add(hit); + } + } + + protected void addHit(Hit hit) { + result.hits().add(hit); + } + + } + + private Result blendResultsDirectly(Result result, Query q, int offset, + int hits, List<HitGroup> groups, Execution execution) { + DocumentMerger m = new BasicMerger(result, groups.get(0)); + + m.scanResult(execution); + m.mergeResults(groups, execution); + return sortAndTrimResults(m.getResult(), q, offset, hits, execution); + } + + private Result blendResultsUniquely(Result result, Query q, int offset, + int hits, List<HitGroup> groups, Execution execution) { + DocumentMerger m = new UniqueMerger(result, groups.get(0), new HashSet<>(20)); + + m.scanResult(execution); + m.mergeResults(groups, execution); + return sortAndTrimResults(m.getResult(), q, offset, hits, execution); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/CachingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/CachingSearcher.java new file mode 100644 index 00000000000..1152246a32e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/CachingSearcher.java @@ -0,0 +1,77 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.searcher; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.prelude.cache.Cache; +import com.yahoo.prelude.cache.QueryCacheKey; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.statistics.Statistics; +import com.yahoo.statistics.Value; + +/** + * A generic caching searcher which caches all passing results. + * + * @author vegardh + */ +@After("rawQuery") +@Before("transformedQuery") +public class CachingSearcher extends Searcher { + + private static final CompoundName nocachewrite=new CompoundName("nocachewrite"); + + private Cache<QueryCacheKey, Result> cache; + private Value cacheHitRatio = null; + + public CachingSearcher(QrSearchersConfig config, Statistics manager) { + long maxSizeBytes = config.com().yahoo().prelude().searcher().CachingSearcher().cachesizemegabytes()*1024*1024; + long timeToLiveMillis = config.com().yahoo().prelude().searcher().CachingSearcher().timetoliveseconds()*1000; + long maxEntrySizeBytes = config.com().yahoo().prelude().searcher().CachingSearcher().maxentrysizebytes(); + cache=new Cache<>(maxSizeBytes, timeToLiveMillis, maxEntrySizeBytes, manager); + initRatio(manager); + } + + private void initRatio(Statistics manager) { + cacheHitRatio = new Value("querycache_hit_ratio", manager, + new Value.Parameters().setNameExtension(false).setLogRaw(false).setLogMean(true)); + } + + private synchronized void cacheHit() { + cacheHitRatio.put(1.0d); + } + + private synchronized void cacheMiss() { + cacheHitRatio.put(0.0d); + } + + private boolean noCacheWrite(Query query) { + return query.properties().getBoolean(nocachewrite); + } + + public Result search(com.yahoo.search.Query query, Execution execution) { + if 
(query.getNoCache()) { + return execution.search(query); + } + QueryCacheKey queryKey = new QueryCacheKey(query); + Result cachedResult=cache.get(queryKey); + if (cachedResult!=null) { + cacheHit(); + return cachedResult; + } + cacheMiss(); + Query originalQuery = query.clone(); // Need a copy, as cache hash key later on, maybe. + Result result = execution.search(query); + execution.fill(result); + if (!noCacheWrite(query)) { + queryKey.setQuery(originalQuery); // Because the query member has changed state + cache.put(queryKey,result); + } + return result; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/DocumentSourceSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/DocumentSourceSearcher.java new file mode 100644 index 00000000000..f4b3ab3406a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/DocumentSourceSearcher.java @@ -0,0 +1,222 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + + +/** + * <p>Implements a document source. You pass in a query and a Result + * set. When this Searcher is called with that query it will return + * that result set.</p> + * + * <p>This supports multi-phase search.</p> + * + * <p>To avoid having to add type information for the fields, a quck hack is used to + * support testing of attribute prefetching. 
+ * Any field in the configured hits which has a name starting by attribute + * will be returned when attribute prefetch filling is requested.</p> + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +@SuppressWarnings({"rawtypes"}) +public class DocumentSourceSearcher extends Searcher { + // as for the SuppressWarnings annotation above, we are inside + // com.yahoo.prelude, this is old stuff, really no point firing off those + // warnings here... + + private Result defaultFilledResult; + private Map<Query, Result> completelyFilledResults = new HashMap<>(); + private Map<Query, Result> attributeFilledResults = new HashMap<>(); + private Map<Query, Result> unFilledResults = new HashMap<>(); + //private Result defaultUnfilledResult; + + /** Time (in ms) at which the index of this searcher was last modified */ + long editionTimeStamp=0; + + private int queryCount; + + public DocumentSourceSearcher() { + addDefaultResults(); + } + + /** + * Adds a result which can be returned either as empty, + * filled or attribute only filled later. + * Summary fields starting by "a" are attributes, others are not. + * + * @return true when replacing an existing <query, result> pair. 
+ */ + public boolean addResultSet(Query query, Result fullResult) { + Result emptyResult = new Result(query.clone()); + Result attributeResult = new Result(query.clone()); + emptyResult.setTotalHitCount(fullResult.getTotalHitCount()); + attributeResult.setTotalHitCount(fullResult.getTotalHitCount()); + int counter=0; + for (Iterator i = fullResult.hits().deepIterator();i.hasNext();) { + Hit fullHit = (Hit)i.next(); + + Hit emptyHit = (Hit)fullHit.clone(); + emptyHit.clearFields(); + emptyHit.setFillable(); + emptyHit.setRelevance(fullHit.getRelevance()); + + Hit attributeHit = (Hit)fullHit.clone(); + removePropertiesNotStartingByA(attributeHit); + attributeHit.setFillable(); + attributeHit.setRelevance(fullHit.getRelevance()); + for (Object propertyKeyObject : (Set) fullHit.fields().keySet()) { + String propertyKey=propertyKeyObject.toString(); + if (propertyKey.startsWith("attribute")) + attributeHit.setField(propertyKey, fullHit.getField(propertyKey)); + } + if (fullHit.getField(Hit.SDDOCNAME_FIELD)!=null) + attributeHit.setField(Hit.SDDOCNAME_FIELD, fullHit.getField(Hit.SDDOCNAME_FIELD)); + + // A simple summary lookup mechanism, similar to FastSearch's + emptyHit.setField("summaryid", String.valueOf(counter)); + attributeHit.setField("summaryid", String.valueOf(counter)); + fullHit.setField("summaryid", String.valueOf(counter)); + + counter++; + emptyResult.hits().add(emptyHit); + attributeResult.hits().add(attributeHit); + } + unFilledResults.put(getQueryKeyClone(query), emptyResult); + attributeFilledResults.put(getQueryKeyClone(query), attributeResult); + if (completelyFilledResults.put(getQueryKeyClone(query), fullResult.clone()) != null) { + setEditionTimeStamp(System.currentTimeMillis()); + return true; + } + return false; + } + + /** + * Returns a query clone which has offset and hits set to null. 
This is used by access to + * the maps using the query as key to achieve lookup independent of offset/hits value + */ + private com.yahoo.search.Query getQueryKeyClone(com.yahoo.search.Query query) { + com.yahoo.search.Query key=query.clone(); + key.setWindow(0,0); + key.getModel().setSources(""); + return key; + } + + private void removePropertiesNotStartingByA(Hit hit) { + List<String> toRemove=new java.util.ArrayList<>(); + for (Iterator i= ((Set) hit.fields().keySet()).iterator(); i.hasNext(); ) { + String key=(String)i.next(); + if (!key.startsWith("a")) + toRemove.add(key); + } + for (Iterator<String> i=toRemove.iterator(); i.hasNext(); ) { + String propertyName=i.next(); + hit.removeField(propertyName); + } + } + + private void addDefaultResults() { + Query q = new Query("?query=default"); + Result r = new Result(q); + r.hits().add(new Hit("http://default-1.html")); + r.hits().add(new Hit("http://default-2.html")); + r.hits().add(new Hit("http://default-3.html")); + r.hits().add(new Hit("http://default-4.html")); + defaultFilledResult = r; + addResultSet(q, r); + } + + public long getEditionTimeStamp(){ + long myEditionTime; + synchronized(this){ + myEditionTime=this.editionTimeStamp; + } + return myEditionTime; + } + + public void setEditionTimeStamp(long editionTime) { + synchronized(this){ + this.editionTimeStamp=editionTime; + } + } + + public Result search(com.yahoo.search.Query query, Execution execution) { + queryCount++; + Result r; + r = unFilledResults.get(getQueryKeyClone(query)); + if (r == null) { + r = defaultFilledResult.clone(); + } else { + r = r.clone(); + } + r.setQuery(query); + r.hits().trim(query.getOffset(), query.getHits()); + return r; + } + + @Override + public void fill(com.yahoo.search.Result result, String summaryClass, Execution execution) { + Result filledResult; + if ("attributeprefetch".equals(summaryClass)) + filledResult=attributeFilledResults.get(getQueryKeyClone(result.getQuery())); + else + filledResult = 
completelyFilledResults.get(getQueryKeyClone(result.getQuery())); + + if (filledResult == null) { + filledResult = defaultFilledResult; + } + fillHits(filledResult,result,summaryClass); + } + + private void fillHits(Result source,Result target,String summaryClass) { + for (Iterator hitsToFill= target.hits().deepIterator() ; hitsToFill.hasNext();) { + Hit hitToFill = (Hit) hitsToFill.next(); + String summaryId= (String) hitToFill.getField("summaryid"); + if (summaryId==null) continue; // Can not fill this + Hit filledHit = lookupBySummaryId(source,summaryId); + if (filledHit==null) + throw new RuntimeException("Can't fill hit with summaryid '" + summaryId + "', not present"); + + for (Iterator props= filledHit.fieldIterator();props.hasNext();) { + Map.Entry propertyEntry = (Map.Entry)props.next(); + hitToFill.setField(propertyEntry.getKey().toString(), + propertyEntry.getValue()); + } + hitToFill.setFilled(summaryClass); + } + target.analyzeHits(); + } + + private Hit lookupBySummaryId(Result result,String summaryId) { + for (Iterator i= result.hits().deepIterator(); i.hasNext(); ) { + Hit hit=(Hit)i.next(); + if (summaryId.equals(hit.getField("summaryid"))) { + return hit; + } + } + return null; + } + + /** + * Returns the number of queries made to this searcher since the last + * reset. For testing - not reliable if multiple threads makes + * queries simultaneously + */ + public int getQueryCount() { + return queryCount; + } + + public void resetQueryCount() { + queryCount=0; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java new file mode 100644 index 00000000000..10a436b3ae8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/FieldCollapsingSearcher.java @@ -0,0 +1,190 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.searcher; + +import com.google.inject.Inject; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.Iterator; +import java.util.Map; + + +/** + * A searcher which does parametrized collapsing. Based on + * SiteCollapsingSearcher. Deprecated - use grouping. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@SuppressWarnings("deprecation") +@After(PhaseNames.RAW_QUERY) +@Before(PhaseNames.TRANSFORMED_QUERY) +public class FieldCollapsingSearcher extends Searcher { + + private static final CompoundName collapse = new CompoundName("collapse"); + private static final CompoundName collapsefield=new CompoundName("collapsefield"); + private static final CompoundName collapsesize=new CompoundName("collapsesize"); + private static final CompoundName collapseSummaryName=new CompoundName("collapse.summary"); + + /** Maximum number of queries to send next searcher */ + private int maxQueries = 4; + + /** + * The max number of hits that will be preserved per unique + * value of the collapsing parameter. + */ + private int defaultCollapseSize; + + /** + * The factor by which to scale up the requested number of hits + * from the next searcher in the chain, because collapsing will + * likely delete many hits. 
+ */ + private double extraFactor; + + /** Create this searcher using default values for all settings */ + public FieldCollapsingSearcher() { + this((String) null); + } + + /** + * Creates a collapser + * + * @param collapseField the default field to collapse on, or null to not collapse as default + */ + public FieldCollapsingSearcher(String collapseField) { + this(1, 2.0, collapseField); + } + + @Inject + public FieldCollapsingSearcher(QrSearchersConfig config) { + QrSearchersConfig.Com.Yahoo.Prelude.Searcher.FieldCollapsingSearcher + s = config.com().yahoo().prelude().searcher().FieldCollapsingSearcher(); + + init(s.collapsesize(), s.extrafactor()); + } + + /** + * Creates a collapser + * + * @param collapseSize the maximum number of hits to keep per + * field the default max number of hits in each collapsed group + * @param extraFactor the percentage by which to scale up the + * requested number of hits, to allow some hits to be removed + * without refetching + * @param collapseField the field to collapse on. This is currently <b>ignored</b>. + */ + public FieldCollapsingSearcher(int collapseSize, double extraFactor, String collapseField) { + init(collapseSize, extraFactor); + } + + private void init(int collapseSize, double extraFactor) { + this.defaultCollapseSize = collapseSize; + this.extraFactor = extraFactor; + } + + /** + * First fetch result from the next searcher in the chain. + * If collapse is active, do collapsing. + * Otherwise, act as a simple pass through + */ + public Result search(com.yahoo.search.Query query, Execution execution) { + String collapseField = query.properties().getString(collapsefield); + + if (collapseField==null) return execution.search(query); + + int collapseSize = query.properties().getInteger(collapsesize,defaultCollapseSize); + query.properties().set(collapse, "0"); + + int hitsToRequest = query.getHits() != 0 ? 
(int) Math.ceil((query.getOffset() + query.getHits() + 1) * extraFactor) : 0; + int nextOffset = 0; + int hitsAfterCollapse; + boolean moreHitsAvailable = true; + Map<String, Integer> knownCollapses = new java.util.HashMap<>(); + Result result = new Result(query); + int performedQueries = 0; + Result resultSource; + String collapseSummary = query.properties().getString(collapseSummaryName); + + do { + resultSource = search(query.clone(), execution, nextOffset, hitsToRequest); + String summaryClass = (collapseSummary == null) + ? query.getPresentation().getSummary() : collapseSummary; + fill(resultSource, summaryClass, execution); + collapse(result, knownCollapses, resultSource, collapseField, collapseSize); + + hitsAfterCollapse = result.getHitCount(); + if (resultSource.getTotalHitCount() < (hitsToRequest + nextOffset)) { + // the searcher downstream has no more hits + moreHitsAvailable = false; + } + nextOffset += hitsToRequest; + if (hitsAfterCollapse < query.getOffset() + query.getHits()) { + hitsToRequest = (int) Math.ceil(hitsToRequest * extraFactor); + } + ++performedQueries; + + } while (hitsToRequest != 0 + && (hitsAfterCollapse < query.getOffset() + query.getHits()) + && moreHitsAvailable + && (performedQueries <= maxQueries)); + + // Set correct meta information + result.mergeWith(resultSource); + // Keep only (offset,.. offset+hits) hits + result.hits().trim(query.getOffset(), query.getHits()); + // Mark query as query with collapsing + query.properties().set(collapse, "1"); + return result; + } + + private Result search(Query query, Execution execution, int offset , int hits) { + query.setOffset(offset); + query.setHits(hits); + return execution.search(query); + } + + /** + * Collapse logic. Preserves only maxHitsPerField hits + * for each unique value of the collapsing parameter. 
+ */ + private void collapse(Result result, Map<String, Integer> knownCollapses, + Result resultSource, String collapseField, int collapseSize) { + for (Iterator<Hit> it = resultSource.hits().iterator(); it.hasNext();) { + Hit unknownHit = it.next(); + + if (!(unknownHit instanceof FastHit)) { + result.hits().add(unknownHit); + continue; + } + FastHit hit = (FastHit) unknownHit; + Object peek = hit.getField(collapseField); + String collapseId = peek != null ? peek.toString() : null; + if (collapseId == null) { + result.hits().add(hit); + continue; + } + + if (knownCollapses.containsKey(collapseId)) { + int numHitsThisField = knownCollapses.get(collapseId).intValue(); + + if (numHitsThisField < collapseSize) { + result.hits().add(hit); + ++numHitsThisField; + knownCollapses.put(collapseId, new Integer(numHitsThisField)); + } + } else { + knownCollapses.put(collapseId, new Integer(1)); + result.hits().add(hit); + } + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/FillSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/FillSearcher.java new file mode 100644 index 00000000000..f7bff5b481c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/FillSearcher.java @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.component.ComponentId; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; + +/** + * This searcher fills the results in the first phase. May be put into + * a search chain to ensure full results are present at an earlier + * time than they would normally be. 
+ * + * @author <a href="mailto:havardpe@yahoo-inc.com">havardpe</a> + **/ +public class FillSearcher extends Searcher { + private final Searcher next; + + public FillSearcher() { + next = null; + } + + public FillSearcher(Searcher next) { + this.next = next; + } + + @Override + public Result search(Query query, Execution execution) { + Result result; + if (next == null) { + result = execution.search(query); + execution.fill(result); + } else { + Execution e = new Execution(next, execution.context()); + result = e.search(query); + e.fill(result); + } + return result; + } + + // TODO: Remove this method as it does nothing new + @Override + public void fill(Result result, String summaryClass, Execution execution) { + if (next == null) { + execution.fill(result, summaryClass); + } else { + Execution e = new Execution(next, execution.context()); + e.fill(result, summaryClass); + } + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java new file mode 100644 index 00000000000..dbfde502b75 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/JSONDebugSearcher.java @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.prelude.hitfield.JSONString; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.FeatureData; +import com.yahoo.search.result.StructuredData; +import com.yahoo.search.searchchain.Execution; + +import java.util.Iterator; + +/** + * Save the query in the incoming state to a meta hit in the result. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ + +public class JSONDebugSearcher extends Searcher { + public static final String JSON_FIELD = "JSON field: "; + public static final String STRUCT_FIELD = "Structured data field (as json): "; + public static final String FEATURE_FIELD = "Feature data field (as json): "; + + private static CompoundName PROPERTYNAME = new CompoundName("dumpjson"); + + public Result search(com.yahoo.search.Query query, Execution execution) { + Result r = execution.search(query); + String propertyName = query.properties().getString(PROPERTYNAME); + if (propertyName != null) { + execution.fill(r); + for (Iterator<Hit> i = r.hits().deepIterator(); i.hasNext();) { + Hit h = i.next(); + if (h instanceof FastHit) { + FastHit hit = (FastHit) h; + Object o = hit.getField(propertyName); + if (o instanceof JSONString) { + JSONString j = (JSONString) o; + r.getQuery().trace(JSON_FIELD + j.getContent(), false, 5); + } + if (o instanceof StructuredData) { + StructuredData d = (StructuredData) o; + r.getQuery().trace(STRUCT_FIELD + d.toJson(), false, 5); + } + if (o instanceof FeatureData) { + FeatureData d = (FeatureData) o; + r.getQuery().trace(FEATURE_FIELD + d.toJson(), false, 5); + } + } + } + } + return r; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/JuniperSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/JuniperSearcher.java new file mode 100644 index 00000000000..75ae960cac0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/JuniperSearcher.java @@ -0,0 +1,212 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.searcher; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; + +import com.google.inject.Inject; +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.search.Searcher; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.prelude.hitfield.BoldCloseFieldPart; +import com.yahoo.prelude.hitfield.BoldOpenFieldPart; +import com.yahoo.prelude.hitfield.FieldPart; +import com.yahoo.prelude.hitfield.HitField; +import com.yahoo.prelude.hitfield.SeparatorFieldPart; +import com.yahoo.prelude.hitfield.StringFieldPart; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +/** + * Converts juniper highlighting to XML style + * <p> + * Note: This searcher only converts backend binary highlighting and separators + * to the configured highlighting and separator tags. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@After(PhaseNames.RAW_QUERY) +@Before(PhaseNames.TRANSFORMED_QUERY) +@Provides(JuniperSearcher.JUNIPER_TAG_REPLACING) +public class JuniperSearcher extends Searcher { + + public final static char RAW_HIGHLIGHT_CHAR = '\u001F'; + public final static char RAW_SEPARATOR_CHAR = '\u001E'; + + private static final String ELLIPSIS = "..."; + + // The name of the field containing document type + private static final String MAGIC_FIELD = Hit.SDDOCNAME_FIELD; + + public static final String JUNIPER_TAG_REPLACING = "JuniperTagReplacing"; + + private String boldOpenTag; + private String boldCloseTag; + private String separatorTag; + + @Inject + public JuniperSearcher(ComponentId id, QrSearchersConfig config) { + super(id); + + boldOpenTag = config.tag().bold().open(); + boldCloseTag = config.tag().bold().close(); + separatorTag = config.tag().separator(); + } + + /** + * Convert Juniper style property highlighting to XML style. + */ + @Override + public Result search(Query query, Execution execution) { + Result result = execution.search(query); + highlight(query.getPresentation().getBolding(), result.hits().deepIterator(), null, + execution.context().getIndexFacts().newSession(query)); + return result; + } + + @Override + public void fill(Result result, String summaryClass, Execution execution) { + Result workResult = result; + final int worstCase = workResult.getHitCount(); + final List<Hit> hits = new ArrayList<>(worstCase); + for (final Iterator<Hit> i = workResult.hits().deepIterator(); i.hasNext();) { + final Hit sniffHit = i.next(); + if ( ! 
(sniffHit instanceof FastHit)) continue; + + final FastHit hit = (FastHit) sniffHit; + if (hit.isFilled(summaryClass)) continue; + + hits.add(hit); + } + execution.fill(workResult, summaryClass); + highlight(workResult.getQuery().getPresentation().getBolding(), hits.iterator(), summaryClass, + execution.context().getIndexFacts().newSession(result.getQuery())); + } + + private void highlight(boolean bolding, Iterator<Hit> hitsToHighlight, + String summaryClass, IndexFacts.Session indexFacts) { + while (hitsToHighlight.hasNext()) { + Hit sniffHit = hitsToHighlight.next(); + if ( ! (sniffHit instanceof FastHit)) continue; + + FastHit hit = (FastHit) sniffHit; + if (summaryClass != null && ! hit.isFilled(summaryClass)) continue; + + Object searchDefinitionField = hit.getField(MAGIC_FIELD); + if (searchDefinitionField == null) continue; + String searchDefinitionName = searchDefinitionField.toString(); + + for (String fieldName : hit.fields().keySet()) { + Index index = indexFacts.getIndex(fieldName, searchDefinitionName); + if (index.getDynamicSummary() || index.getHighlightSummary()) + insertTags(hit.buildHitField(fieldName, true, true), bolding, index.getDynamicSummary()); + } + } + } + + private void insertTags(final HitField oldProperty, final boolean bolding, final boolean dynteaser) { + boolean insideHighlight = false; + for (final ListIterator<FieldPart> i = oldProperty.listIterator(); i.hasNext();) { + final FieldPart f = i.next(); + if (f instanceof SeparatorFieldPart) { + setSeparatorString(bolding, (SeparatorFieldPart) f); + } + if (f.isFinal()) { + continue; + } + + final String toQuote = f.getContent(); + List<FieldPart> newFieldParts = null; + int previous = 0; + for (int j = 0; j < toQuote.length(); j++) { + final char key = toQuote.charAt(j); + switch (key) { + case RAW_HIGHLIGHT_CHAR: + newFieldParts = initFieldParts(newFieldParts); + addBolding(bolding, insideHighlight, f, toQuote, newFieldParts, previous, j); + previous = j + 1; + insideHighlight = 
!insideHighlight; + break; + case RAW_SEPARATOR_CHAR: + newFieldParts = initFieldParts(newFieldParts); + addSeparator(bolding, dynteaser, f, toQuote, newFieldParts, + previous, j); + previous = j + 1; + break; + default: + // no action + break; + } + } + if (previous > 0 && previous < toQuote.length()) { + newFieldParts.add(new StringFieldPart(toQuote.substring(previous), f.isToken())); + } + if (newFieldParts != null) { + i.remove(); + for (final Iterator<FieldPart> j = newFieldParts.iterator(); j.hasNext();) { + i.add(j.next()); + } + } + } + } + + private void setSeparatorString(final boolean bolding,final SeparatorFieldPart f) { + if (bolding) { + f.setContent(separatorTag); + } else { + f.setContent(ELLIPSIS); + } + } + + private void addSeparator(final boolean bolding, final boolean dynteaser, + final FieldPart f, final String toQuote, + final List<FieldPart> newFieldParts, final int previous, final int j) { + if (previous != j) { + newFieldParts.add(new StringFieldPart(toQuote.substring(previous, j), f.isToken())); + } + if (dynteaser) { + final FieldPart s = (bolding ? 
new SeparatorFieldPart(separatorTag) : new SeparatorFieldPart(ELLIPSIS)); + newFieldParts.add(s); + } + } + + private void addBolding(final boolean bolding, + final boolean insideHighlight, final FieldPart f, + final String toQuote, final List<FieldPart> newFieldParts, + final int previous, final int j) { + if (previous != j) { + newFieldParts.add(new StringFieldPart(toQuote.substring(previous, j), f.isToken())); + } + if (bolding) { + if (insideHighlight) { + newFieldParts.add(new BoldCloseFieldPart(boldCloseTag)); + } else { + if (newFieldParts.size() > 0 + && newFieldParts.get(newFieldParts.size() - 1) instanceof BoldCloseFieldPart) { + newFieldParts.remove(newFieldParts.size() - 1); + } else { + newFieldParts.add(new BoldOpenFieldPart(boldOpenTag)); + } + } + } + } + + private List<FieldPart> initFieldParts(List<FieldPart> newFieldParts) { + if (newFieldParts == null) { + newFieldParts = new ArrayList<>(); + } + return newFieldParts; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/KeyValueSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/KeyValueSearcher.java new file mode 100644 index 00000000000..a282dc22b53 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/KeyValueSearcher.java @@ -0,0 +1,166 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.searcher; + +import com.yahoo.document.BucketId; +import com.yahoo.document.BucketIdFactory; +import com.yahoo.document.DocumentId; +import com.yahoo.document.GlobalId; +import com.yahoo.document.idstring.IdString; +import com.yahoo.documentapi.messagebus.protocol.SearchColumnPolicy; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.prelude.query.IntItem; +import com.yahoo.prelude.query.QueryCanonicalizer; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.grouping.vespa.GroupingExecutor; +import com.yahoo.search.query.Model; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.result.DefaultErrorHit; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.vdslib.BucketDistribution; +import com.yahoo.component.chain.dependencies.Before; + +import java.util.Iterator; +import java.util.logging.Logger; + + +/** + * Searcher that does efficient key/value lookup using Vespa search as a + * backend. It does so by bypassing the first phase ranking, and only performs + * the second phase summary fetching. + * + * The keys to find are input as a comma-seprated list using the <i>keys</i> + * query parameter. Each key should match a part of a document id. Given the key + * 'foo', and document id namespace 'mynamespace', the document id matched will + * be 'id:mynamespace:keyvalue::foo'. + * + * To scale the throughput with the number of partitions, the searcher uses the + * same hashing mechanisms as the document API to find out which node each key + * belongs to. The searcher then dispatches a summary request to retrieve keys + * and returns the result. 
+ * + * @author <a href="lulf@yahoo-inc.com">Ulf Lilleengen</a> + */ +@Before(GroupingExecutor.COMPONENT_NAME) +public class KeyValueSearcher extends Searcher { + + private static final Logger log = Logger.getLogger(KeyValueSearcher.class.getName()); + private final BucketIdFactory factory = new BucketIdFactory(); + private final BucketDistribution distribution; + private final String summaryClass; + private final String idSchemePrefix; + private final int numRowBits; + private final int traceLevel = 5; + + public KeyValueSearcher(KeyvalueConfig config) { + this.summaryClass = config.summaryName(); + this.idSchemePrefix = createIdSchemePrefix(config); + this.distribution = new BucketDistribution(config.numparts(), SearchColumnPolicy.DEFAULT_NUM_BUCKET_BITS); + this.numRowBits = calcNumRowBits(config.numrows()); + log.config("Configuring " + KeyValueSearcher.class.getName() + " with " + config.numparts() + " partitions and doc id scheme '" + idSchemePrefix + "'"); + } + + private String createIdSchemePrefix(KeyvalueConfig config) { + if (config.docIdScheme().equals(KeyvalueConfig.DocIdScheme.Enum.DOC_SCHEME)) { + return "doc:" + config.docIdNameSpace() + ":"; + } else { + return "id:" + config.docIdNameSpace() + ":" + config.docIdType() + "::"; + } + } + + public Hit createHit(Query query, String key) { + String docId = createDocId(key.trim()); + BucketId id = factory.getBucketId(new DocumentId(docId)); + int partition = getPartition(id); + + FastHit hit = new FastHit(); + hit.setGlobalId(new GlobalId(IdString.createIdString(docId))); + hit.setQuery(query); + hit.setFillable(); + hit.setCached(false); + hit.setPartId(partition << numRowBits, numRowBits); + hit.setRelevance(1.0); + hit.setIgnoreRowBits(true); + hit.setDistributionKey(42); + return hit; + } + + private String createDocId(String key) { + return idSchemePrefix + key; + } + + + @Override + public Result search(Query query, Execution execution) { + String keyProp = query.properties().getString("keys"); + 
query.getPresentation().setSummary(summaryClass); + if (keyProp == null || keyProp.length() == 0) { + return new Result(query, new ErrorMessage(ErrorMessage.NULL_QUERY, "'keys' parameter not set or empty.")); + } + String[] keyList = keyProp.split(","); + Model model = query.getModel(); + QueryTree tree = model.getQueryTree(); + QueryCanonicalizer.canonicalize(tree); + if (tree.isEmpty()) { + tree.setRoot(new IntItem(String.valueOf(keyProp.hashCode()))); + } + + Result result = new Result(query); + for (String key : keyList) { + result.hits().add(createHit(query, key)); + } + execution.fill(result, summaryClass); + if (query.isTraceable(traceLevel)) { + traceResult(query, result); + } + int totalHits = 0; + Iterator<Hit> hitIterator = result.hits().iterator(); + while (hitIterator.hasNext()) { + Hit hit = hitIterator.next(); + if (hit.isFillable() && hit.isFilled(summaryClass)) { + totalHits++; + } else { + hitIterator.remove(); + } + } + if (totalHits != keyList.length) { + ErrorMessage error = new ErrorMessage(1, "Some keys could not be fetched"); + result.hits().setError(error); + } + result.setTotalHitCount(totalHits); + return result; + } + + private void traceResult(Query query, Result result) { + Iterator<Hit> hitIterator = result.hits().iterator(); + while (hitIterator.hasNext()) { + Hit hit = hitIterator.next(); + if (hit.isFillable() && hit.isFilled(summaryClass)) { + query.trace("Found filled hit: " + hit, traceLevel); + } else { + query.trace("Found hit that was not filled/fillable: " + hit, traceLevel); + } + } + query.trace("Error hit: " + result.hits().getErrorHit(), traceLevel); + } + + private int getPartition(BucketId bucketId) { + return distribution.getColumn(bucketId); + } + + private static int calcNumRowBits(int numRows) { + if (numRows < 1) { + throw new IllegalArgumentException(); + } + for (int i = 0; i < 30; ++i) { + if (numRows - 1 < 1 << i) { + return i; + } + } + return 31; + } +} diff --git 
a/container-search/src/main/java/com/yahoo/prelude/searcher/MultipleResultsSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/MultipleResultsSearcher.java new file mode 100644 index 00000000000..ac2196bb9f5 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/MultipleResultsSearcher.java @@ -0,0 +1,376 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; +import com.yahoo.search.searchchain.Execution; + +import java.util.*; + +/** + * <p> Groups hits according to sddocname. </p> + * + * <p> For each group, the desired number of hits can be specified. </p> + * + * @author tonytv + */ +public class MultipleResultsSearcher extends Searcher { + + private final static String propertyPrefix = "multipleresultsets."; + private static final CompoundName additionalHitsFactorName=new CompoundName(propertyPrefix + "additionalHitsFactor"); + private static final CompoundName maxTimesRetrieveHeterogeneousHitsName=new CompoundName(propertyPrefix + "maxTimesRetrieveHeterogeneousHits"); + private static final CompoundName numHits=new CompoundName(propertyPrefix + "numHits"); + + public @Override Result search(Query query, Execution e) { + try { + Parameters parameters = new Parameters(query); + + query.trace("MultipleResultsSearcher: " + parameters, false, 2); + HitsRetriever hitsRetriever = new HitsRetriever(query,e,parameters); + + for (DocumentGroup documentGroup : parameters.documentGroups) { + if ( hitsRetriever.numHits(documentGroup) < documentGroup.targetNumberOfDocuments) { + hitsRetriever.retrieveMoreHits(documentGroup); + } + } + + return 
hitsRetriever.createMultipleResultSets(); + } catch(ParameterException exception) { + Result result = new Result(query); + result.hits().setError(ErrorMessage.createInvalidQueryParameter(exception.msg)); + return result; + } + } + + private class HitsRetriever { + + PartitionedResult partitionedResult; + + private int numRetrieveMoreHitsCalls = 0; + private int nextOffset; + private Query query; + private final Parameters parameters; + private final int hits; + private final int offset; + private Execution execution; + private Result initialResult; + + HitsRetriever(Query query, Execution execution, Parameters parameters) throws ParameterException { + this.offset=query.getOffset(); + this.hits=query.getHits(); + this.nextOffset = query.getOffset() + query.getHits(); + this.query = query; + this.parameters = parameters; + this.execution = execution; + + initialResult = retrieveHits(); + partitionedResult = new PartitionedResult(parameters.documentGroups, initialResult); + + this.query = query; + } + + void retrieveMoreHits(DocumentGroup documentGroup) { + if ( ++numRetrieveMoreHitsCalls < + parameters.maxTimesRetrieveHeterogeneousHits) { + + retrieveHeterogenousHits(); + + if (numHits(documentGroup) < + documentGroup.targetNumberOfDocuments) { + + retrieveMoreHits(documentGroup); + } + + } else { + retrieveRemainingHitsForGroup(documentGroup); + } + } + + void retrieveHeterogenousHits() { + int numHitsToRetrieve = (int)(hits * parameters.additionalHitsFactor); + + final int maxNumHitsToRetrieve = 1000; + numHitsToRetrieve = Math.min(numHitsToRetrieve,maxNumHitsToRetrieve); + + try { + query.setWindow(nextOffset,numHitsToRetrieve); + partitionedResult.addHits(retrieveHits()); + } + finally { + restoreWindow(); + nextOffset += numHitsToRetrieve; + } + } + + private void restoreWindow() { + query.setWindow(offset,hits); + } + + void retrieveRemainingHitsForGroup(DocumentGroup documentGroup) { + Set<String> oldRestrictList = query.getModel().getRestrict(); + try { + int 
numMissingHits = documentGroup.targetNumberOfDocuments - numHits(documentGroup); + int offset = numHits(documentGroup); + + query.getModel().getRestrict().clear(); + query.getModel().getRestrict().add(documentGroup.documentName); + query.setWindow(offset, numMissingHits); + partitionedResult.addHits(retrieveHits()); + + } finally { + restoreWindow(); + query.getModel().getRestrict().clear(); + query.getModel().getRestrict().addAll(oldRestrictList); + } + } + + int numHits(DocumentGroup documentGroup) { + return partitionedResult.numHits(documentGroup.documentName); + } + + Result createMultipleResultSets() { + Iterator<Hit> i = initialResult.hits().iterator(); + while (i.hasNext()) { + i.next(); + i.remove(); + } + + for (DocumentGroup group: parameters.documentGroups) { + partitionedResult.cropResultSet(group.documentName,group.targetNumberOfDocuments); + } + + partitionedResult.insertInto(initialResult.hits()); + return initialResult; + } + + private Result retrieveHits() { + Result result = execution.search(query); + // ensure that field sddocname is available + execution.fill(result); // TODO: Suffices to fill attributes + + if (result.hits().getErrorHit() != null) + initialResult.hits().getErrorHit().addErrors( + result.hits().getErrorHit()); + + + return result; + } + } + + // Assumes that field sddocname is available + private static class PartitionedResult { + + private Map<String, HitGroup> resultSets = new HashMap<>(); + + private List<Hit> otherHits = new ArrayList<>(); + + PartitionedResult(List<DocumentGroup> documentGroups,Result result) throws ParameterException { + for (DocumentGroup group : documentGroups) + addGroup(group); + + addHits(result, true); + } + + void addHits(Result result, boolean addOtherHits) { + Iterator<Hit> i = result.hits().iterator(); + while (i.hasNext()) { + add(i.next(), addOtherHits); + } + } + + void addHits(Result result) { + addHits(result, false); + } + + + void add(Hit hit, boolean addOtherHits) { + String documentName 
= (String)hit.getField(Hit.SDDOCNAME_FIELD); + + if (documentName != null) { + HitGroup resultSet = resultSets.get(documentName); + + if (resultSet != null) { + resultSet.add(hit); + return; + } + } + + if (addOtherHits) { + otherHits.add(hit); + } + } + + int numHits(String documentName) { + return resultSets.get(documentName).size(); + } + + void insertInto(HitGroup group) { + for (Hit hit: otherHits) { + group.add(hit); + } + + for (HitGroup hit: resultSets.values() ) { + hit.copyOrdering(group); + group.add(hit); + } + } + + void cropResultSet(String documentName, int numDocuments) { + resultSets.get(documentName).trim(0, numDocuments); + } + + private void addGroup(DocumentGroup group) throws ParameterException { + final String documentName = group.documentName; + if ( resultSets.put(group.documentName, + new HitGroup(documentName) { + /** + * + */ + private static final long serialVersionUID = 5732822886080288688L; + }) + != null ) { + + throw new ParameterException("Document name " + group.documentName + "mentioned multiple times"); + } + } + + } + + + //examples: + //multipleresultsets.numhits=music:10,movies:20 + //multipleresultsets.additionalhitsFactor=0.8 + //multipleresultsets.maxtimesretrieveheterogeneoushits=2 + private static class Parameters { + Parameters(Query query) + throws ParameterException { + + readNumHitsSpecification(query); + readMaxTimesRetrieveHeterogeneousHits(query); + readAdditionalHitsFactor(query); + } + + + List<DocumentGroup> documentGroups = new ArrayList<>(); + double additionalHitsFactor = 0.8; + int maxTimesRetrieveHeterogeneousHits = 2; + + private void readAdditionalHitsFactor(Query query) + throws ParameterException { + + String additionalHitsFactorStr = query.properties().getString(additionalHitsFactorName); + + if (additionalHitsFactorStr == null) + return; + + try { + additionalHitsFactor = + Double.parseDouble(additionalHitsFactorStr); + } catch (NumberFormatException e) { + throw new ParameterException( + "Expected 
floating point number, got '" + + additionalHitsFactorStr + "'."); + } + } + + private void readMaxTimesRetrieveHeterogeneousHits(Query query) { + maxTimesRetrieveHeterogeneousHits = query.properties().getInteger( + maxTimesRetrieveHeterogeneousHitsName, + maxTimesRetrieveHeterogeneousHits); + } + + + private void readNumHitsSpecification(Query query) + throws ParameterException { + + //example numHitsSpecification: "music:10,movies:20" + String numHitsSpecification = + query.properties().getString(numHits); + + if (numHitsSpecification == null) + return; + + String[] numHitsForDocumentNames = numHitsSpecification.split(","); + + for (String s:numHitsForDocumentNames) { + handleDocumentNameWithNumberOfHits(s); + } + + } + + public String toString() { + String s = "additionalHitsFactor=" + additionalHitsFactor + + ", maxTimesRetrieveHeterogeneousHits=" + + maxTimesRetrieveHeterogeneousHits + + ", numHitsSpecification='"; + + for (DocumentGroup group : documentGroups) { + s += group.documentName + ":" + + group.targetNumberOfDocuments + ", "; + } + + s += "'"; + + return s; + } + + //example input: music:10 + private void handleDocumentNameWithNumberOfHits(String s) + throws ParameterException { + + String[] documentNameWithNumberOfHits = s.split(":"); + + if (documentNameWithNumberOfHits.length != 2) { + String msg = "Expected a single ':' in '" + s + "'."; + + if (documentNameWithNumberOfHits.length > 2) + msg += " Please check for missing commas."; + + throw new ParameterException(msg); + } else { + String documentName = + documentNameWithNumberOfHits[0].trim(); + try { + int numHits = Integer.parseInt( + documentNameWithNumberOfHits[1].trim()); + + numRequestedHits(documentName, numHits); + } catch (NumberFormatException e) { + throw new ParameterException( + "Excpected an integer but got '" + + documentNameWithNumberOfHits[1] + "'"); + } + } + } + + private void numRequestedHits(String documentName, int numHits) { + documentGroups.add(new 
DocumentGroup(documentName, numHits)); + } + + } + + private static class DocumentGroup { + String documentName; + int targetNumberOfDocuments; + + DocumentGroup(String documentName, int targetNumberOfDocuments) { + this.documentName = documentName; + this.targetNumberOfDocuments = targetNumberOfDocuments; + } + } + + @SuppressWarnings("serial") + private static class ParameterException extends Exception { + String msg; + + ParameterException(String msg) { + this.msg = msg; + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/PosSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/PosSearcher.java new file mode 100644 index 00000000000..03e212fc854 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/PosSearcher.java @@ -0,0 +1,174 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.geo.DegreesParser; +import com.yahoo.geo.BoundingBoxParser; +import com.yahoo.yolean.Exceptions; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; +import com.yahoo.prelude.Location; + +/** + * A searcher converting human-readable position parameters + * into internal format. 
+ * <br> + * Reads the following query properties: + * <ul> + * <li> pos.ll (geographical latitude and longitude) + * <li> pos.xy (alternate to pos.ll - direct x and y in internal units) + * <li> pos.radius (distance in one of: + * internal units (no suffix), meter (m), kilometer (km) or miles (mi) + * </ul> + * + * @author Arne J + */ +@After(PhaseNames.RAW_QUERY) +@Before(PhaseNames.TRANSFORMED_QUERY) +@Provides(PosSearcher.POSITION_PARSING) +public class PosSearcher extends Searcher { + public static final String POSITION_PARSING = "PositionParsing"; + + private static final CompoundName posBb = new CompoundName("pos.bb"); + private static final CompoundName posLl = new CompoundName("pos.ll"); + private static final CompoundName posXy = new CompoundName("pos.xy"); + private static final CompoundName posAttributeName = new CompoundName("pos.attribute"); + private static final CompoundName posRadius = new CompoundName("pos.radius"); + private static final CompoundName posUnits = new CompoundName("pos.units"); + + // according to wikipedia: + // Earth's equatorial radius = 6378137 meter - not used + // meters per mile = 1609.344 + // 180 degrees equals one half diameter equals PI*r + // Earth's polar radius = 6356752 meter + + public final static double km2deg = 1000.000 * 180.0 / (Math.PI * 6356752.0); + public final static double mi2deg = 1609.344 * 180.0 / (Math.PI * 6356752.0); + + + public Result search(Query query, Execution execution) { + String bb = query.properties().getString(posBb); + String ll = query.properties().getString(posLl); + String xy = query.properties().getString(posXy); + + if (ll == null && xy == null && bb == null) { + return execution.search(query); // Nothing to do + } + if (query.getRanking().getLocation() != null) { + // this searcher is a NOP if there is already a location + // in the query + query.trace("query already has a location set, not processing 'pos' params", false, 1); + return execution.search(query); + } + + Location loc = 
new Location(); + loc.setDimensions(2); + String posAttribute = query.properties().getString(posAttributeName); + loc.setAttribute(posAttribute); + + try { + if (ll == null && xy == null && bb != null) { + parseBoundingBox(bb, loc); + } else { + if (ll != null && xy != null) { + throw new IllegalArgumentException("Cannot handle both lat/long and xy coords at the same time"); + } + if (ll != null) { + handleGeoCircle(query, ll, loc); + } + if (xy != null) { + handleXyCircle(query, xy, loc); + } + if (bb != null) { + parseBoundingBox(bb, loc); + } + } + } + catch (IllegalArgumentException e) { + // System.err.println("error: "+e); + return new Result(query, ErrorMessage.createInvalidQueryParameter( + "Error in pos parameters: " + Exceptions.toMessageString(e))); + } + // and finally: + query.getRanking().setLocation(loc); + return execution.search(query); + } + + private void handleGeoCircle(Query query, String ll, Location target) { + double ewCoord = 0; + double nsCoord = 0; + try { + DegreesParser parsed = new DegreesParser(ll); + ewCoord = parsed.longitude; + nsCoord = parsed.latitude; + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException("Unable to parse lat/long string '" +ll + "'", e); + } + String radius = query.properties().getString(posRadius); + double radiusdegrees = 0.0; + + if (radius == null) { + radiusdegrees = 50.0 * km2deg; + } else if (radius.endsWith("km")) { + double radiuskm = Double.valueOf(radius.substring(0, radius.length()-2)); + radiusdegrees = radiuskm * km2deg; + } else if (radius.endsWith("m")) { + double radiusm = Double.valueOf(radius.substring(0, radius.length()-1)); + radiusdegrees = radiusm * km2deg / 1000.0; + } else if (radius.endsWith("mi")) { + double radiusmiles = Double.valueOf(radius.substring(0, radius.length()-2)); + radiusdegrees = radiusmiles * mi2deg; + } else { + radiusdegrees = Integer.parseInt(radius) * 0.000001; + } + target.setGeoCircle(nsCoord, ewCoord, radiusdegrees); + } + + + private void 
handleXyCircle(Query query, String xy, Location target) { + int xcoord = 0; + int ycoord = 0; + // parse xy + int semipos = xy.indexOf(';'); + if (semipos > 0 && semipos < xy.length()) { + xcoord = Integer.parseInt(xy.substring(0, semipos)); + ycoord = Integer.parseInt(xy.substring(semipos+1, xy.length())); + } else { + throw new IllegalArgumentException("pos.xy must be in the format 'digits;digits' but was: '"+xy+"'"); + } + String radius = query.properties().getString(posRadius); + int radiusUnits = 0; + if (radius == null) { + radiusUnits = 5000; + } else if (radius.endsWith("km")) { + double radiuskm = Double.valueOf(radius.substring(0, radius.length()-2)); + double radiusdegrees = radiuskm * km2deg; + radiusUnits = (int)(radiusdegrees * 1000000); + } else if (radius.endsWith("m")) { + double radiusm = Double.valueOf(radius.substring(0, radius.length()-1)); + double radiusdegrees = radiusm * km2deg / 1000.0; + radiusUnits = (int)(radiusdegrees * 1000000); + } else if (radius.endsWith("mi")) { + double radiusmiles = Double.valueOf(radius.substring(0, radius.length()-2)); + double radiusdegrees = radiusmiles * mi2deg; + radiusUnits = (int)(radiusdegrees * 1000000); + } else { + radiusUnits = Integer.parseInt(radius); + } + target.setXyCircle(xcoord, ycoord, radiusUnits); + } + + + private static void parseBoundingBox(String bb, Location target) { + BoundingBoxParser parser = new BoundingBoxParser(bb); + target.setBoundingBox(parser.n, parser.s, parser.e, parser.w); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/QuerySnapshotSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/QuerySnapshotSearcher.java new file mode 100644 index 00000000000..7d0ae0a6d99 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/QuerySnapshotSearcher.java @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.searcher; + +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.Relevance; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; + +/** + * Save the query in the incoming state to a meta hit in the result. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ + +public class QuerySnapshotSearcher extends Searcher { + + public Result search(Query query, Execution execution) { + Query q = query.clone(); + Result r = execution.search(query); + Hit h = new Hit("meta:querysnapshot", new Relevance( + Double.POSITIVE_INFINITY)); + h.setMeta(true); + h.setField("query", q); + r.hits().add(h); + return r; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/QueryValidatingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/QueryValidatingSearcher.java new file mode 100644 index 00000000000..5678cc918da --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/QueryValidatingSearcher.java @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; + +/** + * Ensures hits is 1000 or less and offset is 1000 or less. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class QueryValidatingSearcher extends Searcher { + + public Result search(Query query, Execution execution) { + if (query.getHits() > 1000) { + Result result = new Result(query); + ErrorMessage error + = ErrorMessage.createInvalidQueryParameter("Too many hits (more than 1000) requested."); + result.hits().addError(error); + return result; + } + if (query.getOffset() > 1000) { + Result result = new Result(query); + ErrorMessage error + = ErrorMessage.createInvalidQueryParameter("Offset too high (above 1000)."); + result.hits().addError(error); + return result; + } + return execution.search(query); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/QuotingSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/QuotingSearcher.java new file mode 100644 index 00000000000..6c5a6492b92 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/QuotingSearcher.java @@ -0,0 +1,193 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import java.util.*; + +import com.yahoo.component.ComponentId; +import com.yahoo.search.result.Hit; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.prelude.hitfield.FieldPart; +import com.yahoo.prelude.hitfield.HitField; +import com.yahoo.prelude.hitfield.ImmutableFieldPart; +import com.yahoo.prelude.hitfield.StringFieldPart; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; + +/** + * A searcher which does quoting based on a quoting table. + * + * May be extended to do quoting template sensitive. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class QuotingSearcher extends Searcher { + + // Char to String + private QuoteTable quoteTable; + + private synchronized void setQuoteTable(QuoteTable quoteTable) { + this.quoteTable = quoteTable; + } + private synchronized QuoteTable getQuoteTable() { + return quoteTable; + } + + private static class QuoteTable { + private final int lowerUncachedBound; + private final int upperUncachedBound; + private final Map<Character, String> quoteMap; + private final String[] lowerTable; + private final boolean useMap; + private final boolean isEmpty; + + public QuoteTable(QrQuotetableConfig config) { + int minOrd = 0; + int maxOrd = 0; + String[] newLowerTable = new String[256]; + boolean newUseMap = false; + boolean newIsEmpty = true; + Map<Character, String> newQuoteMap = new HashMap<>(); + for (Iterator<?> i = config.character().iterator(); i.hasNext(); ) { + QrQuotetableConfig.Character character + = (QrQuotetableConfig.Character)i.next(); + if (character.ordinal() > 256) { + newIsEmpty = false; + newQuoteMap.put(new Character((char)character.ordinal()), + character.quoting()); + newUseMap = true; + if (minOrd == 0 || character.ordinal() < minOrd) + minOrd = character.ordinal(); + if (maxOrd == 0 || character.ordinal() > maxOrd) + maxOrd = character.ordinal(); + } + else { + newIsEmpty = false; + newLowerTable[character.ordinal()] + = character.quoting(); + } + } + lowerUncachedBound = minOrd; + upperUncachedBound = maxOrd; + quoteMap = newQuoteMap; + useMap = newUseMap; + isEmpty = newIsEmpty; + lowerTable = newLowerTable; + } + public String get(char c) { + if (isEmpty) + return null; + int ord = (int)c; + if (ord < 256) { + return lowerTable[ord]; + } + else { + if ((!useMap) || ord < lowerUncachedBound + || ord > upperUncachedBound) + { + return null; + } + else { + return quoteMap.get(new Character(c)); + } + } + } + public boolean isEmpty() { + return isEmpty; + } + } + + public 
QuotingSearcher(ComponentId id, QrQuotetableConfig config) { + super(id); + setQuoteTable(new QuoteTable(config)); + } + + public Result search(Query query, Execution execution) { + Result result = execution.search(query); + execution.fill(result); + QuoteTable translations = getQuoteTable(); + if (translations == null || translations.isEmpty()) { + return result; + } + for (Iterator<Hit> i = result.hits().deepIterator(); i.hasNext(); ) { + Hit h = i.next(); + if (h instanceof FastHit) { + quoteProperties((FastHit)h, translations); + } + } + return result; + } + + private void quoteProperties(FastHit hit, QuoteTable translations) { + for (Iterator<?> i = ((Set<?>) hit.fields().keySet()).iterator(); i.hasNext(); ) { + String propertyName = (String) i.next(); + Object entry = hit.getField(propertyName); + if (entry == null) { + continue; + } + Class<? extends Object> propertyType = entry.getClass(); + if (propertyType.equals(HitField.class)) { + quoteField((HitField) entry, translations); + } else if (propertyType.equals(String.class)) { + quoteProperty(hit, propertyName, (String)entry, translations); + } + } + } + + private void quoteProperty(Hit hit, String fieldname, String toQuote, QuoteTable translations) { + List<FieldPart> l = translate(toQuote, translations, true); + if (l != null) { + HitField hf = new HitField(fieldname, toQuote); + hf.setTokenizedContent(l); + hit.setField(fieldname, hf); + } + } + + + private void quoteField(HitField field, QuoteTable translations) { + for (ListIterator<FieldPart> i = field.listIterator(); i.hasNext(); ) { + FieldPart f = i.next(); + if (!f.isFinal()) { + List<FieldPart> newFieldParts = translate(f.getContent(), translations, + f.isToken()); + if (newFieldParts != null) { + i.remove(); + for (Iterator<FieldPart> j = newFieldParts.iterator(); j.hasNext(); ) { + i.add(j.next()); + } + } + } + } + } + + private List<FieldPart> translate(String toQuote, QuoteTable translations, + boolean isToken) { + List<FieldPart> 
newFieldParts = null; + int lastIdx = 0; + for (int i = 0; i < toQuote.length(); i++) { + String quote = translations.get(toQuote.charAt(i)); + if (quote != null) { + if (newFieldParts == null) { + newFieldParts = new ArrayList<>(); + } + if (lastIdx != i) { + newFieldParts.add( + new StringFieldPart(toQuote.substring(lastIdx, i), + isToken)); + } + String initContent = Character.toString(toQuote.charAt(i)); + newFieldParts.add(new ImmutableFieldPart(initContent, + quote, + isToken)); + lastIdx = i+1; + } + } + if (lastIdx > 0 && lastIdx < toQuote.length()) { + newFieldParts.add( + new StringFieldPart(toQuote.substring(lastIdx), + isToken)); + } + return newFieldParts; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/ValidatePredicateSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/ValidatePredicateSearcher.java new file mode 100644 index 00000000000..3706f4fa9ea --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/ValidatePredicateSearcher.java @@ -0,0 +1,81 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import java.util.Optional; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.prelude.Index; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.PredicateQueryItem; +import com.yahoo.prelude.query.ToolBox; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.querytransform.BooleanSearcher; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; + +import java.util.Collection; + +/** + * Checks that predicate queries don't use values outside the defined upper/lower bounds. 
+ * + * @author <a href="mailto:magnarn@yahoo-inc.com">Magnar Nedland</a> + */ +@After(BooleanSearcher.PREDICATE) +public class ValidatePredicateSearcher extends Searcher { + + @Override + public Result search(Query query, Execution execution) { + Optional<ErrorMessage> e = validate(query, execution.context().getIndexFacts().newSession(query)); + if (e.isPresent()) { + Result r = new Result(query); + r.hits().addError(e.get()); + return r; + } + return execution.search(query); + } + + private Optional<ErrorMessage> validate(Query query, IndexFacts.Session indexFacts) { + ValidatePredicateVisitor visitor = new ValidatePredicateVisitor(indexFacts); + ToolBox.visit(visitor, query.getModel().getQueryTree().getRoot()); + return visitor.errorMessage; + } + + private static class ValidatePredicateVisitor extends ToolBox.QueryVisitor { + + private final IndexFacts.Session indexFacts; + + public Optional<ErrorMessage> errorMessage = Optional.empty(); + + public ValidatePredicateVisitor(IndexFacts.Session indexFacts) { + this.indexFacts = indexFacts; + } + + @Override + public boolean visit(Item item) { + if (item instanceof PredicateQueryItem) { + visit((PredicateQueryItem) item); + } + return true; + } + + private void visit(PredicateQueryItem item) { + Index index = getIndexFromUnionOfDocumentTypes(item); + for (PredicateQueryItem.RangeEntry entry : item.getRangeFeatures()) { + long value = entry.getValue(); + if (value < index.getPredicateLowerBound() || value > index.getPredicateUpperBound()) { + errorMessage = Optional.of(ErrorMessage.createIllegalQuery( + String.format("%s=%d outside configured predicate bounds.", entry.getKey(), value))); + } + } + } + + private Index getIndexFromUnionOfDocumentTypes(PredicateQueryItem item) { + return indexFacts.getIndex(item.getIndexName()); + } + + @Override + public void onExit() {} + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/ValidateSortingSearcher.java 
b/container-search/src/main/java/com/yahoo/prelude/searcher/ValidateSortingSearcher.java new file mode 100644 index 00000000000..ee8a896f73b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/ValidateSortingSearcher.java @@ -0,0 +1,191 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.searcher; + +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.vespa.config.search.AttributesConfig; +import com.yahoo.search.config.ClusterConfig; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.query.Sorting; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static com.yahoo.prelude.querytransform.NormalizingSearcher.ACCENT_REMOVAL; + + +/** + * Check sorting specification makes sense to the search cluster before + * passing it on to the backend. 
+ * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +@Before(PhaseNames.BACKEND) +@After(ACCENT_REMOVAL) +public class ValidateSortingSearcher extends Searcher { + + private Map<String, AttributesConfig.Attribute> attributeNames = null; + private String clusterName = ""; + private final QrSearchersConfig.Searchcluster.Indexingmode.Enum indexingMode; + + public String getClusterName() { + return clusterName; + } + + public void setClusterName(String clusterName) { + this.clusterName = clusterName; + } + + private Map<String, AttributesConfig.Attribute> getAttributeNames() { + return attributeNames; + } + + public void setAttributeNames(Map<String, AttributesConfig.Attribute> attributeNames) { + this.attributeNames = attributeNames; + } + + public void initAttributeNames(AttributesConfig config) { + HashMap<String, AttributesConfig.Attribute> attributes = new HashMap<>(config.attribute().size()); + + for (AttributesConfig.Attribute attr : config.attribute()) { + if (AttributesConfig.Attribute.Collectiontype.SINGLE != attr.collectiontype()) { + continue; // cannot sort on multivalue attributes + } + attributes.put(attr.name(), attr); + } + setAttributeNames(attributes); + } + + public ValidateSortingSearcher(QrSearchersConfig qrsConfig, ClusterConfig clusterConfig, + AttributesConfig attributesConfig) + { + initAttributeNames(attributesConfig); + setClusterName(qrsConfig.searchcluster(clusterConfig.clusterId()).name()); + indexingMode = qrsConfig.searchcluster(clusterConfig.clusterId()).indexingmode(); + } + + @Override + public Result search(Query query, Execution execution) { + if (indexingMode != QrSearchersConfig.Searchcluster.Indexingmode.STREAMING) { + ErrorMessage e = validate(query); + if (e != null) { + Result r = new Result(query); + r.hits().addError(e); + return r; + } + } + return execution.search(query); + } + + private static Sorting.UcaSorter.Strength config2Strength(AttributesConfig.Attribute.Sortstrength.Enum s) { + if(s 
== AttributesConfig.Attribute.Sortstrength.PRIMARY) { + return Sorting.UcaSorter.Strength.PRIMARY; + } else if(s == AttributesConfig.Attribute.Sortstrength.SECONDARY) { + return Sorting.UcaSorter.Strength.SECONDARY; + } else if(s == AttributesConfig.Attribute.Sortstrength.TERTIARY) { + return Sorting.UcaSorter.Strength.TERTIARY; + } else if(s == AttributesConfig.Attribute.Sortstrength.QUATERNARY) { + return Sorting.UcaSorter.Strength.QUATERNARY; + } else if(s == AttributesConfig.Attribute.Sortstrength.IDENTICAL) { + return Sorting.UcaSorter.Strength.IDENTICAL; + } + return Sorting.UcaSorter.Strength.PRIMARY; + } + private ErrorMessage validate(Query query) { + Sorting sorting = query.getRanking().getSorting(); + List<Sorting.FieldOrder> l = (sorting != null) ? sorting.fieldOrders() : null; + + if (l == null) { + return null; + } + Map<String, AttributesConfig.Attribute> names = getAttributeNames(); + if (names == null) { + return null; + } + + String queryLocale = null; + if (query.getModel().getLocale() != null) { + queryLocale = query.getModel().getLocale().toString(); + } + + for (Sorting.FieldOrder f : l) { + String name = f.getFieldName(); + if ("[rank]".equals(name) || "[docid]".equals(name)) { + } else if (names.containsKey(name)) { + AttributesConfig.Attribute attrConfig = names.get(name); + if (attrConfig != null) { + if (f.getSortOrder() == Sorting.Order.UNDEFINED) { + f.setAscending(attrConfig.sortascending()); + } + if (f.getSorter().getClass().equals(Sorting.AttributeSorter.class)) { + // This indicates that it shall use default. 
+ if ((attrConfig.datatype() == AttributesConfig.Attribute.Datatype.STRING)) { + if (attrConfig.sortfunction() == AttributesConfig.Attribute.Sortfunction.UCA) { + String locale = attrConfig.sortlocale(); + if (locale == null || locale.isEmpty()) { + locale = queryLocale; + } + // can only use UcaSorter if we have knowledge about wanted locale + if (locale != null) { + f.setSorter(new Sorting.UcaSorter(name, locale, Sorting.UcaSorter.Strength.UNDEFINED)); + } else { + // wanted UCA but no locale known, so use lowercase as fallback + f.setSorter(new Sorting.LowerCaseSorter(name)); + } + } else if (attrConfig.sortfunction() == AttributesConfig.Attribute.Sortfunction.LOWERCASE) { + f.setSorter(new Sorting.LowerCaseSorter(name)); + } else if (attrConfig.sortfunction() == AttributesConfig.Attribute.Sortfunction.RAW) { + f.setSorter(new Sorting.RawSorter(name)); + } else { + // default if no config found for this string attribute + f.setSorter(new Sorting.LowerCaseSorter(name)); + } + } + } + if (f.getSorter() instanceof Sorting.UcaSorter) { + Sorting.UcaSorter sorter = (Sorting.UcaSorter) f.getSorter(); + String locale = sorter.getLocale(); + + if (locale == null || locale.isEmpty()) { + // first fallback + locale = attrConfig.sortlocale(); + } + if (locale == null || locale.isEmpty()) { + // second fallback + locale = queryLocale; + } + // final fallback + if (locale == null || locale.isEmpty()) { + locale = "en_US"; + } + + // getLogger().info("locale = " + locale + " attrConfig.sortlocale.value() = " + attrConfig.sortlocale.value() + " query.getLanguage() = " + query.getModel().getLanguage()); + // getLogger().info("locale = " + locale); + + Sorting.UcaSorter.Strength strength = sorter.getStrength(); + if (sorter.getStrength() == Sorting.UcaSorter.Strength.UNDEFINED) { + strength = config2Strength(attrConfig.sortstrength()); + } + if ((sorter.getStrength() == Sorting.UcaSorter.Strength.UNDEFINED) || (sorter.getLocale() == null) || sorter.getLocale().isEmpty()) { + // 
getLogger().info("locale = " + locale + " strength = " + strength.toString()); + sorter.setLocale(locale, strength); + } + //getLogger().info("locale = " + locale + " strength = " + strength.toString() + "decompose = " + sorter.getDecomposition()); + } + } else { + return ErrorMessage.createInvalidQueryParameter("The cluster " + getClusterName() + " has attribute config for field: " + name); + } + } else { + return ErrorMessage.createInvalidQueryParameter("The cluster " + getClusterName() + " has no sortable attribute named: " + name); + } + } + return null; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/package-info.java b/container-search/src/main/java/com/yahoo/prelude/searcher/package-info.java new file mode 100644 index 00000000000..5a795e859af --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.prelude.searcher; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBase.java b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBase.java new file mode 100644 index 00000000000..d3f51e76712 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBase.java @@ -0,0 +1,432 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.prelude.semantics;

import com.yahoo.search.Query;
import com.yahoo.prelude.querytransform.PhraseMatcher;
import com.yahoo.prelude.semantics.engine.RuleEngine;
import com.yahoo.prelude.semantics.parser.ParseException;
import com.yahoo.prelude.semantics.rule.*;
import com.yahoo.protect.Validator;

import java.io.File;
import java.util.*;

/**
 * A set of semantic production rules and named conditions used to analyze
 * and rewrite queries
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a>
 */
public class RuleBase {

    /** The globally identifying name of this rule base */
    private String name;

    /** The name of the source of these rules */
    private String source;

    /** The name of the automata file used, or null if none */
    protected String automataFileName = null;

    /**
     * True if this rule base is default.
     * The semantics of default is left to the surrounding framework
     */
    private boolean isDefault = false;

    private final List<ProductionRule> productionRules = new ArrayList<>();

    /** Replaced wholesale when included rule bases are inlined, hence not final */
    private Map<String, NamedCondition> namedConditions = new LinkedHashMap<>();

    /** The analyzer used to do evaluations over this rule base */
    private final RuleEngine analyzer = new RuleEngine(this);

    private static final PhraseMatcher nullPhraseMatcher = PhraseMatcher.getNullMatcher();

    /**
     * The matcher using an automata to match terms and phrases prior to matching rules
     * or the null matcher if no matcher is used.
     */
    private PhraseMatcher phraseMatcher = nullPhraseMatcher;

    /**
     * The names of the rule bases included indirectly or directly in this
     * Ordered by first to last included
     */
    private final Set<String> includedNames = new LinkedHashSet<>();

    /**
     * True if this uses an automata, even if an automata is not present right now. Useful to validate without
     * having automatas available
     */
    private boolean usesAutomata = false;

    /** Should we allow stemmed matches? */
    private boolean stemming = true;

    /** Creates an empty rule base. TODO: Disallow */
    public RuleBase() {
    }

    /** Creates an empty rule base */
    public RuleBase(String name) {
        setName(name);
    }

    /**
     * Creates a rule base from a file
     *
     * @param ruleFile the rule file to read. The name of the file (minus path) becomes the rule base name
     * @param automataFile the automata file, or null to not use an automata
     * @throws java.io.IOException if there is a problem reading one of the files
     * @throws ParseException if the rule file can not be parsed correctly
     * @throws RuleBaseException if the rule file contains inconsistencies
     */
    public static RuleBase createFromFile(String ruleFile, String automataFile) throws java.io.IOException, ParseException {
        return new RuleImporter().importFile(ruleFile, automataFile);
    }

    /**
     * Creates a rule base from a string
     *
     * @param name the name of the rule base
     * @param ruleString the rule string to read
     * @param automataFile the automata file, or null to not use an automata
     * @throws java.io.IOException if there is a problem reading the automata file
     * @throws com.yahoo.prelude.semantics.parser.ParseException if the rule file can not be parsed correctly
     * @throws com.yahoo.prelude.semantics.RuleBaseException if the rule file contains inconsistencies
     */
    public static RuleBase createFromString(String name, String ruleString, String automataFile) throws java.io.IOException, ParseException {
        // The base must carry its name before it is imported: initialize() puts it in a
        // hash set keyed on the name, and the importer needs a non-null source name.
        return new RuleImporter().importString(ruleString, automataFile, name, new RuleBase(name));
    }

    /** Set to true to enable stemmed matches. True by default */
    public void setStemming(boolean stemming) { this.stemming = stemming; }

    /** Returns whether stemmed matches are allowed. True by default */
    public boolean getStemming() { return stemming; }

    /**
     * <p>Include another rule base into this. This <b>transfers ownership</b>
     * of the given rule base - it can not be subsequently used for any purpose
     * (including accessing).</p>
     *
     * <p>Each rule base will only be included by the first include directive encountered
     * for that rule base.</p>
     */
    public void include(RuleBase include) {
        productionRules.add(new IncludeDirective(include));
        includedNames.addAll(include.includedNames);
        includedNames.add(include.getName());
    }

    /** Rules are order based - they are included recursively depth first */
    private void inlineIncluded() {
        // Re-add our own conditions last - conditions added later override earlier ones
        Map<String, NamedCondition> thisConditions = namedConditions;
        namedConditions = new LinkedHashMap<>();

        Set<RuleBase> included = new HashSet<>();
        included.add(this);
        for (ListIterator<ProductionRule> i = productionRules.listIterator(); i.hasNext(); ) {
            ProductionRule rule = i.next();
            if ( ! (rule instanceof IncludeDirective)) continue;

            // Replace the directive by the rules it stands for, at the same position
            i.remove();
            RuleBase toInclude = ((IncludeDirective)rule).getIncludedBase();
            if ( ! included.contains(toInclude))
                toInclude.inlineIn(this, i, included);
        }

        namedConditions.putAll(thisConditions);
    }

    /**
     * Recursively include this and everything it includes into the given rule base.
     * Skips bases already included in this.
     */
    private void inlineIn(RuleBase receiver, ListIterator<ProductionRule> receiverRules, Set<RuleBase> included) {
        if (included.contains(this)) return;
        included.add(this);

        for (ProductionRule rule : productionRules) {
            if (rule instanceof IncludeDirective)
                ((IncludeDirective)rule).getIncludedBase().inlineIn(receiver, receiverRules, included);
            else
                receiverRules.add(rule);
        }

        receiver.namedConditions.putAll(namedConditions);
    }

    /** Adds a named condition which can be referenced by rules */
    public void addCondition(NamedCondition namedCondition) {
        namedConditions.put(namedCondition.getName(), namedCondition);

        Condition condition = namedCondition.getCondition();
        Condition superCondition = findIncludedCondition(namedCondition.getName());
        resolveSuper(condition, superCondition);
    }

    /** Points every super condition found in the given condition tree to the given condition */
    private void resolveSuper(Condition condition, Condition superCondition) {
        if (condition instanceof SuperCondition) {
            ((SuperCondition)condition).setCondition(superCondition);
        }
        else if (condition instanceof CompositeCondition) {
            for (Iterator<Condition> i = ((CompositeCondition)condition).conditionIterator(); i.hasNext(); ) {
                resolveSuper(i.next(), superCondition);
            }
        }
    }

    /**
     * Returns the condition having the given name in a rule base included by this,
     * directly or through other includes, or null if there is none.
     * Includes are searched in the order they were added, each one depth first.
     */
    private Condition findIncludedCondition(String name) {
        for (ProductionRule rule : productionRules) {
            if ( ! (rule instanceof IncludeDirective)) continue;

            RuleBase included = ((IncludeDirective)rule).getIncludedBase();
            NamedCondition condition = included.getCondition(name);
            if (condition != null) return condition.getCondition();

            // Not defined by the included base itself: look in the bases it includes.
            // (The result of this lookup used to be discarded.)
            Condition inherited = included.findIncludedCondition(name);
            if (inherited != null) return inherited;
        }
        return null;
    }

    /**
     * Returns whether this rule base - directly or through other includes - includes
     * the rule base with the given name
     */
    public boolean includes(String ruleBaseName) {
        return includedNames.contains(ruleBaseName);
    }

    /**
     * Sets the name of this rule base.
     * If this rule base is given to a searcher, it must be removed before the name
     * change, and then re-added
     */
    public void setName(String name) {
        Validator.ensureNotNull("Rule base name", name);
        this.name = name;
    }

    /** Returns the name of this rule base, or null if it was created without a name and none is set yet */
    public String getName() { return name; }

    /**
     * Sets the name of the automata file to use as a source of condition matches.
     * To reload the automata, call this again. This can be done safely at any
     * point by any thread while this rule base is in use.
     *
     * @throws IllegalArgumentException if the file is not found
     */
    public void setAutomataFile(String automataFile) {
        if ( ! new File(automataFile).exists())
            throw new IllegalArgumentException("Automata file '" + automataFile + "' " +
                                               "included in " + this + " not found");
        // Configure the matcher completely before publishing it, such that threads
        // evaluating queries never observe a half configured matcher
        PhraseMatcher matcher = new PhraseMatcher(automataFile);
        matcher.setIgnorePluralForm(true);
        matcher.setMatchAll(true);
        matcher.setMatchPhraseItems(true);
        matcher.setMatchSingleItems(true);
        setPhraseMatcher(matcher);
        this.automataFileName = automataFile;
    }

    /** Returns the name of the automata file used, or null if none */
    public String getAutomataFile() { return automataFileName; }

    /** Sets whether this base is default, the semantics of default is left to the application */
    public void setDefault(boolean isDefault) { this.isDefault = isDefault; }

    /** Returns whether this base is default, the semantics of default is left to the application */
    public boolean isDefault() { return isDefault; }

    /** Thread safely sets the phrase matcher to use in this, or null to not use a phrase matcher */
    public synchronized void setPhraseMatcher(PhraseMatcher matcher) {
        if (matcher == null)
            this.phraseMatcher = nullPhraseMatcher;
        else
            this.phraseMatcher = matcher;
    }

    /** Thread safely gets the phrase matcher to use in this */
    public synchronized PhraseMatcher getPhraseMatcher() {
        return this.phraseMatcher;
    }

    /**
     * The identifying name of the source of this rule base.
     * The absolute file name if this came from a file.
     */
    public String getSource() { return source; }

    /**
     * Sets the name of the source of this rule base. If this came from a file,
     * the source must be set to the absolute file name of the rule base
     */
    public void setSource(String source) { this.source = source; }

    /** Returns whether this uses a phrase matcher automata */
    public boolean usesAutomata() {
        return usesAutomata || getPhraseMatcher() != nullPhraseMatcher;
    }

    /**
     * Set to true if this uses an automata, even if an automata is not present right now.
     * Useful to validate without having automatas available
     */
    void setUsesAutomata(boolean usesAutomata) { this.usesAutomata = usesAutomata; }

    /** Adds a rule. Note that included rules are added through a list iterator, not this */
    public void addRule(ProductionRule productionRule) {
        productionRules.add(productionRule);
    }

    /** Returns a named condition, or null if no condition with that name exists */
    public NamedCondition getCondition(String name) {
        return namedConditions.get(name);
    }

    /**
     * Call this when all rules are added, before any rule evaluation starts.
     *
     * @throws RuleBaseException if there is an inconsistency in the rule base.
     */
    public void initialize() {
        inlineIncluded();
        makeReferences();
    }

    /**
     * Analyzes a query over this rule base
     *
     * @param query the query to analyze
     * @param traceLevel the level of tracing to add to the query
     * @return the error caused by analyzing the query, or null if there was no error
     *         If there is an error, this query is destroyed (unusable)
     */
    public String analyze(Query query, int traceLevel) {
        // Temporarily turn on query tracing if rule tracing is requested, restore it afterwards
        int queryTraceLevel = query.getTraceLevel();
        if (traceLevel > 0 && queryTraceLevel == 0)
            query.setTraceLevel(1);

        matchAutomata(query, traceLevel);
        String error = analyzer.evaluate(query, traceLevel);

        query.setTraceLevel(queryTraceLevel);
        return error;
    }

    /** Annotates the query items matched by the phrase matcher of this, if any */
    protected void matchAutomata(Query query, int traceLevel) {
        List<PhraseMatcher.Phrase> matches = getPhraseMatcher().matchPhrases(query.getModel().getQueryTree().getRoot());
        if (matches == null || matches.isEmpty()) return;
        for (PhraseMatcher.Phrase phrase : matches) {
            if (traceLevel >= 3)
                query.trace("Semantic searcher automata matched " + phrase, false, 1);

            annotatePhrase(phrase, query, traceLevel);
        }
    }

    // Note: When changing this method, change CompatibleRuleBase as well!
    // TODO: Values are not added right now
    /**
     * Adds one annotation to the first item of the phrase for each "|"-separated token
     * in the phrase data. A token is either "annotation" or "annotation;value".
     */
    protected void annotatePhrase(PhraseMatcher.Phrase phrase, Query query, int traceLevel) {
        for (StringTokenizer tokens = new StringTokenizer(phrase.getData(), "|", false); tokens.hasMoreTokens(); ) {
            String token = tokens.nextToken();
            int semicolonIndex = token.indexOf(";");
            String annotation = token;
            String value = "";
            if (semicolonIndex > 0) {
                annotation = token.substring(0, semicolonIndex);
                value = token.substring(semicolonIndex + 1);
            }

            // Annotate all matched items
            phrase.getItem(0).addAnnotation(annotation, phrase);
            if (traceLevel >= 4)
                query.trace("   Annotating '" + phrase + "' as " + annotation +
                            (value.equals("") ? "" : "=" + value), false, 1);
        }
    }

    /** Lets all rules and named conditions resolve their references into this rule base */
    private void makeReferences() {
        for (Iterator<ProductionRule> i = ruleIterator(); i.hasNext(); ) {
            ProductionRule rule = i.next();
            rule.makeReferences(this);
        }
        for (Iterator<NamedCondition> i = conditionIterator(); i.hasNext(); ) {
            NamedCondition namedCondition = i.next();
            namedCondition.getCondition().makeReferences(this);
        }
    }

    /** Returns the rules in added order */
    public ListIterator<ProductionRule> ruleIterator() { return productionRules.listIterator(); }

    /** Returns the rules unmodifiable */
    public List<ProductionRule> rules() {
        return Collections.unmodifiableList(productionRules);
    }

    /** Returns the named conditions in added order */
    public Iterator<NamedCondition> conditionIterator() { return namedConditions.values().iterator(); }

    /** Returns true if the given object is a rule base having the same name as this */
    @Override
    public boolean equals(Object object) {
        if ( ! (object instanceof RuleBase)) return false;
        // Null safe: a base made by the no-argument constructor has no name yet
        return Objects.equals(((RuleBase)object).getName(), this.getName());
    }

    @Override
    public int hashCode() {
        return Objects.hashCode(getName());
    }

    @Override
    public String toString() {
        return "rule base '" + getName() + "'";
    }

    /**
     * Returns a string containing all the rules and conditions of this rule base
     * in the form they will be evaluated, with all included rule bases inlined
     */
    public String toContentString() {
        StringBuilder b = new StringBuilder();
        for (ProductionRule rule : productionRules) {
            b.append(rule.toString());
            b.append("\n");
        }
        b.append("\n");
        b.append("\n");
        for (NamedCondition condition : namedConditions.values()) {
            b.append(condition.toString());
            b.append("\n");
        }
        return b.toString();
    }

    /** A placeholder for an included rule base until it is inlined */
    private static class IncludeDirective extends ProductionRule {

        private final RuleBase includedBase;

        public IncludeDirective(RuleBase ruleBase) {
            this.includedBase = ruleBase;
        }

        public RuleBase getIncludedBase() { return includedBase; }

        /** Not used */
        public String getSymbol() { return ""; }

    }

}
// diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBaseException.java b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBaseException.java
// new file mode 100644
// index 00000000000..34c113ceec8
// --- /dev/null
// +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBaseException.java
// @@ -0,0 +1,20 @@
// +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.semantics;

/**
 * Thrown on rule base consistency problems
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a>
 */
@SuppressWarnings("serial")
public class RuleBaseException extends RuntimeException {

    public RuleBaseException(String message) {
        super(message);
    }

    public RuleBaseException(String message,Exception cause) {
        super(message,cause);
    }

}
// diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/RuleImporter.java b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleImporter.java
// new file mode 100644
// index 00000000000..1dab816f22b
// --- /dev/null
// +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleImporter.java
// @@ -0,0 +1,285 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.semantics;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;
import java.util.List;

import com.yahoo.io.IOUtils;
import com.yahoo.io.reader.NamedReader;
import com.yahoo.prelude.semantics.parser.*;

/**
 * Imports rule bases from various sources.
 *
 * @author bratseth
 */
// Uses the JavaCC-generated parser to read rule bases.
// This is an intermediate between the parser and the rule base being loaded
// on implementation of some directives, for example, it knows where to find
// rule bases included into others, while neither the rule base or the parser knows.
public class RuleImporter {

    /**
     * If this is set, imported rule bases are looked up in this config
     * otherwise, they are looked up as files
     */
    private final SemanticRulesConfig config;

    /**
     * Ignore requests to read automata files.
     * Useful to validate rule bases without having automatas present
     */
    private final boolean ignoreAutomatas;

    /**
     * Ignore requests to include files.
     * Useful to validate rule bases one by one in config
     */
    private final boolean ignoreIncludes;

    /** Create a rule importer which will read from file */
    public RuleImporter() {
        this(null, false);
    }

    /** Create a rule importer which will read from a config object */
    public RuleImporter(SemanticRulesConfig config) {
        this(config, false);
    }

    public RuleImporter(boolean ignoreAutomatas) {
        this(null, ignoreAutomatas);
    }

    public RuleImporter(boolean ignoreAutomatas, boolean ignoreIncludes) {
        this(null, ignoreAutomatas, ignoreIncludes);
    }

    public RuleImporter(SemanticRulesConfig config, boolean ignoreAutomatas) {
        this(config, ignoreAutomatas, false);
    }

    /**
     * Creates a rule importer
     *
     * @param config the config to look up included rule bases in, or null to look them up as files
     * @param ignoreAutomatas whether requests to read automata files should be ignored
     * @param ignoreIncludes whether requests to include other rule bases should be ignored
     */
    public RuleImporter(SemanticRulesConfig config, boolean ignoreAutomatas, boolean ignoreIncludes) {
        this.config = config;
        this.ignoreAutomatas = ignoreAutomatas;
        this.ignoreIncludes = ignoreIncludes;
    }

    /**
     * Imports semantic rules from a file
     *
     * @param fileName the rule file to use
     * @throws java.io.IOException if the file can not be read for some reason
     * @throws ParseException if the file does not contain a valid semantic rule set
     */
    public RuleBase importFile(String fileName) throws IOException, ParseException {
        return importFile(fileName, null);
    }

    /**
     * Imports semantic rules from a file
     *
     * @param fileName the rule file to use
     * @param automataFile the automata file to use, or null to not use any
     * @throws java.io.IOException if the file can not be read for some reason
     * @throws ParseException if the file does not contain a valid semantic rule set
     */
    public RuleBase importFile(String fileName, String automataFile) throws IOException, ParseException {
        return importFile(fileName, automataFile, null);
    }

    /**
     * Imports semantic rules from a file
     *
     * @param fileName the rule file to use
     * @param automataFile the automata file to use, or null to not use any
     * @param ruleBase an existing rule base to import these rules into, or null
     *        to create a new
     * @throws java.io.IOException if the file can not be read for some reason
     * @throws ParseException if the file does not contain a valid semantic rule set
     */
    public RuleBase importFile(String fileName, String automataFile, RuleBase ruleBase) throws IOException, ParseException {
        ruleBase = privateImportFile(fileName, automataFile, ruleBase);
        ruleBase.initialize();
        return ruleBase;
    }

    /**
     * Reads a rule file into a rule base named after the file (minus path and extension).
     * Returns an uninitialized rule base.
     */
    public RuleBase privateImportFile(String fileName, String automataFile, RuleBase ruleBase) throws IOException, ParseException {
        BufferedReader reader = null;
        try {
            reader = IOUtils.createReader(fileName, "utf-8");
            File file = new File(fileName);
            String absoluteFileName = file.getAbsolutePath();
            if (ruleBase == null)
                ruleBase = new RuleBase();
            ruleBase.setName(stripLastName(file.getName()));
            privateImportFromReader(reader, absoluteFileName, automataFile, ruleBase);
            return ruleBase;
        }
        finally {
            IOUtils.closeReader(reader);
        }
    }

    /**
     * Imports all the rule files (files ending by "sr") in the given directory
     *
     * @throws IOException if the directory does not exist or its content can not be listed
     */
    public List<RuleBase> importDir(String ruleBaseDir) throws IOException, ParseException {
        File ruleBaseDirFile = new File(ruleBaseDir);
        if (!ruleBaseDirFile.exists())
            throw new IOException("Rule base dir '" + ruleBaseDirFile.getAbsolutePath() + "' does not exist");
        File[] files = ruleBaseDirFile.listFiles();
        // listFiles returns null when the path is not a directory or can not be read
        if (files == null)
            throw new IOException("Rule base dir '" + ruleBaseDirFile.getAbsolutePath() + "' is not a readable directory");
        Arrays.sort(files);
        List<RuleBase> ruleBases = new java.util.ArrayList<>();
        for (File file : files) {
            if (!file.getName().endsWith(".sr")) continue;
            RuleBase base = importFile(file.getAbsolutePath());
            ruleBases.add(base);
        }
        return ruleBases;
    }

    /** Read and include a rule base in another */
    public void include(String ruleBaseName, RuleBase ruleBase) throws java.io.IOException, ParseException {
        if (ignoreIncludes) return;
        RuleBase include;
        if (config == null) {
            include = privateImportFromDirectory(ruleBaseName, ruleBase);
        }
        else {
            include = privateImportFromConfig(ruleBaseName);
        }
        ruleBase.include(include);
    }

    /**
     * Reads the named rule base from the directory holding the source of the including rule base.
     * Returns an uninitialized rule base.
     */
    private RuleBase privateImportFromDirectory(String ruleBaseName, RuleBase ruleBase) throws IOException, ParseException {
        RuleBase include = new RuleBase();
        // Resolve to an absolute file first: a source without a directory part
        // has no parent, and is taken to live in the current directory
        File includeDir = new File(ruleBase.getSource()).getAbsoluteFile().getParentFile();
        if (!ruleBaseName.endsWith(".sr"))
            ruleBaseName = ruleBaseName + ".sr";
        File importFile = new File(includeDir, ruleBaseName);
        if (!importFile.exists())
            throw new IOException("No file named '" + shortenPath(importFile.getPath()) + "'");
        return privateImportFile(importFile.getPath(), null, include);
    }

    /** Returns an uninitialized rule base */
    private RuleBase privateImportFromConfig(String ruleBaseName) throws IOException, ParseException {
        SemanticRulesConfig.Rulebase ruleBaseConfig = findRuleBaseConfig(config, ruleBaseName);
        if (ruleBaseConfig == null) // retry without the file extension, if any
            ruleBaseConfig = findRuleBaseConfig(config, stripLastName(ruleBaseName));
        if (ruleBaseConfig == null)
            throw new ParseException("Could not find included rule base '" + ruleBaseName + "'");
        return privateImportConfig(ruleBaseConfig);
    }

    /** Returns the config of the rule base having exactly the given name, or null if none */
    private SemanticRulesConfig.Rulebase findRuleBaseConfig(SemanticRulesConfig config, String ruleBaseName) {
        for (SemanticRulesConfig.Rulebase ruleBaseConfig : config.rulebase()) {
            if (ruleBaseConfig.name().equals(ruleBaseName))
                return ruleBaseConfig;
        }
        return null;
    }

    /** Sets the automata of the given rule base, or just marks it as using one if automatas are ignored */
    public void setAutomata(RuleBase base, String automata) {
        if (ignoreAutomatas)
            base.setUsesAutomata(true); // Stop it from failing on automata condition references
        else
            base.setAutomataFile(automata);
    }

    /** Returns the given name with everything from the last dot (if any) removed */
    static String stripLastName(String fileName) {
        int lastDotIndex = fileName.lastIndexOf(".");
        if (lastDotIndex < 0) return fileName;
        return fileName.substring(0, lastDotIndex);
    }

    public RuleBase importString(String string, String automataFile) throws IOException, ParseException {
        return importString(string, automataFile, null, null);
    }

    public RuleBase importString(String string, String automataFile, String sourceName) throws IOException, ParseException {
        return importString(string, automataFile, sourceName, null);
    }

    public RuleBase importString(String string, String automataFile, RuleBase ruleBase) throws IOException, ParseException {
        return importString(string, automataFile, null, ruleBase);
    }

    public RuleBase importString(String string, String automataFile, String sourceName, RuleBase ruleBase) throws IOException, ParseException {
        return importFromReader(new StringReader(string), sourceName, automataFile, ruleBase);
    }

    public RuleBase importConfig(SemanticRulesConfig.Rulebase ruleBaseConfig) throws IOException, ParseException {
        RuleBase ruleBase = privateImportConfig(ruleBaseConfig);
        ruleBase.initialize();
        return ruleBase;
    }

    /** Imports an uninitialized rule base */
    public RuleBase privateImportConfig(SemanticRulesConfig.Rulebase ruleBaseConfig) throws IOException, ParseException {
        if (config == null) throw new IllegalStateException("Must initialize with config if importing from config");
        RuleBase ruleBase = new RuleBase();
        ruleBase.setName(ruleBaseConfig.name());
        return privateImportFromReader(new StringReader(ruleBaseConfig.rules()), "semantic-rules.cfg",
                                       ruleBaseConfig.automata(), ruleBase);
    }

    public RuleBase importFromReader(Reader reader, String sourceInfo, String automataFile) throws ParseException {
        return importFromReader(reader, sourceInfo, automataFile, null);
    }

    /**
     * Imports rules from a reader
     *
     * @param reader the reader containing rules on the proper syntax
     * @param sourceName a string describing the source of the rules used for error messages,
     *        or null to use "anonymous"
     * @param automataFile the automata file to use, or null or empty to not use any
     * @param ruleBase an existing rule base to import the rules into, or null to create a new one
     * @return the rule base containing the rules added from the reader
     * @throws ParseException if the reader contains illegal rule syntax
     */
    public RuleBase importFromReader(Reader reader, String sourceName, String automataFile, RuleBase ruleBase) throws ParseException {
        ruleBase = privateImportFromReader(reader, sourceName, automataFile, ruleBase);
        ruleBase.initialize();
        return ruleBase;
    }

    /** Returns an uninitialized rule base */
    public RuleBase privateImportFromReader(Reader reader, String sourceName, String automataFile, RuleBase ruleBase) throws ParseException {
        // Default the source name up front: it is dereferenced both below and in the
        // error path, also when the caller supplies its own rule base
        if (sourceName == null)
            sourceName = "anonymous";
        try {
            if (ruleBase == null) {
                ruleBase = new RuleBase();
                ruleBase.setName(sourceName);
            }
            ruleBase.setSource(sourceName.replace('\\', '/'));
            new SemanticsParser(reader).semanticRules(ruleBase, this);
            if (automataFile != null && !automataFile.isEmpty())
                ruleBase.setAutomataFile(automataFile.replace('\\', '/'));
            return ruleBase;
        } catch (Throwable t) { // also catches token mgr errors
            ParseException p = new ParseException("Could not parse '" + shortenPath(sourceName) + "'");
            p.initCause(t);
            throw p;
        }
    }

    /**
     * Snips what's in front of rules/ if "rules/" is present in the string
     * to avoid displaying details about where application content is copied
     * (if rules/ is present, these rules are read from an application package)
     */
    private static String shortenPath(String path) {
        int rulesIndex = path.indexOf("rules/");
        if (rulesIndex < 0) return path;
        return path.substring(rulesIndex);
    }

}
// diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/SemanticSearcher.java b/container-search/src/main/java/com/yahoo/prelude/semantics/SemanticSearcher.java
// new file mode 100644
// index 00000000000..f4858bbb9e1
// --- /dev/null
// +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/SemanticSearcher.java
// @@ -0,0 +1,127 @@
// +//
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.semantics;

import com.google.inject.Inject;
import com.yahoo.component.chain.dependencies.After;
import com.yahoo.component.chain.dependencies.Before;
import com.yahoo.prelude.ConfigurationException;
import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.search.Searcher;
import com.yahoo.processing.request.CompoundName;
import com.yahoo.search.result.ErrorMessage;
import com.yahoo.search.searchchain.Execution;
import com.yahoo.search.searchchain.PhaseNames;

import java.util.*;

import static com.yahoo.prelude.querytransform.IndexCombinatorSearcher.MIXED_RECALL_REWRITE;
import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING;

/**
 * A searcher which runs the query through a semantic rule base and
 * rewrites the query according to the rules that matched.
 *
 * @author bratseth
 */
@After(PhaseNames.RAW_QUERY)
@Before({PhaseNames.TRANSFORMED_QUERY, STEMMING, MIXED_RECALL_REWRITE})
public class SemanticSearcher extends Searcher {

    /** Query property naming the rule base to evaluate */
    private static final CompoundName RULE_BASE_PROPERTY = new CompoundName("rules.rulebase");

    /** Query property turning rule evaluation off */
    private static final CompoundName RULES_OFF_PROPERTY = new CompoundName("rules.off");

    /** Query property overriding the trace level used during rule evaluation */
    private static final CompoundName RULES_TRACELEVEL_PROPERTY = new CompoundName("tracelevel.rules");

    /** The rule base used when the query does not name one, or null if there is none */
    private RuleBase defaultRuleBase;

    /** The rule bases of this by name (the default is one of them) */
    private final Map<String, RuleBase> ruleBases = new java.util.HashMap<>();

    /** Creates a semantic searcher having the given rule base as its only, and default, rule base */
    public SemanticSearcher(RuleBase ruleBase) {
        this(Collections.singletonList(ruleBase));
        defaultRuleBase = ruleBase;
    }

    public SemanticSearcher(RuleBase ... ruleBases) {
        this(Arrays.asList(ruleBases));
    }

    @Inject
    public SemanticSearcher(SemanticRulesConfig config) {
        this(importRuleBases(config));
    }

    /** Creates a searcher over the given rule bases. The last one marked as default becomes the default */
    public SemanticSearcher(List<RuleBase> ruleBases) {
        for (RuleBase candidate : ruleBases) {
            if (candidate.isDefault()) {
                defaultRuleBase = candidate;
            }
            this.ruleBases.put(candidate.getName(), candidate);
        }
    }

    /** Imports every rule base in the given config, carrying over its default flag */
    private static List<RuleBase> importRuleBases(SemanticRulesConfig config) {
        try {
            RuleImporter importer = new RuleImporter(config);
            List<RuleBase> imported = new java.util.ArrayList<>();
            for (SemanticRulesConfig.Rulebase baseConfig : config.rulebase()) {
                RuleBase base = importer.importConfig(baseConfig);
                if (baseConfig.isdefault()) {
                    base.setDefault(true);
                }
                imported.add(base);
            }
            return imported;
        }
        catch (Exception e) {
            throw new ConfigurationException("Failed configuring semantic rules",e);
        }
    }

    @Override
    public Result search(Query query, Execution execution) {
        boolean rulesDisabled = query.properties().getBoolean(RULES_OFF_PROPERTY);
        if ( ! rulesDisabled) {
            // Rule tracing defaults to two levels below the query trace level, never below 0
            int requestedTraceLevel = query.properties().getInteger(RULES_TRACELEVEL_PROPERTY, query.getTraceLevel()-2);
            int traceLevel = requestedTraceLevel < 0 ? 0 : requestedTraceLevel;

            RuleBase selected = resolveRuleBase(query);
            if (selected != null) {
                String error = selected.analyze(query, traceLevel);
                if (error != null) {
                    return handleError(selected, query, error);
                }
            }
        }
        return execution.search(query);
    }

    /** Returns the rule base named by the query, or the default one if the query names none */
    private RuleBase resolveRuleBase(Query query) {
        String requestedName = query.properties().getString(RULE_BASE_PROPERTY);
        if (requestedName == null || requestedName.isEmpty()) {
            return getDefaultRuleBase();
        }

        RuleBase requested = getRuleBase(requestedName);
        if (requested != null) {
            return requested;
        }
        throw new RuleBaseException("Requested rule base '" + requestedName + "' does not exist");
    }

    /** Logs the given rule evaluation error and returns it as an error result for the query */
    private Result handleError(RuleBase ruleBase,Query query,String error) {
        StringBuilder message = new StringBuilder();
        message.append("Evaluation of query '").append(query.getModel().getQueryTree());
        message.append("' over '").append(ruleBase);
        message.append("' caused the invalid query '").append(query.getModel().getQueryTree().getRoot());
        message.append("': ").append(error);

        String text = message.toString();
        getLogger().warning(text);
        return new Result(query,ErrorMessage.createInvalidQueryTransformation(text));
    }

    /** Returns the default rule base */
    public RuleBase getDefaultRuleBase() { return defaultRuleBase; }

    /**
     * Returns the rule base of the given name, or null if none.
     * The name is looked up with everything from its last dot (if any) removed.
     */
    public RuleBase getRuleBase(String ruleBaseName) {
        return ruleBases.get(RuleImporter.stripLastName(ruleBaseName));
    }

}
// diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/RuleBaseBenchmark.java b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/RuleBaseBenchmark.java
// new file mode 100644
// index 00000000000..b04e693089a
// --- /dev/null
// +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/RuleBaseBenchmark.java
// @@ -0,0 +1,86 @@
// +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.semantics.benchmark;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;

import com.yahoo.search.Query;
import com.yahoo.prelude.semantics.RuleBase;
import com.yahoo.prelude.semantics.RuleImporter;
import com.yahoo.prelude.semantics.parser.ParseException;

/**
 * Command line tool which times the evaluation of a file of queries,
 * one query per line, over a rule base.
 */
public class RuleBaseBenchmark {

    /**
     * Runs all the queries of the query file through the rule base the given number of times
     * and prints the elapsed time to standard out.
     *
     * @param ruleBaseFile the rule file to benchmark. If it ends by ".sr" and a file with the same
     *        name but ending by ".fsa" exists, that file is used as automata
     * @param queryFile a file containing one query string per line
     * @param iterations the number of times to evaluate the complete query set
     * @throws IOException if one of the files can not be read
     * @throws ParseException if the rule file can not be parsed
     */
    public void benchmark(String ruleBaseFile, String queryFile, int iterations)
            throws IOException, ParseException {

        String fsaFile = null;
        if (ruleBaseFile.endsWith(".sr")) {
            fsaFile = ruleBaseFile.substring(0, ruleBaseFile.length() - 3) + ".fsa";
            File fsa = new File(fsaFile);
            if (!fsa.exists()) {
                fsaFile = null;
            }
        }
        RuleBase ruleBase = new RuleImporter().importFile(ruleBaseFile, fsaFile);
        ArrayList<String> queries = new ArrayList<>();
        // try-with-resources: the reader was previously never closed
        try (BufferedReader reader = new BufferedReader(new FileReader(queryFile))) {
            String line;
            while ((line = reader.readLine()) != null) {
                queries.add(line);
            }
        }
        Date start = new Date();
        for (int i = 0; i < iterations; i++) {
            for (String queryString : queries) {
                Query query = new Query("?query=" + queryString);
                ruleBase.analyze(query, 0);
            }
        }
        Date end = new Date();
        long elapsed = end.getTime() - start.getTime();
        System.out.print("BENCHMARK: rulebase=" + ruleBaseFile +
                         "\n           fsa=" + fsaFile +
                         "\n           queries=" + queryFile +
                         "\n           iterations=" + iterations +
                         "\n           elapsed=" + elapsed + "ms\n");
    }


    /** Usage: RuleBaseBenchmark ruleBaseFile queryFile iterations. Exits with status 1 on any error */
    public static void main(String[] args) {
        if (args.length < 3) {
            System.out.println("USAGE: RuleBaseBenchmark ruleBaseFile queryFile iterations");
            System.exit(1);
        }

        try {
            new RuleBaseBenchmark().benchmark(args[0], args[1], Integer.parseInt(args[2]));
        }
        catch (Exception e) {
            System.out.println("ERROR: " + collectMessage(e));
            //e.printStackTrace();
            System.exit(1);
        }
    }

    /** Returns the messages of the given throwable and all its causes, separated by ": " */
    private static String collectMessage(Throwable e) {
        if (e.getCause() == null)
            return messageOrName(e);
        else
            return messageOrName(e) + ": " + collectMessage(e.getCause());
    }

    /** Returns the message of the given throwable, or its class name if it has no message */
    private static String messageOrName(Throwable e) {
        if (e.getMessage() != null)
            return e.getMessage();
        else
            return e.getClass().getName();
    }


}
// diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/queries b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/queries
// new file mode 100644
// index 00000000000..3feebfb4698
// --- /dev/null
// +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/queries
// @@ -0,0 +1,5 @@
// +shop in geary street
// +foo
// +bar
// +aardwark
// +to be or not to be that is the question
// diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/rules.sr b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/rules.sr
// new file mode 100644
// index 00000000000..020699ba7cb
// --- /dev/null
// +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/rules.sr
// @@ -0,0 +1,62 @@
// +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
// +# Local use case
// +
// +[listing] [preposition] [place] -> listing:[listing] place:[place];
// +
// +[listing] :- restaurant, shop, cafe, hotel;
// +
// +[preposition] :- in, at, near;
// +
// +[place] :- [street] [city], [street];
// +
// +[street] :- geary street, geary;
// +[city] :- san francisco;
// +
// +# Shopping use case
// +
// +[brand] -> brand:[brand];
// +[category] -> category:[category];
// +
// +[brand] :- sony, dell; # Refer to automata later
// +[category] :- digital camera, camera, phone; # Ditto
// +
// +# Travel use case, note how explicit reference name overrides named condition as reference name
// +
// +# [from:place] [to:place] -> from:[from] to:[to]
// +
// +# Answers use case
// +
// +# why is [noun] ...
[adjective] +> ?about:[noun] + +# Adding rule using the default query mode (and/or) + +[foobar] +> foobar:[foobar]; + +[foobar] :- foo, bar; + +# Adding rank rule + +[word] +> $foobar:[word]; + +[word] :- aardwark, word; + +# Literal production + +lotr -> lord of the rings; + +# Adding a negative + +java +> -coffee; + +# Adding another negative +# TODO: Term types in conditions +# java -coffee +> -island + +# "Stopwords" + +be -> ; +the -> ; + +[stopword] -> ; + +[stopword] :- to, or, not; diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/config/RuleConfigDeriver.java b/container-search/src/main/java/com/yahoo/prelude/semantics/config/RuleConfigDeriver.java new file mode 100644 index 00000000000..b0e50727773 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/config/RuleConfigDeriver.java @@ -0,0 +1,133 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.semantics.config;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.List;
+
+import com.yahoo.io.IOUtils;
+import com.yahoo.io.reader.NamedReader;
+import com.yahoo.prelude.semantics.RuleBase;
+import com.yahoo.prelude.semantics.RuleImporter;
+import com.yahoo.prelude.semantics.parser.ParseException;
+
+/**
+ * Imports the rule base files found in a directory and writes them out as a
+ * semantic-rules.cfg file in a given output directory.
+ *
+ * @author bratseth
+ */
+// Note: This is not used by the config model any more and can be removed
+public class RuleConfigDeriver {
+
+    /**
+     * Imports the rule bases in ruleBaseDir and writes semantic-rules.cfg into outputDir.
+     *
+     * @throws IOException if outputDir does not exist, or on failure to read or write
+     * @throws ParseException if more than one rule base is marked as default, or if propagated from the import
+     */
+    public void derive(String ruleBaseDir, String outputDir) throws IOException, ParseException {
+        File target = new File(outputDir);
+        if ( ! target.exists())
+            throw new IOException("Output dir " + target.getAbsolutePath() +
+                                  " does not exist");
+
+        exportConfig(derive(ruleBaseDir), outputDir);
+    }
+
+    /** Imports the rule bases in the given directory and checks that at most one of them is the default */
+    public List<RuleBase> derive(String ruleBaseDir) throws IOException, ParseException {
+        // true: ignore automatas, so we don't fail if they are not available in config
+        List<RuleBase> imported = new RuleImporter(true).importDir(ruleBaseDir);
+        ensureZeroOrOneDefault(imported);
+        return imported;
+    }
+
+    /** Imports one rule base from each given reader and checks that at most one of them is the default */
+    public List<RuleBase> derive(List<NamedReader> readers) throws IOException, ParseException {
+        // true: ignore automatas, so we don't fail if they are not available in config
+        RuleImporter importer = new RuleImporter(true);
+        List<RuleBase> imported = new ArrayList<>();
+        for (NamedReader source : readers) {
+            imported.add(importer.importFromReader(source, source.getName(), null));
+        }
+        ensureZeroOrOneDefault(imported);
+        return imported;
+    }
+
+    /** Throws a ParseException naming both rule bases if a second default rule base is encountered */
+    private void ensureZeroOrOneDefault(List<RuleBase> ruleBases) throws ParseException {
+        String firstDefault = null;
+        for (RuleBase candidate : ruleBases) {
+            if ( ! candidate.isDefault()) continue;
+            if (firstDefault != null)
+                throw new ParseException("Both '" + firstDefault + "' and '" + candidate.getName() +
+                                         "' is marked as default, there can only be one");
+            firstDefault = candidate.getName();
+        }
+    }
+
+    /** Writes the name and the source (as a single line) of each rule base to semantic-rules.cfg */
+    private void exportConfig(List<RuleBase> ruleBases, String outputDir)
+            throws IOException {
+        BufferedWriter out = null;
+        try {
+            out = IOUtils.createWriter(outputDir + "/semantic-rules.cfg","utf-8",false);
+            out.write("rulebase[" + ruleBases.size() + "]\n");
+            int index = 0;
+            for (RuleBase ruleBase : ruleBases) {
+                out.write("rulebase[" + index + "].name \"" + ruleBase.getName() + "\"\n");
+                out.write("rulebase[" + index + "].rules \"");
+                writeRuleBaseAsLine(ruleBase.getSource(), out);
+                out.write("\"\n");
+                index++;
+            }
+        }
+        finally {
+            IOUtils.closeWriter(out);
+        }
+    }
+
+    /** Copies the file to the writer, emitting a literal backslash-n after each line instead of a line break */
+    private void writeRuleBaseAsLine(String file, Writer writer) throws IOException {
+        BufferedReader in = null;
+        try {
+            in = IOUtils.createReader(file,"utf-8");
+            for (String line = in.readLine(); line != null; line = in.readLine()) {
+                writer.write(line);
+                writer.write("\\n");
+            }
+        }
+        finally {
+            IOUtils.closeReader(in);
+        }
+    }
+
+    /** Expects two arguments: ruleBaseDir outputDir. Exits with status 1 on missing arguments or failure. */
+    public static void main(String[] args) {
+        if (args.length < 2) {
+            System.out.println("USAGE: RuleConfigDeriver ruleBaseDir outputDir");
+            System.exit(1);
+        }
+
+        try {
+            new RuleConfigDeriver().derive(args[0], args[1]);
+        }
+        catch (Exception e) {
+            System.out.println("ERROR: " + collectMessage(e));
+            System.exit(1);
+        }
+    }
+
+    /** Returns the messages (or class names) of this throwable and all its causes, separated by ": " */
+    private static String collectMessage(Throwable e) {
+        StringBuilder chain = new StringBuilder(messageOrName(e));
+        for (Throwable cause = e.getCause(); cause != null; cause = cause.getCause())
+            chain.append(": ").append(messageOrName(cause));
+        return chain.toString();
+    }
+
+    /** Returns the message of the throwable, or its class name if it has no message */
+    private static String messageOrName(Throwable e) {
+        String message = e.getMessage();
+        return message != null ? message : e.getClass().getName();
+    }
+
+}
diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/config/package-info.java b/container-search/src/main/java/com/yahoo/prelude/semantics/config/package-info.java new file mode 100644 index 00000000000..6b2801d10d7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/config/package-info.java @@ -0,0 +1,5 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+package com.yahoo.prelude.semantics.config;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Choicepoint.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Choicepoint.java new file mode 100644 index 00000000000..f2650fef83a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Choicepoint.java @@ -0,0 +1,126 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.semantics.engine;
+
+import com.yahoo.prelude.semantics.rule.Condition;
+
+/**
+ * A choice point in a rule evaluation. A choicepoint is open if there are other choices to make at the point,
+ * closed if there are no further choices. In addition it holds the state needed for
+ * the rule evaluation to backtrack to this point.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a>
+ */
+public class Choicepoint {
+
+    /** The rule evaluation this choicepoint was created in */
+    private final RuleEvaluation owner;
+
+    /** The condition creating this choicepoint */
+    private final Condition condition;
+
+    /** The state this choice point can be rolled back to */
+    private final State state;
+
+    /** Whether there are (or may be) open choices to explore at this choicepoint yet */
+    private boolean open = true;
+
+    /** The number of tries made at this choice point */
+    private int tries = 0;
+
+    /** Creates an open choicepoint capturing the current state of the given evaluation */
+    public Choicepoint(RuleEvaluation e, Condition condition) {
+        this.owner = e;
+        this.condition = condition;
+        this.state = new State(this, e);
+        if (e.getTraceLevel() >= 5)
+            e.trace(5,"Added choice point at " + e.currentItem() + " for '" + condition + "'");
+    }
+
+    /** Returns the condition which created this choice point */
+    public Condition getCondition() { return condition; }
+
+    /** Returns whether there are (or may be) open choices to explore at this choicepoint yet */
+    public boolean isOpen() { return open; }
+
+    /** Marks this choice point as closed (!open) - there are no further choices to explore */
+    public void close() { open = false; }
+
+    /** Returns the number of tries made at this point */
+    public int tryCount() { return tries; }
+
+    /** Registers that another try has been made */
+    public void addTry() { tries++; }
+
+    /**
+     * Backtracks the owning evaluation to the state stored in this choicepoint
+     * (position and match counts), and traces this at trace level 5 or higher.
+     */
+    public void backtrack() {
+        state.backtrack(owner);
+        if (owner.getTraceLevel() >= 5)
+            owner.trace(5,"Backtracked to " + owner.currentItem() + " for '" + condition + "'");
+    }
+
+    /** Backtracks the position only, not matches */
+    public void backtrackPosition() {
+        state.backtrackPosition(owner);
+    }
+
+    /** Updates the state of this choice point to the current state of its evaluation */
+    public void updateState() {
+        state.updateState(owner);
+    }
+
+    /** Returns the state of this choice point */
+    public State getState() { return state; }
+
+    /** The state of this choicepoint: a position and the number of referenced and nonreferenced matches */
+    public final static class State {
+
+        private int position = 0;
+
+        private int referencedMatchCount = 0;
+
+        private int nonreferencedMatchCount = 0;
+
+        /** Creates a state snapshot of the given evaluation. The choicepoint argument is not used. */
+        public State(Choicepoint choicepoint,RuleEvaluation evaluation) {
+            updateState(evaluation);
+        }
+
+        /** Stores the current position and match counts of the given evaluation in this */
+        public void updateState(RuleEvaluation evaluation) {
+            this.position = evaluation.currentPosition();
+            this.referencedMatchCount = evaluation.getReferencedMatchCount();
+            this.nonreferencedMatchCount = evaluation.getNonreferencedMatchCount();
+        }
+
+        /**
+         * Sets the evaluation back to the stored position and removes any
+         * matches added beyond the stored match counts.
+         */
+        public void backtrack(RuleEvaluation e) {
+            backtrackPosition(e);
+
+            // Is this check masking errors?
+            int referencedSize = e.referencedMatches().size();
+            if (referencedSize > referencedMatchCount)
+                e.referencedMatches().subList(referencedMatchCount, referencedSize).clear();
+
+            // Is this check masking errors?
+            int nonreferencedSize = e.nonreferencedMatches().size();
+            if (nonreferencedSize > nonreferencedMatchCount)
+                e.nonreferencedMatches().subList(nonreferencedMatchCount, nonreferencedSize).clear();
+        }
+
+        /** Sets the evaluation back to the stored position, leaving matches untouched */
+        public void backtrackPosition(RuleEvaluation e) {
+            e.setPosition(position);
+        }
+
+        public int getPosition() { return position; }
+
+        public int getReferencedMatchCount() { return referencedMatchCount; }
+
+        public int getNonreferencedMatchCount() { return nonreferencedMatchCount; }
+
+    }
+
+}
+
diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java new file mode 100644 index 00000000000..fe3543fc655 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java @@ -0,0 +1,453 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.semantics.engine;
+
+import com.yahoo.prelude.query.*;
+import com.yahoo.search.Query;
+import com.yahoo.search.query.QueryTree;
+
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * An evaluation of a query over a rule base. There is one evaluation for each evaluation
+ * of one query over one rule base.
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public class Evaluation {
+
+    // TODO: Retrofit query into the namespace construct
+    private ParameterNameSpace parameterNameSpace=null;
+
+    private Query query;
+
+    /** The current index into the flattened item list */
+    private int currentIndex = 0;
+
+    /** Query items flattened to a list iterator */
+    private List<FlattenedItem> flattenedItems;
+
+    /** The rule evaluation context, can be reset once the rule is evaluated */
+    private RuleEvaluation ruleEvaluation;
+
+    /**
+     * The amount of context information to collect about this evaluation.
+     * 0 means no context information, higher numbers means more context information.
+     */
+    private int traceLevel = 0;
+
+    private String traceIndentation = "";
+
+    /** See RuleEngine */
+    private Set<Integer> matchDigests = new HashSet<>();
+
+    /** The previous size of this query (see RuleEngine), set on matches only */
+    private int previousQuerySize = 0;
+
+    /** Should we allow stemmed matches? */
+    private boolean stemming = true;
+
+    /** Creates a new evaluation of the given query with trace level 0 */
+    public Evaluation(Query query) {
+        this(query, 0);
+    }
+
+    /**
+     * Creates a new evaluation
+     *
+     * @param query the query this evaluation is for
+     * @param traceLevel the amount of tracing to do
+     */
+    public Evaluation(Query query,int traceLevel) {
+        this.query = query;
+        this.traceLevel = traceLevel;
+        reset();
+        this.ruleEvaluation = new RuleEvaluation(this);
+    }
+
+    /** Re-flattens the query and resets the item iterator to point to the first item */
+    public void reset() {
+        currentIndex = 0;
+        reflatten();
+    }
+
+    /** Sets the item iterator to point to the last item, or to -1 if there is no flattened item list */
+    public void setToLast() { // PGA
+        currentIndex = (flattenedItems != null) ? flattenedItems.size() - 1 : -1;
+    }
+
+    /** Re-flattens the query and resets the item iterator to point to the last item */
+    public void resetToLast() { // PGA
+        reflatten();
+        currentIndex = flattenedItems.size() - 1;
+    }
+
+    /**
+     * Remembers the size of the current flattened list (if any) as the previous query size,
+     * clears the trace indentation and flattens the current query tree into a new list.
+     */
+    private void reflatten() {
+        if (flattenedItems != null)
+            previousQuerySize = flattenedItems.size();
+        traceIndentation = "";
+        flattenedItems = new java.util.ArrayList<>();
+        flatten(query.getModel().getQueryTree().getRoot(), 0, flattenedItems);
+    }
+
+    public Query getQuery() { return query; }
+
+    /** Set to true to enable stemmed matches. True by default */
+    public void setStemming(boolean stemming) { this.stemming = stemming; }
+
+    /** Returns whether stemmed matches are allowed. 
True by default */
+    public boolean getStemming() { return stemming; }
+
+    /** Registers a match digest (see RuleEngine). The int is autoboxed; no deprecated Integer constructor. */
+    void addMatchDigest(int digest) { matchDigests.add(digest); }
+
+    /** Returns whether the given match digest has been registered in this evaluation */
+    boolean hasMatchDigest(int matchDigest) { return matchDigests.contains(matchDigest); }
+
+    int getPreviousQuerySize() { return previousQuerySize; }
+
+    /** Returns the number of items in the flattened item list */
+    public int getQuerySize() { return flattenedItems.size(); }
+
+    /** Advances to the next item as current item */
+    public void next() {
+        currentIndex++;
+    }
+
+    /** Steps back to the previous item as current item */
+    public void previous() {//PGA
+        currentIndex--;
+    }
+
+
+    /** Returns the current item, or null if the current index is outside the flattened item list */
+    public FlattenedItem currentItem() {
+        if ( (currentIndex>=flattenedItems.size()) || (currentIndex<0)) return null; //PGA
+        return flattenedItems.get(currentIndex);
+    }
+
+    /**
+     * Returns the rule evaluation of this, re-initialized to start at the current position.
+     * Note that the same instance is returned on every call.
+     */
+    public RuleEvaluation freshRuleEvaluation() {
+        ruleEvaluation.initialize(flattenedItems,currentIndex);
+        return ruleEvaluation;
+    }
+
+    /** Adds an item to the query being evaluated in a way consistent with the query type */
+    // TODO: Add this functionality to Query? 
+    public void addItem(Item item, TermType termType) {
+        Item root = query.getModel().getQueryTree().getRoot();
+        Item newRoot = (root == null) ? item : combineItems(root, item, termType);
+        query.getModel().getQueryTree().setRoot(newRoot);
+    }
+
+    /** Removes this item from its parent */
+    public void removeItem(Item item) {
+        item.getParent().removeItem(item);
+    }
+
+    /**
+     * Removes this item from its parent by identity, to ensure we remove the right one
+     * if there are multiple equal items
+     */
+    public void removeItemByIdentity(Item item) {
+        int index = findIndexByIdentity(item);
+        if (index < 0) {
+            item.getParent().removeItem(item); // Fallback to removal by equals()
+            return;
+        }
+        item.getParent().removeItem(index);
+    }
+
+    /** Returns the index in the parent of the child which is this same instance, or -1 if not found */
+    private int findIndexByIdentity(Item item) {
+        Iterator<Item> siblings = item.getParent().getItemIterator();
+        for (int index = 0; siblings.hasNext(); index++) {
+            if (siblings.next() == item)
+                return index;
+        }
+        return -1;
+    }
+
+    /** Removes an item, prefers the one at the given position if it is equal to the given item */
+    public void removeItem(int position,Item item) {
+        Item atPosition = item.getParent().getItem(position);
+        boolean removeByPosition = atPosition.equals(item);
+        if (removeByPosition)
+            item.getParent().removeItem(position);
+        else
+            item.getParent().removeItem(item); // Otherwise, just remove any such item
+    }
+
+    /**
+     * Convert segment items into their mutable counterpart, do not update query tree.
+     * Items which are not segment items, and segment items of unknown type, are returned directly.
+     *
+     * @return a mutable CompositeItem instance
+     */
+    private CompositeItem convertSegmentItem(CompositeItem item) {
+        if ( ! (item instanceof SegmentItem))
+            return item;
+
+        CompositeItem mutable;
+        if (item instanceof AndSegmentItem) {
+            mutable = new AndItem();
+        }
+        else if (item instanceof PhraseSegmentItem) {
+            PhraseItem phrase = new PhraseItem();
+            phrase.setIndexName(((PhraseSegmentItem) item).getIndexName());
+            mutable = phrase;
+        }
+        else {
+            // TODO: Do something else than nothing for unknowns?
+            return item;
+        }
+
+        Iterator<Item> children = item.getItemIterator();
+        while (children.hasNext())
+            mutable.addItem(children.next());
+        return mutable;
+    }
+
+
+    /** Sets mutable as the root if parent is null, otherwise replaces original by mutable in parent */
+    private void insertMutableInTree(CompositeItem mutable, CompositeItem original, CompositeItem parent) {
+        if (parent == null) {
+            query.getModel().getQueryTree().setRoot(mutable);
+            return;
+        }
+        parent.setItem(parent.getItemIndex(original), mutable);
+    }
+
+    /**
+     * Convert The parent of this item into a mutable item. Note, this
+     * may change the shape of the query tree. (E.g. if the original parent is a
+     * segment phrase, and the original parent's parent is a phrase, the terms
+     * from the parent will be moved to the parent's parent.) 
+     *
+     * @param item The item for which the parent shall be made mutable
+     */
+    public void makeParentMutable(TermItem item) {
+        CompositeItem parent = item.getParent();
+        CompositeItem mutable = convertSegmentItem(parent);
+        // convertSegmentItem returns the same instance when no conversion took place
+        if (parent != mutable) {
+            CompositeItem parentsParent = parent.getParent();
+            insertMutableInTree(mutable, parent, parentsParent);
+        }
+    }
+
+    /**
+     * Inserts an item to the query being evaluated in a way consistent with the query type
+     *
+     * @param item the item to insert
+     * @param parent the parent of this item, or null to set the root
+     * @param index the index at which to insert this into the parent
+     * @param desiredParentType the desired type of the composite which contains item when this returns
+     */
+    public void insertItem(Item item, CompositeItem parent, int index, TermType desiredParentType) {
+        if (parent==null) { // TODO: Accommodate for termtype in this case too
+            query.getModel().getQueryTree().setRoot(item);
+
+            return;
+        }
+
+        if (parent.getItemCount()>0 && parent instanceof QueryTree && parent.getItem(0) instanceof CompositeItem) {
+            // combine with the existing root instead
+            parent=(CompositeItem)parent.getItem(0);
+            if (index==1) { // that means adding it after the existing root
+                index=parent.getItemCount();
+            }
+        }
+
+        // Add directly to the parent if it has the desired type (or any type will do)
+        // and, for phrase parents, the item has the same index name as the phrase
+        if (( desiredParentType==TermType.DEFAULT || desiredParentType.hasItemClass(parent.getClass()) )
+            && equalIndexNameIfParentIsPhrase(item,parent)) {
+            addItem(parent,index,item,desiredParentType);
+        }
+        else {
+            insertIncompatibleItem(item,parent,query,desiredParentType);
+        }
+    }
+
+    /**
+     * Adds the item at the given index of the parent, with special handling of NotItem parents
+     * (see the cases below) and of a non-empty QueryTree parent, where the existing first child
+     * and the new item are placed in a new composite created from the desired parent type.
+     */
+    private void addItem(CompositeItem parent,int index,Item item,TermType desiredParentType) {
+        if (parent instanceof NotItem) {
+            if (index==0 && parent.getItem(0)==null) { // Case 1: The current positive is null and we are adding a positive
+                parent.setItem(0,item);
+            }
+            else if (index<=1 && !(parent.getItem(0) instanceof CompositeItem)) { // Case 2: The positive must become a composite
+                CompositeItem positiveComposite=(CompositeItem)desiredParentType.createItemClass();
+                positiveComposite.addItem(parent.getItem(0));
+                positiveComposite.addItem(index,item);
+                parent.setItem(0,positiveComposite);
+            }
+            else if (parent.getItem(0)!=null && parent.getItem(0) instanceof CompositeItem // Case 3: Add to the positive composite
+                     && index<=((CompositeItem)parent.getItem(0)).getItemCount()) {
+                ((CompositeItem)parent.getItem(0)).addItem(index,item);
+            }
+            else { // Case 4: Add negative
+                parent.addItem(index,item);
+            }
+        }
+        else if (parent.getItemCount()>0 && parent instanceof QueryTree) {
+            CompositeItem composite=(CompositeItem)desiredParentType.createItemClass();
+            composite.addItem(parent.getItem(0));
+            composite.addItem(index,item);
+            parent.setItem(0,composite);
+        }
+        else {
+            parent.addItem(index,item);
+        }
+    }
+
+    /**
+     * A special purpose check used to simplify the above: Returns true unless the parent is a
+     * PhraseItem and the item is an IndexedItem having a different index name than the phrase.
+     */
+    private boolean equalIndexNameIfParentIsPhrase(Item item,CompositeItem parent) {
+        if ( ! (parent instanceof PhraseItem)) return true;
+        if ( ! (item instanceof IndexedItem)) return true;
+
+        return ((PhraseItem)parent).getIndexName().equals(((IndexedItem)item).getIndexName());
+    }
+
+    /**
+     * Wraps the given parent and the item in a new composite (an AndItem if the desired type is
+     * DEFAULT, otherwise an instance created from the desired type), and puts the new composite
+     * where the parent was: as the root if the parent had no parent, otherwise in the parent's parent.
+     */
+    private void insertIncompatibleItem(Item item,CompositeItem parent,Query query,TermType desiredParentType) {
+        // Create new parent
+        CompositeItem newParent;
+        if (desiredParentType==TermType.DEFAULT)
+            newParent=new AndItem();
+        else
+            newParent=(CompositeItem)desiredParentType.createItemClass();
+
+        // Save the previous parent's parent before the parent is moved into the new parent
+        CompositeItem parentsParent=parent.getParent();
+
+        // Add items to new parent
+        newParent.addItem(parent);
+        newParent.addItem(item);
+
+        // Insert new parent as root or child of old parents parent
+        if (parentsParent==null) {
+            query.getModel().getQueryTree().setRoot(newParent);
+
+        }
+        else {
+            int parentIndex=0;
+            // NOTE(review): this null check is always true in this branch.
+            // NOTE(review): the index is looked up after the parent was added to newParent above;
+            // confirm that CompositeItem.addItem leaves it findable in its old parent.
+            if (parentsParent!=null) {
+                parentIndex=parentsParent.getItemIndex(parent);
+            }
+            parentsParent.setItem(parentIndex,newParent);
+        }
+    }
+
+    /**
+     * Combines two items into one according to the term type and returns the result:
+     * If first is a NullItem, second is returned. If first is a NotItem, second is added as a
+     * negative when the term type is NOT and otherwise combined with the positive item.
+     * If first is another composite, second is added to it if it is of the class created for the
+     * term type, otherwise both are added to a new composite of that class. If first is a term
+     * both are added to a new composite.
+     *
+     * @throws RuntimeException if first is of none of the types above
+     */
+    private Item combineItems(Item first,Item second,TermType termType) {
+        if (first instanceof NullItem) {
+            return second;
+        } else if (first instanceof NotItem) {
+            NotItem notItem=(NotItem)first;
+            if (termType==TermType.NOT) {
+                notItem.addNegativeItem(second);
+            }
+            else {
+                Item newPositive=combineItems(notItem.getPositiveItem(),second,termType);
+                notItem.setPositiveItem(newPositive);
+            }
+            return notItem;
+        }
+        else if (first instanceof CompositeItem) {
+            CompositeItem composite=(CompositeItem)first;
+            CompositeItem combined=createType(termType);
+            if (combined.getClass().equals(composite.getClass())) {
+                composite.addItem(second);
+                return composite;
+            }
+            else {
+                combined.addItem(first);
+                combined.addItem(second); // Also works for nots
+                return combined;
+            }
+        }
+        else if (first instanceof TermItem) {
+            CompositeItem combined=createType(termType);
+            combined.addItem(first);
+            combined.addItem(second);
+            return combined;
+        }
+        else {
+            throw new RuntimeException("Don't know how to add an item to type " + first.getClass());
+        }
+    }
+
+    /**
+     * Creates a new, empty composite for the given term type. For DEFAULT this is an OrItem
+     * if the query type is ANY and an AndItem otherwise.
+     *
+     * @throws IllegalArgumentException if the term type is not one of DEFAULT, AND, OR, RANK and NOT
+     */
+    private CompositeItem createType(TermType termType) {
+        if (termType == TermType.DEFAULT)
+            return query.getModel().getType().equals(Query.Type.ANY) ? new OrItem() : new AndItem();
+        if (termType == TermType.AND)
+            return new AndItem();
+        if (termType == TermType.OR)
+            return new OrItem();
+        if (termType == TermType.RANK)
+            return new RankItem();
+        if (termType == TermType.NOT)
+            return new NotItem();
+        throw new IllegalArgumentException("Programing error, this method should be updated with add in RankType");
+    }
+
+    /**
+     * Adds the term items at or below the given item to the list, each with its position
+     * in its own parent. Null items and filter items (with everything below them) are skipped.
+     */
+    private void flatten(Item item,int position,List<FlattenedItem> toList) {
+        if (item == null || item.isFilter()) return;
+
+        if (item instanceof TermItem) { // make eligible for matching
+            toList.add(new FlattenedItem((TermItem)item, position));
+            return;
+        }
+
+        if (item instanceof CompositeItem) { // make children eligible for matching
+            Iterator<?> children = ((CompositeItem)item).getItemIterator();
+            int childPosition = 0;
+            while (children.hasNext()) {
+                flatten((Item)children.next(), childPosition, toList);
+                childPosition++;
+            }
+        }
+
+        // other terms are unmatchable
+    }
+
+    /** Adds the message, prefixed by the current trace indentation, to the query trace if level is at most the trace level */
+    public void trace(int level,String message) {
+        if (level <= getTraceLevel())
+            query.trace(traceIndentation + message,false,1);
+    }
+
+    /**
+     * The amount of context information to collect about this evaluation.
+     * 0 (the default) means no context information, higher numbers means
+     * more context information. 
+     */
+    public int getTraceLevel() { return traceLevel; }
+
+    /** The unit added by indentTrace and removed by unindentTrace; shared so the two cannot drift apart */
+    private static final String TRACE_INDENT = "   ";
+
+    /** Increases the indentation of subsequent trace messages by one unit */
+    public void indentTrace() {
+        traceIndentation = traceIndentation + TRACE_INDENT;
+    }
+
+    /** Decreases the indentation of subsequent trace messages by one unit, but never below none */
+    public void unindentTrace() {
+        if (traceIndentation.length() < TRACE_INDENT.length())
+            traceIndentation = "";
+        else
+            traceIndentation = traceIndentation.substring(TRACE_INDENT.length());
+    }
+
+    /**
+     * Returns the name space having the given name. The only name space available
+     * is "parameter", which is created the first time it is requested.
+     *
+     * @throws RuntimeException if any other name (including null) is given
+     */
+    public NameSpace getNameSpace(String nameSpaceName) {
+        if ("parameter".equals(nameSpaceName)) {
+            if (parameterNameSpace==null)
+                parameterNameSpace=new ParameterNameSpace();
+            return parameterNameSpace;
+        }
+
+        // That's all for now
+        throw new RuntimeException("Unknown namespace '" + nameSpaceName + "'");
+
+    }
+
+}
diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/EvaluationException.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/EvaluationException.java new file mode 100644 index 00000000000..00a66206b46 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/EvaluationException.java @@ -0,0 +1,15 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.semantics.engine;
+
+/**
+ * Thrown on semantic exceptions on evaluation over a rule base
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a>
+ */
+@SuppressWarnings("serial")
+public class EvaluationException extends RuntimeException {
+
+    public EvaluationException(String message) {
+        super(message);
+    }
+}
diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/FlattenedItem.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/FlattenedItem.java new file mode 100644 index 00000000000..1631d60df6b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/FlattenedItem.java @@ -0,0 +1,31 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.semantics.engine;
+
+import com.yahoo.prelude.query.TermItem;
+
+/**
+ * A term item paired with its position in its parent
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a>
+ */
+public class FlattenedItem {
+
+    private final TermItem item;
+
+    /** The position of this item in its parent */
+    private final int position;
+
+    public FlattenedItem(TermItem item,int position) {
+        this.item = item;
+        this.position = position;
+    }
+
+    public TermItem getItem() { return item; }
+
+    public int getPosition() { return position; }
+
+    /** Returns "position:item" */
+    @Override
+    public String toString() {
+        return position + ":" + item;
+    }
+
+}
diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java new file mode 100644 index 00000000000..fc7aec62412 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java @@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.semantics.engine;
+
+import com.yahoo.prelude.query.CompositeItem;
+import com.yahoo.prelude.query.Item;
+import com.yahoo.prelude.query.TermItem;
+import com.yahoo.prelude.query.WordItem;
+
+/**
+ * A matched term item, with its position, its parent at match time and the value to replace it by
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a>
+ */
+public class Match {
+
+    /** The start position of this match */
+    private final int position;
+
+    private final TermItem item;
+
+    /** The string to replace the match by, usually item.getIndexedString() */
+    private final String replaceValue;
+
+    /** The parent of the matched item */
+    private final CompositeItem parent;
+
+    /**
+     * Creates a match
+     *
+     * @param item the matched item
+     * @param replaceValue the string to replace this match by, or null to use
+     *        the indexed string of the matched item
+     */
+    public Match(FlattenedItem item,String replaceValue) {
+        this.item = item.getItem();
+        this.replaceValue = (replaceValue != null) ? replaceValue : item.getItem().getIndexedString();
+        this.parent = this.item.getParent();
+        this.position = item.getPosition();
+    }
+
+    public int getPosition() { return position; }
+
+    public TermItem getItem() { return item; }
+
+    public String getReplaceValue() { return replaceValue; }
+
+    /**
+     * Returns the parent in which the item was matched, or null if the item was root.
+     * Note that the item may subsequently have been removed, so it does not necessarily
+     * have this parent
+     */
+    public CompositeItem getParent() { return parent; }
+
+    /** Computed from the indexed string and index name of the matched item */
+    @Override
+    public int hashCode() {
+        int indexedStringHash = item.getIndexedString().hashCode();
+        int indexNameHash = item.getIndexName().hashCode();
+        return 17*indexedStringHash + 33*indexNameHash;
+    }
+
+    /** Returns a new word item having the replace value of this match and the given label as index */
+    public Item toItem(String label) {
+        return new WordItem(getReplaceValue(),label);
+    }
+
+    /** Two matches are equal if they have the same position and equal items */
+    @Override
+    public boolean equals(Object o) {
+        if ( ! (o instanceof Match)) return false;
+
+        Match other = (Match)o;
+        return other.position == position && other.item.equals(item);
+    }
+
+}
diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/NameSpace.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/NameSpace.java new file mode 100644 index 00000000000..76eea63bd68 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/NameSpace.java @@ -0,0 +1,16 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.semantics.engine;
+
+/**
+ * A collection of facts (addressed by namespace.fact in conditions)
+ * over which we may write conditions
+ *
+ * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
+ */
+public abstract class NameSpace {
+
+    /** Returns whether the given term matches in this name space in the context of the given rule evaluation */
+    public abstract boolean matches(String term,RuleEvaluation e);
+
+    // TODO: public abstract void produce(RuleEvaluation e);
+
+}
diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ParameterNameSpace.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ParameterNameSpace.java new file mode 100644 index 00000000000..35427250511 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ParameterNameSpace.java @@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.semantics.engine; + +import com.yahoo.search.Query; + +/** + * A name space representing the (http) parameters following this query + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class ParameterNameSpace extends NameSpace { + + public boolean matches(String term,RuleEvaluation e) { + Query query=e.getEvaluation().getQuery(); + String value=query.properties().getString(term); + if (value==null) return false; + e.setValue(value); + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ReferencedMatches.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ReferencedMatches.java new file mode 100644 index 00000000000..cb7d2af8d19 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ReferencedMatches.java @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.prelude.semantics.engine;

import java.util.Iterator;
import java.util.List;

import com.yahoo.prelude.query.Item;
import com.yahoo.prelude.query.PhraseItem;

/**
 * The Matches referenced by a particular context name in a rule evaluation
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a>
 */
public class ReferencedMatches {

    /** The context name these matches are referenced by */
    private String contextName;

    /** The matches collected under the context name, in the order they were added */
    private List<Match> matches = new java.util.ArrayList<>(1);

    public ReferencedMatches(String contextName) {
        this.contextName = contextName;
    }

    /** Appends a match to this */
    public void addMatch(Match match) {
        matches.add(match);
    }

    public String getContextName() {
        return contextName;
    }

    /** Returns an iterator over the matches of this, in the order they were added */
    public Iterator<Match> matchIterator() { return matches.iterator(); }

    /**
     * Returns the item to insert from these referenced matches, or null if none:
     * the item of the single match if there is exactly one, otherwise a phrase
     * of the items of all the matches.
     *
     * @param label the label of the matches
     */
    public Item toItem(String label) {
        switch (matches.size()) {
            case 0:
                return null;
            case 1:
                return matches.get(0).toItem(label);
            default:
                PhraseItem result = new PhraseItem(); // TODO: Somehow allow AND items instead here
                result.setIndexName(label);
                for (Match match : matches) {
                    result.addItem(match.toItem(label));
                }
                return result;
        }
    }

}

// ===== container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEngine.java =====
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.semantics.engine;

import com.yahoo.search.Query;
import com.yahoo.prelude.query.QueryCanonicalizer;
import com.yahoo.prelude.semantics.RuleBase;
import com.yahoo.prelude.semantics.RuleBaseException;
import com.yahoo.prelude.semantics.rule.ProductionRule;

import java.util.ListIterator;

/**
 * Evaluates the rules of a rule base. This class is thread safe on analyze calls, but
 * not on modification calls.
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a>
 */
public class RuleEngine {

    /** The rule base whose rules this engine evaluates */
    private RuleBase rules;

    public RuleEngine(RuleBase rules) {
        this.rules=rules;
    }

    /**
     * Evaluates a rule base over a query
     *
     * @param query the query to evaluate
     * @param traceLevel the level of tracing to do
     * @return the error caused by analyzing the query, or null if there was no error
     *         If there is an error, this query is destroyed (unusable)
     */
    public String evaluate(Query query,int traceLevel) {
        // TODO: This is O(query size*rule base size). We'll eventually need to create indices
        // on rules to look up rule candidates per term to make it O(query size) instead
        // Probably create indices on the first term like Prolog implementations use to

        boolean matchedAnything=false;
        Evaluation evaluation=new Evaluation(query,traceLevel);
        evaluation.setStemming(rules.getStemming());
        evaluation.trace(2,"Evaluating query '" + evaluation.getQuery().getModel().getQueryTree().getRoot() + "':");
        for (ListIterator<ProductionRule> i=rules.ruleIterator(); i.hasNext(); ) {
            evaluation.reset();
            ProductionRule rule=i.next();
            boolean matched=matchRuleAtAllStartPoints(evaluation,rule);
            matchedAnything|=matched;
        }

        // Nothing was rewritten, so there is nothing to canonicalize or report
        if (!matchedAnything) return null;

        String error=QueryCanonicalizer.canonicalize(query);

        if (query.getTraceLevel()>=1)
            query.trace("SemanticSearcher: Rewrote query",true,1);

        return error;
    }

    /**
     * Match a rule at any starting point in the query
     *
     * @return whether the rule matched (and produced) at least once
     * @throws RuleBaseException if the rule keeps matching beyond the loop limit
     */
    private boolean matchRuleAtAllStartPoints(Evaluation evaluation, ProductionRule rule) {
        boolean matchedAtLeastOnce=false;
        int iterationCount=0;

        /*
         * Test if it is a removal rule, if so iterate backwards so that precalculated
         * replacement positions do not become invalid as the query shrinks
         */
        boolean removalRule = false;
        if ( (rule instanceof com.yahoo.prelude.semantics.rule.ReplacingProductionRule) &&
             (rule.getProduction().toString().length() == 0) ) { // empty replacement
            removalRule = true;
            evaluation.setToLast();
        }

        // Upper bound on the number of matches of one rule, to break out of rules which keep matching
        int loopLimit=Math.max(15,evaluation.getQuerySize()*3);

        while (evaluation.currentItem() != null) {
            boolean matched=matchRule(evaluation,rule);
            if (matched) {
                // The query changed: start over from the beginning (or the end for removal rules)
                if (removalRule)
                    evaluation.resetToLast();
                else
                    evaluation.reset();
                matchedAtLeastOnce = true;
                if (rule.isLoop()) break;
            }
            else {
                if (removalRule)
                    evaluation.previous();
                else
                    evaluation.next();
            }

            if (matched && iterationCount++ > loopLimit) {
                throw new RuleBaseException("Rule '" + rule + "' has matched '" +
                                            evaluation.getQuery().getModel().getQueryTree().getRoot() +
                                            "' " + loopLimit + " times, aborting");
            }
        }

        return matchedAtLeastOnce;
    }

    /**
     * Matches a rule at the current starting point of the evaluation, and carries
     * out the production if there is a match
     *
     * @return whether this rule matched
     */
    // TODO: Code cleanup
    private boolean matchRule(Evaluation evaluation, ProductionRule rule) {
        RuleEvaluation ruleEvaluation=evaluation.freshRuleEvaluation();

        ruleEvaluation.indentTrace();
        if (ruleEvaluation.getTraceLevel()>=3) {
            ruleEvaluation.trace(3,"Evaluating rule '" + rule +
                                   "' on '" + ruleEvaluation.getEvaluation().getQuery().getModel().getQueryTree().getRoot() +
                                   "' at '" + ruleEvaluation.currentItem() + "':");
        }

        ruleEvaluation.indentTrace();

        boolean matches=rule.matches(ruleEvaluation);

        // A match with the same digest as an earlier one is a repeat, which is only
        // carried out again if the query got shorter in the meantime
        boolean matchedBefore=false;
        int currentMatchDigest=ruleEvaluation.calculateMatchDigest(rule);
        if (evaluation.hasMatchDigest(currentMatchDigest))
            matchedBefore=true;

        boolean queryGotShorter=false;
        if (evaluation.getPreviousQuerySize()>evaluation.getQuerySize())
            queryGotShorter=true;

        boolean doProduction=!matchedBefore || queryGotShorter;

        ruleEvaluation.unindentTrace();

        if (ruleEvaluation.getTraceLevel()>=2) {
            if (matches && doProduction)
                ruleEvaluation.trace(2,"Matched rule '" + rule + "' at " + ruleEvaluation.previousItem());
            else if (!matches)
                ruleEvaluation.trace(2,"Did not match rule '" + rule + "' at " + ruleEvaluation.currentItem());
            else if (!doProduction)
                ruleEvaluation.trace(2,"Ignoring repeated match of '" + rule + "'");
        }

        ruleEvaluation.unindentTrace();

        if (!matches || !doProduction) return false;

        // Do production barrier

        evaluation.addMatchDigest(currentMatchDigest);
        String preQuery=null;
        if (evaluation.getTraceLevel()>=1) {
            preQuery= evaluation.getQuery().getModel().getQueryTree().getRoot().toString();
        }
        rule.produce(ruleEvaluation);
        if (evaluation.getTraceLevel()>=1) {
            evaluation.trace(1,"Transforming '" + preQuery + "' to '" +
                               evaluation.getQuery().getModel().getQueryTree().getRoot().toString() +
                               "' since '" + rule + "' matched");
        }

        return true;
    }

}

// ===== container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEvaluation.java =====
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.semantics.engine;

import com.yahoo.prelude.query.CompositeItem;
import com.yahoo.prelude.query.Item;
import com.yahoo.prelude.query.TermType;
import com.yahoo.prelude.semantics.rule.Condition;
import com.yahoo.prelude.semantics.rule.ProductionRule;

import java.util.*;

/**
 * A particular evaluation of a particular rule.
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a>
 */
public class RuleEvaluation {

    // TODO: Create a query builder (or something) through which all query manipulation
    // here and in Evaluation is done. This class must also hold all the matches
    // and probably be able to update the match positions to keep them in sync with changes
    // to the query

    // Remember that whenever state is added to this class, you
    // must consider whether/how to make that state backtrackable
    // by saving information in choicepoint.state

    /** The items to match in this evaluation */
    private List<FlattenedItem> items;

    /** The current position into the list of items */
    private int position;

    /** The start position into the item list */
    private int startPosition;

    /** The references to matched contexts to be made in this evaluation */
    private Set<String> matchReferences;

    /** The current context of this evaluation, or null if we're currently not in an interesting context */
    private String currentContext;

    /** A list of referencedMatches */
    private List<ReferencedMatches> referencedMatchesList =new java.util.ArrayList<>();

    /** The matches made outside any referenced context */
    private List<Match> nonreferencedMatches=new java.util.ArrayList<>();

    /** The evaluation owning this */
    private Evaluation evaluation;

    /** The choice points saved in this evaluation, created lazily */
    private Stack<Choicepoint> choicepoints=null;

    /** The last value returned by a condition evaluated in this, may be null */
    private Object value=null;

    /** True when we are evaluating inside a condition which inverts the truth value */
    private boolean inNegation=false;

    /**
     * A label we should use to match candidate terms for.
     * Used to propagate a label from e.g. reference conditions to named conditions
     */
    private String currentLabel=null;

    public RuleEvaluation(Evaluation owner) {
        this.evaluation=owner;
    }

    /**
     * Sets the items to evaluate over and the position to start at, and resets all matching state
     *
     * @param list the flattened items to match
     * @param startPosition the position in the list at which matching starts
     */
    public void initialize(List<FlattenedItem> list,int startPosition) {
        this.startPosition=startPosition;
        items=list;
        reinitialize();
    }

    /** Resets the position to the start position and clears the context, the matches and the choicepoints */
    void reinitialize() {
        position=startPosition;
        currentContext=null;
        referencedMatchesList.clear();
        nonreferencedMatches.clear();
        if (choicepoints!=null)
            choicepoints.clear();
    }

    public void setMatchReferences(Set<String> matchReferences) { this.matchReferences=matchReferences; }

    /**
     * <p>Calculates an id which is unique for each match (the totality of the matched terms)
     * to a high probability. Why can we not simply look at the position
     * of terms? Because rules are allowed to modify the query tree in ways that makes positions
     * change.</p>
     *
     * <p>This digest is also problematic, because it's really the matching condition who should
     * calculate a match digest for that term which incorporates the semantics of that kind
     * of match (maybe not the word and index, but something else). This is a todo for when
     * we add other kinds of conditions.</p>
     */
    int calculateMatchDigest(ProductionRule rule) {
        int matchDigest=rule.hashCode();
        int matchCounter=1;
        for (Iterator<ReferencedMatches> i=referencedMatchesList.iterator(); i.hasNext(); ) {
            ReferencedMatches matches=i.next();
            int termCounter=0;
            for (Iterator<Match> j=matches.matchIterator(); j.hasNext(); ) {
                Match match=j.next();
                matchDigest=7*matchDigest*matchCounter+
                            71*termCounter+
                            match.hashCode();
                termCounter++;
            }
            matchCounter++;
        }
        for (Iterator<Match> i=nonreferencedMatches.iterator(); i.hasNext(); ) {
            Match match=i.next();
            matchDigest=7*matchDigest*matchCounter+match.hashCode();
            matchCounter++;
        }
        return matchDigest;
    }

    /**
     * Returns the current term item to look at,
     * or null if there are no more elements
     */
    public FlattenedItem currentItem() {
        if (position>=items.size()) return null;
        return items.get(position);
    }

    /** Returns the item before the current position, or null if the current position is the first */
    public FlattenedItem previousItem() {
        if (position-1<0) return null;
        return items.get(position-1);
    }

    /** Returns the position of the current item */
    public int currentPosition() {
        return position;
    }

    /** Sets the current position */
    public void setPosition(int position) {
        this.position=position;
    }

    /** Returns the total number of items to match in this evaluation */
    public int itemCount() {
        return items.size() - startPosition;
    }

    /** Returns the last value returned by a condition in this evaluation, or null */
    public Object getValue() { return value; }

    /** Sets the last value returned by a condition in this evaluation, or null */
    public void setValue(Object value) { this.value=value; }

    /** Returns whether we are evaluating inside a condition which inverts the truth value */
    public boolean isInNegation() { return inNegation; }

    /** Sets whether we are evaluating inside a condition which inverts the truth value */
    public void setInNegation(boolean inNegation) { this.inNegation=inNegation; }

    /** Returns the current position into the terms this evaluates over */
    public int getPosition() { return position; }

    /**
     * Sets a new current label and returns the previous one
     *
     * @param currentLabel the new current label, may be null
     * @return the label which was current before this call, may be null
     */
    public String setCurrentLabel(String currentLabel) {
        // Read the field explicitly: the parameter shadows it, so an unqualified
        // read here would return the new label rather than the previous one
        String oldLabel=this.currentLabel;
        this.currentLabel=currentLabel;
        return oldLabel;
    }

    public String getCurrentLabel() { return currentLabel; }

    /**
     * Advances currentItem to the next term item and returns that item.
     * If the current item before this call is the last item, this will
     * return (and set currentItem to) null.
     */
    public FlattenedItem next() {
        position++;

        if (position>=items.size()) {
            position=items.size();
            return null;
        }

        return items.get(position);
    }

    // TODO: Simplistic yet. Need to support context nesting etc.
    /** Makes the given context current if it is one of the match references of this evaluation */
    public void entering(String context) {
        if (context==null) return;
        if (matchReferences!=null && matchReferences.contains(context))
            currentContext=context;

    }

    /** Clears the current context if it is the given context */
    public void leaving(String context) {
        if (context==null) return;
        if (currentContext==null) return;
        if (currentContext.equals(context))
            currentContext=null;
    }

    /**
     * Adds a match, to the referenced matches of the current context if there is one,
     * otherwise to the nonreferenced matches
     *
     * @param item the match to add
     * @param replaceString the string to replace this match by, usually the item.getIndexedString()
     */
    public void addMatch(FlattenedItem item,String replaceString) {
        evaluation.makeParentMutable(item.getItem());
        Match match=new Match(item,replaceString);
        if (currentContext!=null) {
            ReferencedMatches matches=getReferencedMatches(currentContext);
            if (matches==null) {
                matches=new ReferencedMatches(currentContext);
                referencedMatchesList.add(matches);
            }
            matches.addMatch(match);
        }
        else {
            nonreferencedMatches.add(match);
        }
    }

    /** Returns the referenced matches for a context name, or null if none */
    public ReferencedMatches getReferencedMatches(String name) {
        for (Iterator<ReferencedMatches> i=referencedMatchesList.iterator(); i.hasNext(); ) {
            ReferencedMatches matches=i.next();
            if (name.equals(matches.getContextName()))
                return matches;
        }
        return null;
    }

    public int getReferencedMatchCount() { return referencedMatchesList.size(); }

    public int getNonreferencedMatchCount() { return nonreferencedMatches.size(); }

    /** Returns the evaluation this belongs to */
    public Evaluation getEvaluation() { return evaluation; }

    /** Adds an item to the query being evaluated in a way consistent with the query type */
    public void addItem(Item item, TermType termType) {
        evaluation.addItem(item,termType);
    }

    public void removeItem(Item item) {
        evaluation.removeItem(item);
    }

    public void removeItemByIdentity(Item item) {
        evaluation.removeItemByIdentity(item);
    }

    /** Removes an item, prefers the one at/close to the given position if there are multiple ones */
    public void removeItem(int position,Item item) {
        evaluation.removeItem(position,item);
    }


    /**
     * Inserts an item to the query being evaluated in a way consistent with the query type
     *
     * @param item the item to insert
     * @param parent the parent of this item, or null to set the root
     * @param index the index at which to insert this into the parent
     * @param termType the kind of item to index, this decides the resulting structure
     */
    public void insertItem(Item item, CompositeItem parent, int index, TermType termType) {
        evaluation.insertItem(item,parent,index,termType);
    }

    /** Returns a read-only view of the items of this */
    public List<FlattenedItem> items() {
        return Collections.unmodifiableList(items);
    }

    public Match getNonreferencedMatch(int index) {
        return nonreferencedMatches.get(index);
    }

    public void trace(int level,String string) {
        evaluation.trace(level,string);
    }

    public int getTraceLevel() {
        return evaluation.getTraceLevel();
    }

    public void indentTrace() {
        evaluation.indentTrace();
    }

    public void unindentTrace() {
        evaluation.unindentTrace();
    }

    /**
     * Returns the choice point of a condition in this evaluation, optionally creating it
     *
     * @param condition the creating condition
     * @param create true to create this choicepoint if it is missing
     * @return the choicepoint, or null if not present, and create is false
     */
    public Choicepoint getChoicepoint(Condition condition,boolean create) {
        if (choicepoints==null) {
            if (!create) return null;
            choicepoints=new java.util.Stack<>();
        }
        Choicepoint choicepoint=lookupChoicepoint(condition);
        if (choicepoint==null) {
            if (!create) return null;
            choicepoint=new Choicepoint(this,condition);
            choicepoints.push(choicepoint);
        }
        return choicepoint;
    }

    /** Returns the choicepoint created by exactly this condition instance, or null if none */
    private Choicepoint lookupChoicepoint(Condition condition) {
        for (Iterator<Choicepoint> i=choicepoints.iterator(); i.hasNext(); ) {
            Choicepoint choicepoint=i.next();
            if (condition==choicepoint.getCondition())
                return choicepoint;
        }
        return null;
    }

    List<ReferencedMatches> referencedMatches() {
        return referencedMatchesList;
    }

    List<Match> nonreferencedMatches() {
        return nonreferencedMatches;
    }

    /** Remove all the terms recognized by this match */
    public void removeMatches(ReferencedMatches matches) {
        for (Iterator<Match> i=matches.matchIterator(); i.hasNext(); ) {
            Match match=i.next();
            removeItemByIdentity(match.getItem());
        }
    }

}

// ===== container-search/src/main/java/com/yahoo/prelude/semantics/package-info.java =====
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage +package com.yahoo.prelude.semantics; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/parser/package-info.java b/container-search/src/main/java/com/yahoo/prelude/semantics/parser/package-info.java new file mode 100644 index 00000000000..309c3f7a456 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/parser/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.prelude.semantics.parser; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/AddingProductionRule.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/AddingProductionRule.java new file mode 100644 index 00000000000..91eef25a8b0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/AddingProductionRule.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.prelude.semantics.rule;

/**
 * A production rule which <i>adds</i> the production to the matched query
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a>
 */
public class AddingProductionRule extends ProductionRule {

    /** Returns the symbol of this kind of rule, "+>" */
    protected String getSymbol() {
        return "+>";
    }

    /** Sets the production of this rule and marks the production list as non-replacing */
    @Override
    public void setProduction(ProductionList productionList) {
        super.setProduction(productionList);
        productionList.setReplacing(false);
    }

}

// ===== container-search/src/main/java/com/yahoo/prelude/semantics/rule/AndCondition.java =====
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.semantics.rule;

import java.util.Iterator;

import com.yahoo.prelude.semantics.engine.Choicepoint;
import com.yahoo.prelude.semantics.engine.RuleEvaluation;

/**
 * A condition which matches when all of its subconditions match,
 * and which backtracks the evaluation state when they do not
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a>
 */
public class AndCondition extends CompositeCondition {

    // TODO: Not in use. What was this for? Remove?

    public AndCondition() {
    }

    /**
     * Matches all subconditions in order.
     * The evaluation is backtracked to the state at entry if any of them fails.
     */
    @Override
    public boolean doesMatch(RuleEvaluation e) {
        Choicepoint entryState = e.getChoicepoint(this, true);
        entryState.updateState();
        if (allSubConditionsMatches(e)) return true;

        entryState.backtrack();
        return false;
    }

    /** Use parentheses when nested in anything but a choice condition */
    @Override
    protected boolean useParentheses() {
        if (getParent() == null) return false;
        return !(getParent() instanceof ChoiceCondition);
    }

    @Override
    protected String toInnerString() { return toInnerString(" & "); }

}

// ===== container-search/src/main/java/com/yahoo/prelude/semantics/rule/ChoiceCondition.java =====
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.semantics.rule;

import java.util.Iterator;

import com.yahoo.prelude.semantics.engine.RuleEvaluation;

/**
 * A condition which consists of a list of alternatives to match at a specific location
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a>
 */
public class ChoiceCondition extends CompositeCondition {

    public ChoiceCondition() {
    }

    /** Matches if any alternative matches; alternatives are tried in order until the first match */
    @Override
    public boolean doesMatch(RuleEvaluation e) {
        //if (e.currentItem()==null) return false;
        Iterator<Condition> alternatives = conditionIterator();
        while (alternatives.hasNext()) {
            if (alternatives.next().matches(e)) return true;
        }
        return false;
    }

    @Override
    protected String toInnerString() { return toInnerString(", "); }

}

// ===== container-search/src/main/java/com/yahoo/prelude/semantics/rule/ComparisonCondition.java =====
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.semantics.rule;

import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import com.yahoo.prelude.semantics.engine.Choicepoint;
import com.yahoo.prelude.semantics.engine.RuleEvaluation;

/**
 * A condition which is true if both of its two subconditions match and their
 * <i>values</i> compare as true under the operator of this condition
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
 */
public class ComparisonCondition extends CompositeCondition {

    /** The operator used to compare the left and right values */
    private Operator operator;

    /**
     * Creates a comparison of two conditions
     *
     * @param leftCondition the condition producing the left value
     * @param operatorString the string form of the operator, e.g. "=" or "&gt;="
     * @param rightCondition the condition producing the right value
     * @throws IllegalArgumentException if the operator string is not a known operator
     */
    public ComparisonCondition(Condition leftCondition,String operatorString,Condition rightCondition) {
        operator = Operator.get(operatorString);
        addCondition(leftCondition);
        addCondition(rightCondition);
    }

    /**
     * Matches the left and then the right condition from the same position and compares the
     * values they leave in the evaluation. The evaluation is backtracked if this does not match.
     */
    @Override
    protected boolean doesMatch(RuleEvaluation evaluation) {
        Object leftValue = null;
        Object rightValue = null;
        boolean matched = false;
        Choicepoint choicepoint = evaluation.getChoicepoint(this, true);
        try {
            matched = getLeftCondition().matches(evaluation);
            if (matched) {
                leftValue = evaluation.getValue();
                evaluation.setValue(null);

                // Evaluate the right side from the same position as the left side
                choicepoint.backtrackPosition();
                matched = getRightCondition().matches(evaluation);
                if (matched) {
                    rightValue = evaluation.getValue();
                    evaluation.setValue(rightValue);
                    matched = operator.compare(leftValue, rightValue);
                }
            }
            return matched;
        }
        finally {
            if (!matched)
                choicepoint.backtrack();
            traceResult(matched, evaluation, leftValue, rightValue);
        }
    }

    @Override
    protected void traceResult(boolean matches,RuleEvaluation e) {
        // Uses our own logging method instead
    }

    /** Traces the outcome of this comparison, including the compared values, at trace level 3 */
    protected void traceResult(boolean matches,RuleEvaluation e,Object left,Object right) {
        if (e.getTraceLevel() < 3) return;

        if (matches)
            e.trace(3,"Matched '" + this + "'" + getMatchInfoString(e) + " at " + e.previousItem() + " as " + left + operator + right + " is true");
        else
            e.trace(3,"Did not match '" + this + "' at " + e.currentItem() + " as " + left + operator + right + " is false");
    }

    /** Returns the first subcondition */
    public Condition getLeftCondition() { return getCondition(0); }

    public void setLeftCondition(Condition leftCondition) { setCondition(0, leftCondition); }

    /** Returns the second subcondition */
    public Condition getRightCondition() { return getCondition(1); }

    public void setRightCondition(Condition rightCondition) { setCondition(1, rightCondition); }

    @Override
    protected String toInnerString() { return toInnerString(operator.toString()); }

    /** A comparison operator. There is one shared instance per operator, looked up by its string form. */
    private static final class Operator {

        private String operatorString;

        // Must be declared before the operator constants, as their constructor registers them here
        private static Map<String, Operator> operators=new HashMap<>();

        public static final Operator equals=new Operator("=");
        public static final Operator largerequals=new Operator(">=");
        public static final Operator smallerequals=new Operator("<=");
        public static final Operator larger=new Operator(">");
        public static final Operator smaller=new Operator("<");
        public static final Operator different=new Operator("!=");
        public static final Operator contains=new Operator("=~");

        private Operator(String operator) {
            this.operatorString = operator;
            operators.put(operatorString, this);
        }

        /** Returns the operator having the given string form, throws IllegalArgumentException if none */
        private static Operator get(String operatorString) {
            Operator found = operators.get(operatorString);
            if (found != null) return found;
            throw new IllegalArgumentException("Unknown operator '" + operatorString + "'");
        }

        /**
         * Applies this operator to the two values. Equality and difference accept nulls,
         * all other operators are false if either value is null.
         */
        public boolean compare(Object left,Object right) {
            if (this == equals) return equals(left, right);
            if (this == different) return !equals(left, right);

            if (left == null || right == null) return false;

            if (this == contains) return contains(left, right);
            if (this == largerequals) return larger(left, right) || equals(left, right);
            if (this == smallerequals) return !larger(left, right);
            if (this == larger) return larger(left, right);
            if (this == smaller) return !larger(left, right) && !equals(left, right);

            throw new RuntimeException("Programming error, fix this method");
        }

        /** Null-tolerant equality of two values */
        private boolean equals(Object left,Object right) {
            if (left == null) return right == null;
            return left.equals(right);
        }

        /** True if left contains right */
        private boolean contains(Object left,Object right) {
            if (left instanceof Collection) {
                Collection<?> collection = (Collection<?>) left;
                return collection.contains(right);
            }
            return left.toString().indexOf(right.toString()) >= 0;
        }

        /** True if left is larger than right, numerically if both are numbers, otherwise as strings */
        private boolean larger(Object left,Object right) {
            boolean bothNumbers = (left instanceof Number) && (right instanceof Number);
            if (bothNumbers) {
                double leftNumber = ((Number) left).doubleValue();
                double rightNumber = ((Number) right).doubleValue();
                return leftNumber > rightNumber;
            }
            return left.toString().compareTo(right.toString()) > 0;
        }

        @Override
        public int hashCode() { return operatorString.hashCode(); }

        @Override
        public boolean equals(Object other) {
            if (other instanceof Operator)
                return other.toString().equals(this.toString());
            return false;
        }

        @Override
        public String toString() { return operatorString; }

    }

}

// ===== container-search/src/main/java/com/yahoo/prelude/semantics/rule/CompositeCondition.java =====
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.semantics.rule;

import java.util.Iterator;
import java.util.List;

import com.yahoo.prelude.semantics.RuleBase;
import com.yahoo.prelude.semantics.engine.RuleEvaluation;

/**
 * A condition which contains a list of conditions
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a>
 */
public abstract class CompositeCondition extends Condition {

    /** The immediate subconditions of this, in the order they were added */
    private List<Condition> conditions=new java.util.ArrayList<>();

    public CompositeCondition() {
    }

    /** Traces the start of the evaluation of this and indents the trace, at trace level 3 and up */
    public void preMatchHook(RuleEvaluation e) {
        super.preMatchHook(e);
        if (e.getTraceLevel()>=3) {
            e.trace(3,"Evaluating '" + this + "'" + " at " + e.currentItem());
            e.indentTrace();
        }
    }

    /** Undoes the trace indentation done by preMatchHook */
    public void postMatchHook(RuleEvaluation e) {
        if (e.getTraceLevel()>=3) {
            e.unindentTrace();
        }
    }

    /** Returns true if any immediate subcondition reports an open choicepoint in the given evaluation */
    protected boolean hasOpenChoicepoint(RuleEvaluation evaluation) {
        for (Iterator<Condition> i=conditionIterator(); i.hasNext(); ) {
            Condition subCondition=i.next();
            if (subCondition.hasOpenChoicepoint(evaluation))
                return true;
        }
        return false;
    }

    /** Appends a subcondition and makes this its parent */
    public void addCondition(Condition condition) {
        conditions.add(condition);
        condition.setParent(this);
    }

    /** Sets the condition at the given index */
    // NOTE(review): unlike addCondition/removeCondition this does not update the parent of
    // the new or of the replaced condition - confirm whether that is intended
    public void setCondition(int index,Condition condition) {
        conditions.set(index,condition);
    }

    /** Returns the number of subconditions */
    public int conditionSize() { return conditions.size(); }

    /**
     * Returns the condition at the given index
     *
     * @param i the 0-base index
     * @return the condition at this index
     * @throws IndexOutOfBoundsException if there is no condition at this index
     */
    public Condition getCondition(int i) {
        return conditions.get(i);
    }

    /**
     * Removes the condition at the given index and clears its parent
     *
     * @param i the 0-base index
     * @return the removed condition
     * @throws IndexOutOfBoundsException if there is no condition at this index
     */
    public Condition removeCondition(int i) {
        Condition condition=conditions.remove(i);
        condition.setParent(null);
        return condition;
    }

    /** Returns an iterator of the immediate children of this condition */
    public Iterator<Condition> conditionIterator() { return conditions.iterator(); }

    /** Makes the references of all the immediate subconditions of this */
    public void makeReferences(RuleBase rules) {
        for (Iterator<Condition> i=conditionIterator(); i.hasNext(); ) {
            Condition condition=i.next();
            condition.makeReferences(rules);
        }
    }

    /** Whether this should be output with parentheses, default is parent!=null */
    protected boolean useParentheses() {
        return getParent()!=null;
    }

    /**
     * Returns the string form of the subconditions joined by the given separator,
     * prefixed by the label if there is one, and parenthesized if labeled or if useParentheses is true
     */
    protected String toInnerString(String conditionSeparator) {
        if (getLabel()!=null)
            return getLabel() + ":(" + conditionsToString(conditionSeparator) + ")";
        else if (useParentheses())
            return "(" + conditionsToString(conditionSeparator) + ")";
        else
            return conditionsToString(conditionSeparator);
    }

    /** Returns the string forms of the subconditions joined by the given separator */
    protected final String conditionsToString(String conditionSeparator) {
        StringBuilder buffer=new StringBuilder();
        for (Iterator<Condition> i=conditionIterator(); i.hasNext(); ) {
            buffer.append(i.next().toString());
            if (i.hasNext())
                buffer.append(conditionSeparator);
        }
        return buffer.toString();
    }

    /** Returns whether all the conditions of this matches the current evaluation state */
    protected final boolean allSubConditionsMatches(RuleEvaluation e) {
        for (Iterator<Condition> i=conditionIterator(); i.hasNext(); ) {
            Condition subCondition=i.next();
            if (!subCondition.matches(e))
                return false;
        }
        return true;
    }

}

// ===== container-search/src/main/java/com/yahoo/prelude/semantics/rule/CompositeItemCondition.java =====
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.semantics.rule;

import com.yahoo.prelude.query.PhraseItem;
import com.yahoo.prelude.semantics.engine.Choicepoint;
import com.yahoo.prelude.semantics.engine.RuleEvaluation;

/**
 * A condition on the presence of a particular kind of composite item (possibly also with a particular content)
 *
 * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a>
 * @since 5.1.15
 */
public class CompositeItemCondition extends CompositeCondition {

    /**
     * Matches if the current item has a phrase item as parent and all subconditions match.
     * The evaluation is backtracked if this does not match.
     */
    @Override
    protected boolean doesMatch(RuleEvaluation e) {
        Choicepoint choicepoint = e.getChoicepoint(this,true);
        choicepoint.updateState();
        // currentItem() is null when the evaluation has run out of items:
        // treat that as a non-match rather than failing with a NullPointerException
        boolean matches = e.currentItem() != null
                          && e.currentItem().getItem().getParent() instanceof PhraseItem
                          && allSubConditionsMatches(e);
        if ( ! matches)
            choicepoint.backtrack();
        return matches;

    }

    /** Returns the subconditions as a quoted, space separated string, labeled and/or parenthesized as needed */
    @Override
    protected String toInnerString() {
        if (getLabel()!=null)
            return getLabel() + ":(" + toInnerStringBody() + ")";
        else if (useParentheses())
            return "(" + toInnerStringBody() + ")";
        else
            return toInnerStringBody();
    }

    private String toInnerStringBody() {
        return "\"" + conditionsToString(" ") + "\"";
    }

}

// ===== container-search/src/main/java/com/yahoo/prelude/semantics/rule/Condition.java =====
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.query.TermItem; +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.engine.FlattenedItem; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * Superclass of all kinds of conditions of production rules + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public abstract class Condition { + + /** The parent of this condition, or null if this is not nested */ + private CompositeCondition parent=null; + + /** + * The label of this condition, or null if none. + * Specified by label:condition + * The label is also the default context is no context is speficied explicitly + */ + private String label=null; + + /** + * The name space refered by this match, or null if the default (query) + * Specified by namespace.condition in rules. + */ + private String nameSpace=null; + + /** + * The name of the context created by this, or null if none + * Specified by context/condition in rules + */ + private String contextName; + + /** Position constraints of the terms matched by this condition */ + private Anchor anchor=Anchor.NONE; + + public static enum Anchor { + NONE, START, END, BOTH; + public static Anchor create(boolean start,boolean end) { + if (start && end) return Anchor.BOTH; + if (start) return Anchor.START; + if (end) return Anchor.END; + return NONE; + } + } + + public Condition() { + this(null,null); + } + + public Condition(String label) { + this(label,null); + } + + public Condition(String label,String context) { + this.label=label; + this.contextName=context; + } + + /** + * Sets the name whatever is matched by this condition can be refered as, or null + * to make it unreferable + */ + public void setContextName(String contextName) { this.contextName=contextName; } + + /** + * Returns the name whatever is matched by this condition can be refered as, or null + * if it is unreferable + */ + public String getContextName() { 
return contextName; } + + /** Returns whether this is referable, returns context!=null by default */ + protected boolean isReferable() { return contextName!=null; } + + /** Sets the label of this. Set to null to use the default */ + public String getLabel() { return label; } + + /** Returns the label of this, or null if none (the default) */ + public void setLabel(String label) { this.label = label; } + + /** Returns the name of the namespace of this, or null if default (query) */ + public String getNameSpace() { return nameSpace; } + + /** Sets the name of the namespace of this */ + public void setNameSpace(String nameSpace) { this.nameSpace=nameSpace; } + + /** Returns the condition this is nested within, or null if it is not nested */ + public CompositeCondition getParent() { return parent; } + + /** Called by CompositeCondition.addCondition() */ + void setParent(CompositeCondition parent) { this.parent=parent; } + + /** Sets a positional constraint on this condition */ + public void setAnchor(Anchor anchor) { this.anchor=anchor; } + + /** Returns the positional constraint on this anchor. This is never null */ + public Anchor getAnchor() { return anchor; } + + /** + * <p>Returns whether this condition matches the given evaluation + * at the <i>current</i> location of the evaluation. 
Calls the doesMatch + * method of each condition subtype.</p> + */ + public final boolean matches(RuleEvaluation e) { + // TODO: With this algoritm, each choice point will move to the next choice on each reevaluation + // In the case where there are multiple ellipses, we may want to do globally coordinated + // moves of all the choice points instead + try { + preMatchHook(e); + + if (!matchesStartAnchor(e)) return false; + + String higherLabel=e.getCurrentLabel(); + if (getLabel()!=null) + e.setCurrentLabel(getLabel()); + + boolean matches=doesMatch(e); + while (!matches && hasOpenChoicepoint(e)) { + matches=doesMatch(e); + } + + e.setCurrentLabel(higherLabel); + + if (!matchesEndAnchor(e)) return false; + + traceResult(matches,e); + return matches; + } + finally { + postMatchHook(e); + } + + } + + /** Check start anchor. Trace level 4 if no match */ + protected boolean matchesStartAnchor(RuleEvaluation e) { + if (anchor!=Anchor.START && anchor!=Anchor.BOTH) return true; + if (e.getPosition()==0) return true; + if (e.getTraceLevel()>=4) + e.trace(4,this + " must be at the start, which " + e.currentItem() + " isn't"); + return false; + } + + /** Check start anchor. 
Trace level 4 if no match */ + protected boolean matchesEndAnchor(RuleEvaluation e) { + if (anchor!=Anchor.END && anchor!=Anchor.BOTH) return true; + if (e.getPosition()>=e.items().size()) return true; + if (e.getTraceLevel()>=4) + e.trace(4,this + " must be at the end, which " + e.currentItem() + " isn't"); + return false; + } + + protected void traceResult(boolean matches,RuleEvaluation e) { + if (matches && e.getTraceLevel()>=3) + e.trace(3,"Matched '" + this + "'" + getMatchInfoString(e) + " at " + e.previousItem()); + if (!matches && e.getTraceLevel()>=4) + e.trace(4,"Did not match '" + this + "' at " + e.currentItem()); + } + + protected String getMatchInfoString(RuleEvaluation e) { + String matchInfo=getMatchInfo(e); + if (matchInfo==null) return ""; + return " as '" + matchInfo + "'"; + } + + /** + * Called when match is called, before anything else. + * Always call super.preMatchHook when overriding. + */ + protected void preMatchHook(RuleEvaluation e) { + e.entering(contextName); + } + + /** + * Called just before match returns, on any return condition including exceptions. + * Always call super.postMatchHook when overriding + */ + protected void postMatchHook(RuleEvaluation e) { + e.leaving(contextName); + } + + /** + * Override this to return a string describing what this condition has matched in this evaluation. + * Will only be called when this condition is actually matched in this condition + * + * @return info about what is matched, or null if there is no info to return (default) + */ + protected String getMatchInfo(RuleEvaluation e) { return null; } + + /** + * Returns whether this condition matches the given evaluation + * at the <i>current</i> location of the evaluation. If there is a + * match, the evaluation must be advanced to the location beyond + * the matching item(s) before this method returns. 
+ */ + protected abstract boolean doesMatch(RuleEvaluation e); + + /** + * Returns whether there is an <i>open choice</i> in this or any of its subconditions. + * Returns false by default, must be overriden by conditions which may generate + * choices open accross multiple calls to matches, or contain such conditions. + */ + protected boolean hasOpenChoicepoint(RuleEvaluation e) { + return false; + } + + /** Override if references needs to be set in this condition of its children */ + public void makeReferences(RuleBase rules) { } + + protected String getLabelString() { + if (label==null) return ""; + return label + ":"; + } + + /** Whether the label matches the current item, true if there is no current item */ + protected boolean labelMatches(RuleEvaluation e) { + FlattenedItem flattenedItem=e.currentItem(); + if (flattenedItem==null) return true; + TermItem item=flattenedItem.getItem(); + if (item==null) return true; + return labelMatches(item,e); + } + + protected boolean labelMatches(TermItem evaluationTerm,RuleEvaluation e) { + String indexName=evaluationTerm.getIndexName(); + String label=getLabel(); + if (label==null) + label=e.getCurrentLabel(); + if ("".equals(indexName) && label==null) return true; + if (indexName.equals(label)) return true; + if (e.getTraceLevel()>=4) + e.trace(4,"'" + this + "' does not match, label of " + e.currentItem() + " was required to be " + label); + return false; + } + + /** All instances of this produces a parseable string output */ + protected abstract String toInnerString(); + + protected boolean isDefaultContextName() { return false; } + + public String toString() { + String contextString=""; + String nameSpaceString=""; + if (contextName!=null && !isDefaultContextName()) + contextString=contextName + "/"; + if (getNameSpace()!=null) + nameSpaceString=getNameSpace() + "."; + return contextString + nameSpaceString + toInnerString(); + } + +} diff --git 
a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ConditionReference.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ConditionReference.java new file mode 100644 index 00000000000..855a8b802ba --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ConditionReference.java @@ -0,0 +1,125 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.query.TermItem; +import com.yahoo.prelude.querytransform.PhraseMatcher; +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.RuleBaseException; +import com.yahoo.prelude.semantics.engine.Choicepoint; +import com.yahoo.prelude.semantics.engine.EvaluationException; +import com.yahoo.prelude.semantics.engine.FlattenedItem; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; +import com.yahoo.protect.Validator; + +import java.util.Map; + +/** + * A reference to a named condition + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class ConditionReference extends Condition { + + /** The name of the referenced rule */ + private String conditionName; + + /** + * The actual condition references by this, or null if not initialized or not found, + * or if this is really an automata reference + */ + private NamedCondition namedCondition; + + /** + * True if this condition should be looked up in the automata + * annotations of the item instead of by reference to another item + */ + private boolean automataLookup=false; + + public ConditionReference(String conditionName) { + this(null,conditionName); + } + + public ConditionReference(String label,String conditionName) { + super(label); + Validator.ensureNotNull("Name of referenced condition",conditionName); + this.conditionName=conditionName; + setContextName(conditionName); + } + + /** Returns the name of the referenced 
rule, never null */ + public String getConditionName() { return conditionName; } + + public void setConditionName(String name) { this.conditionName=name; } + + public boolean doesMatch(RuleEvaluation e) { + if (automataLookup) return automataMatch(e); + + if (namedCondition==null) + throw new EvaluationException("Condition reference '" + conditionName + + "' not found or not initialized"); + + return namedCondition.matches(e); + } + + private boolean automataMatch(RuleEvaluation e) { + FlattenedItem current=e.currentItem(); + if (current==null) return false; + + Object annotation=current.getItem().getAnnotation(conditionName); + if (annotation==null) return false; + if (! (annotation instanceof PhraseMatcher.Phrase)) return false; + + PhraseMatcher.Phrase phrase=(PhraseMatcher.Phrase)annotation; + + Choicepoint choicePoint=e.getChoicepoint(this,true); + boolean matches=automataMatchPhrase(phrase,e); + + if (!matches && e.isInNegation()) { // TODO: Temporary hack! Works for single items only + e.addMatch(current,null); + } + + if ((!matches && !e.isInNegation() || (matches && e.isInNegation()))) + choicePoint.backtrackPosition(); + + return matches; + } + + private boolean automataMatchPhrase(PhraseMatcher.Phrase phrase,RuleEvaluation e) { + for (PhraseMatcher.Phrase.MatchIterator i=phrase.itemIterator(); i.hasNext(); ) { + i.next(); + FlattenedItem current=e.currentItem(); + if (current==null) return false; + if (!labelMatches(e.currentItem().getItem(),e)) return false; + if (!e.isInNegation()) + e.addMatch(current,i.getReplace()); + e.next(); + } + if (phrase.getLength()>phrase.getBackedLength()) return false; // The underlying composite item has changed + return true; + } + + public void makeReferences(RuleBase ruleBase) { + namedCondition=ruleBase.getCondition(conditionName); + if (namedCondition==null) { // Then this may reference some automata value, if we have an automata + if (ruleBase.usesAutomata()) + automataLookup=true; + else + throw new 
RuleBaseException("Referenced condition '" + conditionName + + "' does not exist in " + ruleBase); + } + } + + protected boolean hasOpenChoicepoint(RuleEvaluation e) { + if (namedCondition==null) return false; + return namedCondition.getCondition().hasOpenChoicepoint(e); + } + + protected boolean isDefaultContextName() { + return getContextName()==null || getContextName().equals(conditionName); + } + + protected String toInnerString() { + return "[" + conditionName + "]"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/EllipsisCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/EllipsisCondition.java new file mode 100644 index 00000000000..84a470ff64e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/EllipsisCondition.java @@ -0,0 +1,130 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import java.util.Iterator; +import java.util.List; + +import com.yahoo.prelude.semantics.engine.Choicepoint; +import com.yahoo.prelude.semantics.engine.FlattenedItem; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition which greedily matches anything, represented as "..." 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class EllipsisCondition extends Condition { + + /** Whether this ellipsis is actually referable (enclosed in []) or not */ + private boolean referable; + + /** Creates a referable ellipsis condition with no label */ + public EllipsisCondition() { + this(true); + } + + /** Creates an ellipsis condition with no label */ + public EllipsisCondition(boolean referable) { + this(null,referable); + } + + /** Creates an ellipsis condition */ + public EllipsisCondition(String label,boolean referable) { + super(label); + this.referable=referable; + if (referable) + setContextName("..."); + } + + public EllipsisCondition(String label,String context) { + super(label,context); + } + + public boolean doesMatch(RuleEvaluation e) { + // We use a choice point to remember which untried alternatives are not tried (if any) + // We never need to backtrack to this choice - backtracking is done by the parent + // if this choice gives a global invalid state + Choicepoint choicepoint=e.getChoicepoint(this,false); + if (choicepoint==null) { // First try + choicepoint=e.getChoicepoint(this,true); + } + else { + if (!choicepoint.isOpen()) return false; + } + + // Match all the rest of the items the first time, then all except the last item and so on + int numberOfTermsToMatch=e.itemCount() - e.currentPosition() - choicepoint.tryCount(); + if (numberOfTermsToMatch<0) { + choicepoint.close(); + return false; + } + choicepoint.addTry(); + + String matchedTerms=matchTerms(numberOfTermsToMatch,e); + e.setValue(matchedTerms); + return true; + } + + private String matchTerms(int numberOfTerms,RuleEvaluation e) { + StringBuilder b=new StringBuilder(); + for (int i=0; i<numberOfTerms; i++) { + e.addMatch(e.currentItem(),e.currentItem().getItem().getIndexedString()); + b.append(e.currentItem().getItem().stringValue()); + if (i<(numberOfTerms-1)) + b.append(" "); + e.next(); + } + return b.toString(); + } + + public 
String getMatchInfo(RuleEvaluation e) { + Choicepoint choicepoint=e.getChoicepoint(this,false); + if (choicepoint==null) return null; + + return spaceSeparated(e.items().subList(choicepoint.getState().getPosition(), + e.itemCount() - choicepoint.tryCount() +1 )); + } + + private String spaceSeparated(List<FlattenedItem> items) { + StringBuilder buffer=new StringBuilder(); + for (Iterator<FlattenedItem> i=items.iterator(); i.hasNext(); ) { + buffer.append(i.next().toString()); + if (i.hasNext()) + buffer.append(" "); + } + return buffer.toString(); + } + + /** Returns whether this ellipsis condition can be referred from a production */ + public boolean isReferable() { + return referable || super.isReferable(); + } + + /** Sets whether this ellipsis condition can be referred from a production or not */ + public void setReferable(boolean referable) { + this.referable=referable; + if (referable && getContextName()==null) + setContextName("..."); + if (!referable && "...".equals(getContextName())) + setContextName(null); + } + + protected boolean hasOpenChoicepoint(RuleEvaluation e) { + Choicepoint choicepoint=e.getChoicepoint(this,false); + if (choicepoint==null) return false; // Not tried yet + if (!choicepoint.isOpen()) return false; + return true; + } + + protected boolean isDefaultContextName() { + return (getContextName()==null || getContextName().equals("...")); + } + + protected String toInnerString() { + if (referable) + return "[...]"; + else + return "..."; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralCondition.java new file mode 100644 index 00000000000..3cde8bba5ff --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralCondition.java @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition which is always true, and which has it's own value as return value + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class LiteralCondition extends Condition { + + private String value; + + public LiteralCondition(String value) { + this.value=value; + } + + protected boolean doesMatch(RuleEvaluation e) { + e.setValue(value); + return true; + } + + public void setValue(String value) { this.value=value; } + + public String getValue() { return value; } + + public String toInnerString() { return "'" + value + "'"; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralPhraseProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralPhraseProduction.java new file mode 100644 index 00000000000..23404fbc6e2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralPhraseProduction.java @@ -0,0 +1,79 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.rule; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +import com.yahoo.prelude.query.PhraseItem; +import com.yahoo.prelude.query.WordItem; +import com.yahoo.prelude.semantics.engine.Match; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; +import com.yahoo.protect.Validator; + +/** + * A literal phrase produced by a production rule + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class LiteralPhraseProduction extends TermProduction { + + private List<String> terms=new ArrayList<>(); + + /** Creates a new produced literal phrase */ + public LiteralPhraseProduction() { + super(); + } + + /** + * Creates a new produced literal phrase + * + * @param label the label of the produced term + */ + public LiteralPhraseProduction(String label) { + super(label); + } + + /** Adds a term to this phrase */ + public void addTerm(String term) { + Validator.ensureNotNull("A term in a produced phrase",term); + terms.add(term); + } + + /** Returns a read only view of the terms produced by this, never null */ + public List<String> getTerms() { return Collections.unmodifiableList(terms); } + + public void produce(RuleEvaluation e,int offset) { + PhraseItem newPhrase=new PhraseItem(); + newPhrase.setIndexName(getLabel()); + for (String term : terms) + newPhrase.addItem(new WordItem(term)); + + if (replacing) { + Match matched=e.getNonreferencedMatch(0); + insertMatch(e,matched,newPhrase,offset); + } + else { + newPhrase.setWeight(getWeight()); + if (e.getTraceLevel()>=6) + e.trace(6,"Adding '" + newPhrase + "'"); + e.addItem(newPhrase,getTermType()); + } + } + + public String toInnerTermString() { + return getLabelString() + "\"" + getSpaceSeparated(terms) + "\""; + } + + private String getSpaceSeparated(List<String> terms) { + StringBuilder builder=new StringBuilder(); + for (Iterator<String> i=terms.iterator(); i.hasNext(); ) { + 
builder.append(i.next()); + if (i.hasNext()) + builder.append(" "); + } + return builder.toString(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralTermProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralTermProduction.java new file mode 100644 index 00000000000..f157fd6901d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralTermProduction.java @@ -0,0 +1,79 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.query.TermType; +import com.yahoo.prelude.query.WordItem; +import com.yahoo.prelude.semantics.engine.Match; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; +import com.yahoo.protect.Validator; + +/** + * A literal term produced by a production rule + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class LiteralTermProduction extends TermProduction { + + private String literal; + + /** + * Creates a new produced literal term + * + * @param literal the label of the condition this should take it's value from + */ + public LiteralTermProduction(String literal) { + super(); + setLiteral(literal); + } + + /** + * Creates a new produced literal term + * + * @param literal the label of the condition this should take it's value from + * @param termType the type of term to produce + */ + public LiteralTermProduction(String literal, TermType termType) { + super(termType); + setLiteral(literal); + } + + /** + * Creates a new produced literal term + * + * @param label the label of the produced term + * @param literal this term word + * @param termType the type of term to produce + */ + public LiteralTermProduction(String label,String literal, TermType termType) { + super(label,termType); + setLiteral(literal); + } + + /** The literal term value, never null */ + public 
void setLiteral(String literal) { + Validator.ensureNotNull("A produced term",literal); + this.literal=literal; + } + + /** Returns the term word produced, never null */ + public String getLiteral() { return literal; } + + public void produce(RuleEvaluation e,int offset) { + WordItem newItem=new WordItem(literal,getLabel()); + if (replacing) { + Match matched=e.getNonreferencedMatch(0); + insertMatch(e,matched,newItem,offset); + } + else { + newItem.setWeight(getWeight()); + if (e.getTraceLevel()>=6) + e.trace(6,"Adding '" + newItem + "'"); + e.addItem(newItem,getTermType()); + } + } + + public String toInnerTermString() { + return getLabelString() + literal; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamedCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamedCondition.java new file mode 100644 index 00000000000..ca1d623847d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamedCondition.java @@ -0,0 +1,57 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition given a name which enables it to be referenced from other conditions. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class NamedCondition { + + private String conditionName; + + private Condition condition; + + public NamedCondition(String name,Condition condition) { + this.conditionName=name; + this.condition=condition; + } + + public String getName() { return conditionName; } + + public void setName(String name) { this.conditionName = name; } + + public Condition getCondition() { return condition; } + + public void setCondition(Condition condition) { this.condition = condition; } + + public boolean matches(RuleEvaluation e) { + if (e.getTraceLevel()>=3) { + e.trace(3,"Evaluating '" + this + "' at " + e.currentItem()); + e.indentTrace(); + } + + boolean matches=condition.matches(e); + + if (e.getTraceLevel()>=3) { + e.unindentTrace(); + if (matches) + e.trace(3,"Matched '" + this + "' at " + e.previousItem()); + else if (e.getTraceLevel()>=4) + e.trace(4,"Did not match '" + this + "' at " + e.currentItem()); + } + return matches; + } + + /** + * Returns the canonical string representation of this named condition. + * This string representation can always be reparsed to produce an + * identical rule to this one. + */ + public String toString() { + return "[" + conditionName + "] :- " + condition.toString(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamespaceProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamespaceProduction.java new file mode 100644 index 00000000000..0c73427ad82 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamespaceProduction.java @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.semantics.RuleBaseException; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A production in a specified namespace + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class NamespaceProduction extends Production { + + /** The label in this namespace */ + private String namespace; + + /** The key ito set in the namespace */ + private String key; + + /** The value to set in the namespace */ + private String value=null; + + /** Creates a produced template term with no label and the default type */ + public NamespaceProduction(String namespace,String key,String value) { + setNamespace(namespace); + this.key=key; + this.value=value; + } + + public String getNamespace() { return namespace; } + + public final void setNamespace(String namespace) { + if (!namespace.equals("parameter")) + throw new RuleBaseException("Can not produce into namespace '" + namespace + + ". Only the 'parameter' name space can be referenced currently"); + this.namespace = namespace; + } + + public String getKey() { return key; } + + public void setKey(String key) { this.key = key; } + + public String getValue() { return value; } + + public void setValue(String value) { this.value = value; } + + public void produce(RuleEvaluation e,int offset) { + e.getEvaluation().getQuery().properties().set(key, value); + } + + /** All instances of this produces a parseable string output */ + public String toInnerString() { + return namespace + "." + key + "='" + value + "'"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NotCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NotCondition.java new file mode 100644 index 00000000000..64a10ea821a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NotCondition.java @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition which matches if its contained condition doesn't. + * NotCondition inverts the term checking but not the label checking. + * That is, it means "label:!term", it does not mean "!label:term". + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class NotCondition extends Condition { + + private Condition condition; + + public NotCondition(Condition condition) { + this.condition=condition; + } + + public Condition getCondtiion() { return condition; } + + public void setCondition(Condition condition) { this.condition=condition; } + + protected boolean doesMatch(RuleEvaluation e) { + e.setInNegation(!e.isInNegation()); + boolean matches=!condition.matches(e); + e.setInNegation(!e.isInNegation()); + return matches; + } + + public String toInnerString() { + return "!" + condition; + } + + public void makeReferences(RuleBase ruleBase) { + condition.makeReferences(ruleBase); + } + + protected boolean hasOpenChoicepoint(RuleEvaluation evaluation) { + return condition.hasOpenChoicepoint(evaluation); + } + + + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/Production.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/Production.java new file mode 100644 index 00000000000..cc6a9c87fb0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/Production.java @@ -0,0 +1,65 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.rule; + +import java.util.Set; + +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A new term produced by a production rule + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public abstract class Production { + + /** True to add, false to replace, default true */ + protected boolean replacing=true; + + /** The (0-base) position of this term in the productions of this rule */ + private int position=0; + + /** The weight (strength) of this production as a percentage (default is 100) */ + private int weight=100; + + /** Creates a produced template term with no label and the default type */ + public Production() { + } + + /** True to replace, false to add, if this production can do both. Default true. */ + public void setReplacing(boolean replacing) { this.replacing=replacing; } + + public int getPosition() { return position; } + + public void setPosition(int position) { this.position = position; } + + /** Sets the weight of this production as a percentage (default is 100) */ + public void setWeight(int weight) { this.weight=weight; } + + /** Returns the weight of this production as a percentage (default is 100) */ + public int getWeight() { return weight; } + + /** + * Produces this at the current match + * + * @param e the evaluation context containing the current match and the query + * @param offset the offset position at which to produce this. Offsets are used to produce multiple items + * at one position, inserted in the right order. + */ + public abstract void produce(RuleEvaluation e,int offset); + + /** + * Called to add the references into the condition of this rule made by this production + * into the given set. 
The default implementation is void, override for productions + * which refers to the condition + */ + void addMatchReferences(Set<String> matchReferences) { } + + /** All instances of this produces a parseable string output */ + public final String toString() { + return toInnerString() + (getWeight()!=100 ? ("!" + getWeight()) : ""); + } + + /** All instances of this produces a parseable string output */ + protected abstract String toInnerString(); + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ProductionList.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ProductionList.java new file mode 100644 index 00000000000..3397a9ada1e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ProductionList.java @@ -0,0 +1,67 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A list of the productions of a rule + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class ProductionList { + + private List<Production> productions =new java.util.ArrayList<>(); + + /** True to replace by the production, false to add it */ + private boolean replacing=true; + + public void addProduction(Production term) { + term.setReplacing(replacing); + term.setPosition(productions.size()); + productions.add(term); + } + + /** True to replace, false to add, default true */ + void setReplacing(boolean replacing) { + for (Iterator<Production> i=productions.iterator(); i.hasNext(); ) { + Production production=i.next(); + production.setReplacing(replacing); + } + + this.replacing=replacing; + } + + /** Returns an unmodifiable view of the productions in this */ + public List<Production> 
productionList() { return Collections.unmodifiableList(productions); } + + public int getTermCount() { return productions.size(); } + + void addMatchReferences(Set<String> matchReferences) { + for (Iterator<Production> i=productions.iterator(); i.hasNext(); ) { + Production term=i.next(); + term.addMatchReferences(matchReferences); + } + } + + public void produce(RuleEvaluation e) { + for (int i=0; i<productions.size(); i++) { + productions.get(i).produce(e,i); + } + } + + public String toString() { + StringBuilder buffer=new StringBuilder(); + for (Iterator<Production> i=productions.iterator(); i.hasNext(); ) { + buffer.append(i.next().toString()); + if (i.hasNext()) + buffer.append(" "); + } + return buffer.toString(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ProductionRule.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ProductionRule.java new file mode 100644 index 00000000000..55be2aa2afd --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ProductionRule.java @@ -0,0 +1,100 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import java.util.Collections; +import java.util.Iterator; +import java.util.Set; + +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A query rewriting rule. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public abstract class ProductionRule { + + /** What must be true for this rule to be true */ + private Condition condition; + + /** What is produced when this rule is true */ + private ProductionList production=new ProductionList(); + + /** The set of match name Strings which the production part of this rule references */ + private Set<String> matchReferences=new java.util.LinkedHashSet<>(); + + /** Sets what must be true for this rule to be true */ + public void setCondition(Condition condition) { this.condition=condition; } + + public Condition getCondition() { return condition; } + + /** Sets what is produced when this rule is true */ + public void setProduction(ProductionList production) { this.production=production; } + + public ProductionList getProduction() { return production; } + + /** Returns whether this rule matches the given query */ + public boolean matches(RuleEvaluation e) { + e.setMatchReferences(matchReferences); + return condition.matches(e); + } + + /** + * Returns the set of context names the production of this rule references + * + * @return an unmodifiable Set of condition context name Strings + */ + public Set<String> matchReferences() { + return Collections.unmodifiableSet(matchReferences); + } + + public void makeReferences(RuleBase rules) { + condition.makeReferences(rules); + production.addMatchReferences(matchReferences); + } + + /** Carries out the production of this rule */ + public void produce(RuleEvaluation e) { + production.produce(e); + } + + /** + * Returns the canonical string representation of this rule. + * This string representation can always be reparsed to produce an + * identical rule to this one. + */ + public String toString() { + return condition.toString() + " " + getSymbol() + " " + production.toString(); + } + + /** + * Returns the symbol of this production rule. + * All rules are on the form <code>condition symbol production</code>. 
+ */ + protected abstract String getSymbol(); + + /** + * Returns true if it is known that this rule matches its own output. + * If it does, it will only be evaluated once, to avoid infinite loops. + * This default implementation returns false; + */ + public boolean isLoop() { + // TODO: There are many more possible loops, we should probably detect + // a few more obvious ones + if (conditionIsEllipsAndOtherNameSpacesOnly(getCondition())) return true; + return false; + } + + private boolean conditionIsEllipsAndOtherNameSpacesOnly(Condition condition) { + if (condition instanceof EllipsisCondition) return true; + if (! (condition instanceof CompositeCondition)) return false; + for (Iterator<Condition> i=((CompositeCondition)condition).conditionIterator(); i.hasNext(); ) { + Condition child= i.next(); + if (child.getNameSpace()==null && conditionIsEllipsAndOtherNameSpacesOnly(child)) + return true; + } + return false; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReferenceTermProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReferenceTermProduction.java new file mode 100644 index 00000000000..319e1969174 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReferenceTermProduction.java @@ -0,0 +1,110 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.rule; + +import java.util.Set; + +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.TermType; +import com.yahoo.prelude.semantics.engine.EvaluationException; +import com.yahoo.prelude.semantics.engine.Match; +import com.yahoo.prelude.semantics.engine.ReferencedMatches; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; +import com.yahoo.protect.Validator; + +/** + * A term produced by a production rule which takes it's actual term value + * from one or more terms matched in the condition + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class ReferenceTermProduction extends TermProduction { + + private String reference; + + /** + * Creates a new produced reference term + * + * @param reference the label of the condition this should take it's value from + */ + public ReferenceTermProduction(String reference) { + super(); + setReference(reference); + } + + /** + * Creates a new produced reference term + * + * @param reference the label of the condition this should take it's value from + * @param termType the type of the term to produce + */ + public ReferenceTermProduction(String reference, TermType termType) { + super(termType); + setReference(reference); + } + + /** + * Creates a new produced reference term + * + * @param label the label of the produced term + * @param reference the label of the condition this should take it's value from + */ + public ReferenceTermProduction(String label,String reference) { + super(label); + setReference(reference); + } + + /** + * Creates a new produced reference term + * + * @param label the label of the produced term + * @param reference the label of the condition this should take it's value from + * @param termType the type of term to produce + */ + public ReferenceTermProduction(String label,String reference, TermType termType) { + super(label,termType); + setReference(reference); + } + + /** The label of the condition this 
should take its value from, never null */ + public void setReference(String reference) { + Validator.ensureNotNull("reference name of a produced reference term",reference); + this.reference =reference; + } + + /** Returns the label of the condition this should take its value from, never null */ + public String getReference() { return reference; } + + void addMatchReferences(Set<String> matchReferences) { + matchReferences.add(reference); + } + + public void produce(RuleEvaluation e,int offset) { + ReferencedMatches referencedMatches=e.getReferencedMatches(reference); + if (referencedMatches==null) + throw new EvaluationException("Referred match '" + reference + "' not found"); + if (replacing) { + replaceMatches(e,referencedMatches); + } + else { + addMatches(e,referencedMatches); + } + } + + public void replaceMatches(RuleEvaluation e,ReferencedMatches referencedMatches) { + Item referencedItem=referencedMatches.toItem(getLabel()); + if (referencedItem==null) return; + e.removeMatches(referencedMatches); + insertMatch(e, referencedMatches.matchIterator().next(),referencedItem,0); + } + + private void addMatches(RuleEvaluation e,ReferencedMatches referencedMatches) { + Item referencedItem=referencedMatches.toItem(getLabel()); + if (referencedItem==null) return; + e.addItem(referencedItem,getTermType()); + } + + public String toInnerTermString() { + return getLabelString() + "[" + reference + "]"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReplacingProductionRule.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReplacingProductionRule.java new file mode 100644 index 00000000000..76433ec693c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReplacingProductionRule.java @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.semantics.engine.Match; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A production rule which <i>replaces</i> matched terms by the production + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class ReplacingProductionRule extends ProductionRule { + + /** Carries out the production of this rule */ + public void produce(RuleEvaluation e) { + removeNonreferencedMatches(e); + if (e.getTraceLevel()>=5) { + e.trace(5,"Removed terms to get '" + e.getEvaluation().getQuery().getModel().getQueryTree().getRoot() + "', will add terms"); + } + super.produce(e); + } + + /** Remove items until there's only one item left */ + private void removeNonreferencedMatches(RuleEvaluation e) { + int itemCount=e.getEvaluation().getQuerySize(); + + // Remove items backwards to ease index handling + for (int i=e.getNonreferencedMatchCount()-1; i>=0; i--) { + // Ensure we don't produce an empty query + if (getProduction().getTermCount()==0 && itemCount==1) + break; + itemCount--; + + Match match=e.getNonreferencedMatch(i); + match.getItem().getParent().removeItem(match.getPosition()); + } + } + + protected String getSymbol() { return "->"; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/SequenceCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/SequenceCondition.java new file mode 100644 index 00000000000..3ba929da021 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/SequenceCondition.java @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.rule; + +import java.util.Iterator; + +import com.yahoo.prelude.semantics.engine.Choicepoint; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A set of conditions which much match the query in sequence + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class SequenceCondition extends CompositeCondition { + + public SequenceCondition() { + } + + public boolean doesMatch(RuleEvaluation e) { + Choicepoint choicepoint=e.getChoicepoint(this,true); + choicepoint.updateState(); + boolean matches=allSubConditionsMatches(e); + if (!matches) + choicepoint.backtrack(); + return matches; + } + + protected boolean useParentheses() { + return (getParent()!=null + && ! (getParent() instanceof ChoiceCondition)); + } + + public String toInnerString() { + return toInnerString(" "); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/SuperCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/SuperCondition.java new file mode 100644 index 00000000000..0b7b3b4a30b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/SuperCondition.java @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition which evaluates the <i>last included</i> version of + * the named condition this is a premise of. 
+ * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class SuperCondition extends Condition { + + private Condition condition; + + public void setCondition(Condition condition) { + this.condition=condition; + } + + public Condition getCondition() { + return condition; + } + + public boolean doesMatch(RuleEvaluation e) { + return condition.matches(e); + } + + public String toInnerString() { + if (condition==null) + return "@super"; + else + return condition.toString(); + } + + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermCondition.java new file mode 100644 index 00000000000..3558ef2b227 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermCondition.java @@ -0,0 +1,92 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.query.TermItem; +import com.yahoo.prelude.semantics.engine.NameSpace; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A term in a rule + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class TermCondition extends Condition { + + private String term,termPlusS; + + /** Creates an invalid term */ + public TermCondition() { } + + public TermCondition(String term) { + this(null,term); + } + + public TermCondition(String label,String term) { + super(label); + this.term=term; + termPlusS=term + "s"; + } + + public String getTerm() { return term; } + + public void setTerm(String term) { + this.term=term; + termPlusS=term + "s"; + } + + protected boolean doesMatch(RuleEvaluation e) { + // TODO: Move this into the respective namespaces when query becomes one */ + if (getNameSpace()!=null) { + NameSpace nameSpace=e.getEvaluation().getNameSpace(getNameSpace()); + return 
nameSpace.matches(term,e); + } + else { + if (e.currentItem()==null) + return false; + + if (!labelMatches(e)) return false; + + String matchedValue=termMatches(e.currentItem().getItem(),e.getEvaluation().getStemming()); + boolean matches=matchedValue!=null && labelMatches(e.currentItem().getItem(),e); + if ((matches && !e.isInNegation() || (!matches && e.isInNegation()))) { + e.addMatch(e.currentItem(),matchedValue); + e.setValue(term); + e.next(); + } + return matches; + } + } + + /** Returns a non-null replacement term if there is a match, null otherwise */ + private String termMatches(TermItem queryTerm,boolean stemming){ + String queryTermString=queryTerm.stringValue(); + + // The terms are the same + boolean matches=queryTermString.equals(term); + if (matches) return term; + + if (stemming) + if (termMatchesWithStemming(queryTermString)) return term; + + return null; + } + + private boolean termMatchesWithStemming(String queryTermString) { + if (queryTermString.length()<3) return false; // Don't stem very short terms + + // The query term minus s is the same + boolean matches=queryTermString.equals(termPlusS); + if (matches) return true; + + // The query term plus s is the same + matches=term.equals(queryTermString + "s"); + if (matches) return true; + + return false; + } + + public String toInnerString() { + return getLabelString() + term; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermProduction.java new file mode 100644 index 00000000000..6490d21e319 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermProduction.java @@ -0,0 +1,93 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.TermType; +import com.yahoo.prelude.semantics.engine.Match; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; +import com.yahoo.protect.Validator; + +/** + * A new term produced by a production rule + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public abstract class TermProduction extends Production { + + /** The label of this term, or null if none */ + private String label=null; + + /** The type of term to produce */ + private TermType termType; + + /** Creates a produced template term with no label and the default type */ + public TermProduction() { + this(null,TermType.DEFAULT); + } + + /** Creates a produced template term with the default term type */ + public TermProduction(String label) { + this(label,TermType.DEFAULT); + } + + /** Creates a produced template term with no label */ + public TermProduction(TermType termType) { + this(null,termType); + } + + public TermProduction(String label, TermType termType) { + this.label=label; + setTermType(termType); + } + + /** Sets the label of this. Set to null to use the default */ + public String getLabel() { return label; } + + /** Returns the label of this, or null if none (the default) */ + public void setLabel(String label) { this.label = label; } + + /** Returns the type of term to produce, never null. Default is DEFAULT */ + public TermType getTermType() { return termType; } + + /** Sets the term type to produce */ + public void setTermType(TermType termType) { + Validator.ensureNotNull("Type of produced Term",termType); + this.termType=termType; + } + + /** + * Inserts newItem at the position of this match + * TODO: Move to ruleevaluation + */ + protected void insertMatch(RuleEvaluation e,Match matched, Item newItem,int offset) { + newItem.setWeight(getWeight()); + int insertPosition=matched.getPosition()+offset; + + // This check is necessary (?) 
because earlier items may have been removed + // after we recorded the match position. It is sort of hackish. A cleaner + // solution would be to update the match position on changes + if (insertPosition>matched.getParent().getItemCount()) { + insertPosition=matched.getParent().getItemCount(); + } + + e.insertItem(newItem,matched.getParent(),insertPosition,getTermType()); + if (e.getTraceLevel()>=6) + e.trace(6,"Inserted item '" + newItem + "' at position " + insertPosition + " producing " + e.getEvaluation().getQuery().getModel().getQueryTree()); + } + + protected String getLabelString() { + if (label==null) return ""; + return label + ":"; + } + + /** All instances of this produces a parseable string output */ + public final String toInnerString() { + if (termType==null) + return toInnerTermString(); + else + return termType.toSign() + toInnerTermString(); + } + + protected abstract String toInnerTermString(); + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/statistics/StatisticsSearcher.java b/container-search/src/main/java/com/yahoo/prelude/statistics/StatisticsSearcher.java new file mode 100644 index 00000000000..408707db2f3 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/statistics/StatisticsSearcher.java @@ -0,0 +1,314 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.statistics; + +import com.yahoo.component.ComponentId; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.concurrent.CopyOnWriteHashMap; +import com.yahoo.container.QrSearchersConfig; +import com.yahoo.container.Server; +import com.yahoo.container.protect.Error; +import com.yahoo.jdisc.Metric; +import com.yahoo.log.LogLevel; +import com.yahoo.metrics.simple.MetricSettings; +import com.yahoo.metrics.simple.MetricReceiver; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.result.ErrorHit; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; +import com.yahoo.statistics.Callback; +import com.yahoo.statistics.Counter; +import com.yahoo.statistics.Handle; +import com.yahoo.statistics.Value; + +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.logging.Level; + +import static com.yahoo.container.protect.Error.*; + + +/** + * <p>A searcher to gather statistics such as queries completed and query latency. There + * may be more than 1 StatisticsSearcher in the Searcher chain, each identified by a + * Searcher ID. The statistics accumulated by all StatisticsSearchers are stored + * in the singleton StatisticsManager object. </p> + * <p> + * TODO: Fix events to handle more than one of these searchers properly. 
+ * + * @author Gene Meyers + * @author Steinar Knutsen + * @author bergum + */ +@Before(PhaseNames.RAW_QUERY) +public class StatisticsSearcher extends Searcher { + + private static final String MAX_QUERY_LATENCY_METRIC = "max_query_latency"; + private static final String EMPTY_RESULTS_METRIC = "empty_results"; + private static final String HITS_PER_QUERY_METRIC = "hits_per_query"; + private static final String FAILED_QUERIES_METRIC = "failed_queries"; + private static final String MEAN_QUERY_LATENCY_METRIC = "mean_query_latency"; + private static final String QUERY_LATENCY_METRIC = "query_latency"; + private static final String QUERIES_METRIC = "queries"; + private static final String ACTIVE_QUERIES_METRIC = "active_queries"; + private static final String PEAK_QPS_METRIC = "peak_qps"; + + private Counter queries; // basic counter + private Counter failedQueries; // basic counter + private Counter nullQueries; // basic counter + private Counter illegalQueries; // basic counter + private Value queryLatency; // mean pr 5 min + private Value queryLatencyBuckets; + private Value maxQueryLatency; // separate to avoid name mangling + @SuppressWarnings("unused") // all the work is done by the callback + private Value activeQueries; // raw measure every 5 minutes + private Value peakQPS; // peak 10s QPS + private Counter emptyResults; // number of results containing no concrete hits + private Value hitsPerQuery; // mean number of hits per query + private long prevMaxQPSTime; // previous measurement time of QPS + private double queriesForQPS = 0.0; + private final Object peakQpsLock = new Object(); + + private Metric metric; + private Map<String, Metric.Context> chainContexts = new CopyOnWriteHashMap<>(); + private Map<String, Metric.Context> yamasOnlyContexts = new CopyOnWriteHashMap<>(); + + + private void initEvents(com.yahoo.statistics.Statistics manager, MetricReceiver metricReceiver) { + queries = new Counter(QUERIES_METRIC, manager, false); + failedQueries = new 
Counter(FAILED_QUERIES_METRIC, manager, false); + nullQueries = new Counter("null_queries", manager, false); + illegalQueries = new Counter("illegal_queries", manager, false); + queryLatency = new Value(MEAN_QUERY_LATENCY_METRIC, manager, + new Value.Parameters().setLogRaw(false).setLogMean(true).setNameExtension(false)); + maxQueryLatency = new Value(MAX_QUERY_LATENCY_METRIC, manager, + new Value.Parameters().setLogRaw(false).setLogMax(true).setNameExtension(false)); + queryLatencyBuckets = Value.buildValue("query_latency", manager, null); + activeQueries = new Value(ACTIVE_QUERIES_METRIC, manager, + new Value.Parameters().setLogRaw(true).setCallback(new ActivitySampler())); + peakQPS = new Value(PEAK_QPS_METRIC, manager, new Value.Parameters().setLogRaw(false).setLogMax(true) + .setNameExtension(false)); + hitsPerQuery = new Value(HITS_PER_QUERY_METRIC, manager, + new Value.Parameters().setLogRaw(false).setLogMean(true).setNameExtension(false)); + emptyResults = new Counter(EMPTY_RESULTS_METRIC, manager, false); + metricReceiver.declareGauge(QUERY_LATENCY_METRIC, Optional.empty(), new MetricSettings.Builder().histogram(true).build()); + } + + // Callback to measure queries in flight every five minutes + private class ActivitySampler implements Callback { + public void run(Handle h, boolean firstRun) { + if (firstRun) { + metric.set(ACTIVE_QUERIES_METRIC, Integer.valueOf(0), null); + return; + } + // TODO Server.get() is to be removed + final int searchQueriesInFlight = Server.get().searchQueriesInFlight(); + ((Value) h).put(searchQueriesInFlight); + metric.set(ACTIVE_QUERIES_METRIC, searchQueriesInFlight, null); + } + } + + StatisticsSearcher(Metric metric) { + this(com.yahoo.statistics.Statistics.nullImplementation, metric, MetricReceiver.nullImplementation); + } + + public StatisticsSearcher(com.yahoo.statistics.Statistics manager, Metric metric, MetricReceiver metricReceiver) { + this.metric = metric; + initEvents(manager, metricReceiver); + } + + public 
String getMyID() { + return (getId().stringValue()); + } + + private void qps(long now, Metric.Context metricContext) { + // We can either have peakQpsLock _or_ have prevMaxQpsTime as a volatile + // and queriesForQPS as an AtomicInteger. That would lead no locking, + // but two memory barriers in the common case. Don't change till we know + // that is actually better. + synchronized (peakQpsLock) { + if ((now - prevMaxQPSTime) >= (10 * 1000)) { + double ms = (double) (now - prevMaxQPSTime); + final double peakQPS = queriesForQPS / (ms / 1000); + this.peakQPS.put(peakQPS); + metric.set(PEAK_QPS_METRIC, peakQPS, metricContext); + queriesForQPS = 1.0d; + prevMaxQPSTime = now; + } else { + queriesForQPS += 1.0d; + } + } + } + + private Metric.Context getChainMetricContext(String chainName) { + Metric.Context context = chainContexts.get(chainName); + if (context == null) { + Map<String, String> dimensions = new HashMap<>(); + dimensions.put("chain", chainName); + context = this.metric.createContext(dimensions); + chainContexts.put(chainName, context); + } + return context; + } + + /** + * Generate statistics for the query passing through this Searcher + * 1) Add 1 to total query count + * 2) Add response time to total response time (time from entry to return) + * 3) ..... + */ + public Result search(com.yahoo.search.Query query, Execution execution) { + Metric.Context metricContext = getChainMetricContext(execution.chain().getId().stringValue()); + + incrQueryCount(metricContext); + logQuery(query); + long start = System.currentTimeMillis(); // Start time, in millisecs. + qps(start, metricContext); + Result result; + //handle exceptions thrown below in searchers + try { + result = execution.search(query); // Pass on down the chain + } catch (Exception e) { + incrErrorCount(null, metricContext); + throw e; + } + + + long end = System.currentTimeMillis(); // Start time, in millisecs. 
+ long latency = end - start; + if (latency >= 0) { + addLatency(latency, metricContext); + } else { + getLogger().log( + LogLevel.WARNING, + "Apparently negative latency measure, start: " + start + + ", end: " + end + ", for query: " + query.toString()); + } + if (result.hits().getError() != null) { + incrErrorCount(result, metricContext); + incrementYamasOnlyErrors(result, execution); + } + int hitCount = result.getConcreteHitCount(); + hitsPerQuery.put((double) hitCount); + metric.set(HITS_PER_QUERY_METRIC, (double) hitCount, metricContext); + if (hitCount == 0) { + emptyResults.increment(); + metric.add(EMPTY_RESULTS_METRIC, 1, metricContext); + } + + // Update running averages + //setAverages(); + + return result; + } + + private void logQuery(com.yahoo.search.Query query) { + // Don't parse the query if it's not necessary for the logging Query.toString triggers parsing + if (getLogger().isLoggable(Level.FINER)) { + getLogger().finer("Query: " + query.toString()); + } + } + + private void addLatency(long latency, Metric.Context metricContext) { + //myStats.addLatency(latency); + queryLatency.put(latency); + metric.set(QUERY_LATENCY_METRIC, latency, metricContext); + metric.set(MEAN_QUERY_LATENCY_METRIC, latency, metricContext); + maxQueryLatency.put(latency); + metric.set(MAX_QUERY_LATENCY_METRIC, latency, metricContext); + queryLatencyBuckets.put(latency); + } + + private void incrQueryCount(Metric.Context metricContext) { + //myStats.incrQueryCnt(); + queries.increment(); + metric.add(QUERIES_METRIC, 1, metricContext); + } + + private void incrErrorCount(Result result, Metric.Context metricContext) { + //If result is null an exception was thrown further down + if (result == null) { + //myStats.incrErrorCount(); + failedQueries.increment(); + metric.add(FAILED_QUERIES_METRIC, 1, metricContext); + metric.add("error.unhandled_exception", 1, metricContext); + return; + } + + if (result.hits().getErrorHit().hasOnlyErrorCode(Error.NULL_QUERY.code)) { + 
nullQueries.increment(); + return; + } else if (result.hits().getErrorHit().hasOnlyErrorCode(3)) { + illegalQueries.increment(); + return; + } + //myStats.incrErrorCount(); + failedQueries.increment(); + metric.add(FAILED_QUERIES_METRIC, 1, metricContext); + } + + /** + * Creates error metric for Yamas only. These metrics are only logged to state health page + * and not forwarded to the log file. + * + * @param result The result to check for errors + */ + private void incrementYamasOnlyErrors(Result result, Execution execution) { + if(result == null) + return; + + ErrorHit error = result.hits().getErrorHit(); + if (error == null) + return; + for (ErrorMessage m : error.errors()) { + int code = m.getCode(); + Metric.Context c = getDimensions(m.getSource(), result, execution); + if (code == TIMEOUT.code) { + metric.add("error.timeout", 1, c); + } else if (code == NO_BACKENDS_IN_SERVICE.code) { + metric.add("error.backends_oos", 1, c); + } else if (code == ERROR_IN_PLUGIN.code) { + metric.add("error.plugin_failure", 1, c); + } else if (code == BACKEND_COMMUNICATION_ERROR.code) { + metric.add("error.backend_communication_error", 1, c); + } else if (code == EMPTY_DOCUMENTS.code) { + metric.add("error.empty_document_summaries", 1, c); + } else if (code == ILLEGAL_QUERY.code) { + metric.add("error.illegal_query", 1, c); + } else if (code == INVALID_QUERY_PARAMETER.code) { + metric.add("error.invalid_query_parameter", 1, c); + } else if (code == INTERNAL_SERVER_ERROR.code) { + metric.add("error.internal_server_error", 1, c); + } else if (code == SERVER_IS_MISCONFIGURED.code) { + metric.add("error.misconfigured_server", 1, c); + } else if (code == INVALID_QUERY_TRANSFORMATION.code) { + metric.add("error.invalid_query_transformation", 1, c); + } else if (code == RESULT_HAS_ERRORS.code) { + metric.add("error.result_with_errors", 1, c); + } else if (code == UNSPECIFIED.code) { + metric.add("error.unspecified", 1, c); + } + } + } + + + private Metric.Context 
getDimensions(String source, Result r, Execution execution) { + /* Contexts are cached per source, with the empty string as the cache key when the source is null. */ + /* NOTE: r and execution are not used in this method yet. */ + Metric.Context context = yamasOnlyContexts.get(source == null ? "" : source); + if (context == null) { + Map<String, String> dims = new HashMap<>(); + if (source != null) { + dims.put("source", source); + } + context = this.metric.createContext(dims); + yamasOnlyContexts.put(source == null ? "" : source, context); + } + //TODO add other relevant metric dimensions + //Would be nice to have chain as a dimension so + //we can separate errors from different chains + return context; + } + +} + diff --git a/container-search/src/main/java/com/yahoo/prelude/statistics/package-info.java b/container-search/src/main/java/com/yahoo/prelude/statistics/package-info.java new file mode 100644 index 00000000000..a2e7f98c002 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/statistics/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.prelude.statistics; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/Context.java b/container-search/src/main/java/com/yahoo/prelude/templates/Context.java new file mode 100644 index 00000000000..be26e3230c7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/Context.java @@ -0,0 +1,112 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.templates; + +import java.util.Collection; + +import com.yahoo.text.XML; + +/** + * A set of variable bindings for template rendering + * + * @author bratseth + */ +public abstract class Context { + + private boolean xmlEscape = true; + + // These may be wrapped in an object if it gets unruly like this...
+ private String boldOpenTag; + private String boldCloseTag; + private String separatorTag; + + private boolean utf8Output = false; + + //prevent sub-classing outside of this package. + Context() {} + + // set|getXmlEscape are no longer final because HitContext both subclasses _and_ wraps Context + /** Sets whether this context should xml-escape returned values */ + public void setXmlEscape(boolean xmlEscape) { this.xmlEscape=xmlEscape; } + + /** Returns whether this context xml-escapes returned values. Default is true */ + public boolean getXmlEscape() { return xmlEscape; } + + /** + * Makes a <b>secondary</b> binding + * + * @return the old value bound to this key, or null if the key was previously unbound + */ + public abstract Object put(String key,Object value); + + /** + * <p>Returns a value by looking it up in the primary, + * and thereafter in secondary sources.</p> + * + * <p>If xml escaping is on and this is a string, xml attribute escaping is done + * </p> + */ + abstract public Object get(String key); + + /** + * Removes a <b>secondary</b> binding + * + * @return the removed value, or null if it wasn't bound + */ + public abstract Object remove(Object key); + + + // These three may be collapsed to one method + public void setBoldOpenTag(String boldOpenTag) { + this.boldOpenTag = boldOpenTag; + } + public void setBoldCloseTag(String boldCloseTag) { + this.boldCloseTag = boldCloseTag; + } + public void setSeparatorTag(String separatorTag) { + this.separatorTag = separatorTag; + } + + + /** + * Normalizes a value before it is returned: null becomes the empty string, + * strings are xml-escaped if xml escaping is on, and any other value is returned as-is. + */ + protected Object normalizeValue(Object value) { + if (value == null) { + return ""; + } else if (xmlEscape && value instanceof String) { + return XML.xmlEscape((String) value, true, null); + } else { + return value; + } + } + + public String getBoldOpenTag() { + return boldOpenTag; + } + + public String getBoldCloseTag() { + return boldCloseTag; + } + + public String getSeparatorTag() { + return separatorTag; + } + + public abstract Collection<?
extends Object> getKeys(); + + /** + * Returns whether the output encoding is UTF-8. + * Used by the template to decide whether to use UTF-8 optimizations. + * + * @return whether the result encoding is UTF-8 + */ + public boolean isUtf8Output() { + return utf8Output; + } + + /** + * Sets whether the output encoding is UTF-8. + * Used by the template to decide whether to use UTF-8 optimizations. + * TODO: TVT: Make this package private again + * @param utf8Output whether the output encoding is UTF-8 + */ + public void setUtf8Output(boolean utf8Output) { + this.utf8Output = utf8Output; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/DefaultTemplateSet.java b/container-search/src/main/java/com/yahoo/prelude/templates/DefaultTemplateSet.java new file mode 100644 index 00000000000..1a7c5a738be --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/DefaultTemplateSet.java @@ -0,0 +1,301 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.templates; + +import com.yahoo.concurrent.CopyOnWriteHashMap; +import com.yahoo.io.ByteWriter; +import com.yahoo.net.URI; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.search.Result; +import com.yahoo.search.grouping.result.HitRenderer; +import com.yahoo.search.result.*; +import com.yahoo.text.Utf8String; +import com.yahoo.text.XMLWriter; + +import java.io.IOException; +import java.io.Writer; +import java.util.Iterator; +import java.util.Map; + +/** + * <p>A template set which provides a default rendering of results and hits.</p> + * + * <p>This can be extended to create custom programmatic templates. + * Create a subclass which has static inner classes extending DefaultTemplate for the templates + * you wish to override and call the set method for those templates in the subclass template set + * constructor. Some of the default templates contain utility functions, and can be overridden + * in place of DefaultTemplate to gain access to these.
See TiledTemplateSet for an example.</p> + * + * @author bratseth + */ +public class DefaultTemplateSet extends UserTemplate<XMLWriter> { + + /* The tag and attribute names written by this, kept as Utf8String instances */ + private static final Utf8String RESULT = new Utf8String("result"); + private static final Utf8String GROUP = new Utf8String("group"); + private static final Utf8String ID = new Utf8String("id"); + private static final Utf8String FIELD = new Utf8String("field"); + private static final Utf8String HIT = new Utf8String("hit"); + private static final Utf8String ERROR = new Utf8String("error"); + private static final Utf8String TOTAL_HIT_COUNT = new Utf8String("total-hit-count"); + private static final Utf8String QUERY_TIME = new Utf8String("querytime"); + private static final Utf8String SUMMARY_FETCH_TIME = new Utf8String("summaryfetchtime"); + private static final Utf8String SEARCH_TIME = new Utf8String("searchtime"); + private static final Utf8String NAME = new Utf8String("name"); + private static final Utf8String CODE = new Utf8String("code"); + private static final Utf8String COVERAGE_DOCS = new Utf8String("coverage-docs"); + private static final Utf8String COVERAGE_NODES = new Utf8String("coverage-nodes"); + private static final Utf8String COVERAGE_FULL = new Utf8String("coverage-full"); + private static final Utf8String COVERAGE = new Utf8String("coverage"); + private static final Utf8String RESULTS_FULL = new Utf8String("results-full"); + private static final Utf8String RESULTS = new Utf8String("results"); + private static final Utf8String TYPE = new Utf8String("type"); + private static final Utf8String RELEVANCY = new Utf8String("relevancy"); + private static final Utf8String SOURCE = new Utf8String("source"); + + /** The Utf8String form of each field name written so far, by field name; filled by writeOpenFieldElement */ + private final CopyOnWriteHashMap<String, Utf8String> fieldNameMap = new CopyOnWriteHashMap<>(); + + + /** + * Creates a template set with a name. This will be initialized with the default templates - + * use the set methods from the subclass constructor to override any of these with other template classes.
+ */ + protected DefaultTemplateSet(String name) { + super(name, + DEFAULT_MIMETYPE, + DEFAULT_ENCODING + ); + } + + public DefaultTemplateSet() { + this("default"); + } + + /** Uses an XML writer in this template */ + @Override + public XMLWriter wrapWriter(Writer writer) { + return XMLWriter.from(writer, 10, -1); + } + + @Override + public void header(Context context, XMLWriter writer) throws IOException { + Result result=(Result)context.get("result"); + // TODO: move setting this to Result + context.setUtf8Output("utf-8".equalsIgnoreCase(getRequestedEncoding(result.getQuery()))); + writer.xmlHeader(getRequestedEncoding(result.getQuery())); + writer.openTag(RESULT).attribute(TOTAL_HIT_COUNT,String.valueOf(result.getTotalHitCount())); + if (result.getQuery().getPresentation().getReportCoverage()) { + renderCoverageAttributes(result.getCoverage(false), writer); + } + renderTime(writer, result); + writer.closeStartTag(); + } + + /** + * Writes timing attributes to the currently open tag, if timing is turned on in the query presentation. + * searchtime is always written; querytime and summaryfetchtime are only written if firstFill() is not 0. + * All values are scaled by .001 (presumably from milliseconds to seconds) and written with three decimals. + * NOTE(review): String.format is called without a Locale here, so the decimal separator follows + * the default locale - confirm whether a fixed locale should be used. + */ + private void renderTime(final XMLWriter writer, final Result result) { + if (!result.getQuery().getPresentation().getTiming()) { + return; + } + + final String threeDecimals = "%.3f"; + final double milli = .001d; + final long now = System.currentTimeMillis(); + final long searchTime = now - result.getElapsedTime().first(); + final double searchSeconds = ((double) searchTime) * milli; + + if (result.getElapsedTime().firstFill() != 0L) { + final long queryTime = result.getElapsedTime().weightedSearchTime(); + final long summaryFetchTime = result.getElapsedTime().weightedFillTime(); + final double querySeconds = ((double) queryTime) * milli; + final double summarySeconds = ((double) summaryFetchTime) * milli; + writer.attribute(QUERY_TIME, String.format(threeDecimals, querySeconds)); + writer.attribute(SUMMARY_FETCH_TIME, String.format(threeDecimals, summarySeconds)); + } + writer.attribute(SEARCH_TIME, String.format(threeDecimals, searchSeconds)); + } + + @Override + public void footer(Context context, XMLWriter writer) throws IOException { +
writer.closeTag(); + } + + /** + * Renders the header of a hit.<br/> + * Post-condition: The hit tag is open in this XML writer + */ + @Override + public void hit(Context context, XMLWriter writer) throws IOException { + Hit hit=(Hit)context.get("hit"); + + if (hit instanceof HitGroup) { + renderHitGroup((HitGroup) hit, context, writer); + } else { + writer.openTag(HIT); + renderHitAttributes(hit,writer); + writer.closeStartTag(); + renderHitFields(context, hit, writer); + } + } + + + /** + * Renders the footer of a hit. + * + * Pre-condition: The hit tag is open in this XML writer.<br/> + * Post-condition: The hit tag is closed + */ + @Override + public void hitFooter(Context context, XMLWriter writer) throws IOException { + writer.closeTag(); + } + + @Override + public void error(Context context, XMLWriter writer) throws IOException { + ErrorMessage error=((Result)context.get("result")).hits().getError(); + writer.openTag(ERROR).attribute(CODE,error.getCode()).content(error.getMessage(),false).closeTag(); + } + + @Override + public void noHits(Context context, XMLWriter writer) throws IOException { + // no hits, do nothing :) + } + + /** Writes the coverage attributes to the currently open tag. Does nothing if the coverage is null */ + protected static void renderCoverageAttributes(Coverage coverage, XMLWriter writer) throws IOException { + if (coverage == null) return; + writer.attribute(COVERAGE_DOCS,coverage.getDocs()); + writer.attribute(COVERAGE_NODES,coverage.getNodes()); + writer.attribute(COVERAGE_FULL,coverage.getFull()); + writer.attribute(COVERAGE,coverage.getResultPercentage()); + writer.attribute(RESULTS_FULL,coverage.getFullResultSets()); + writer.attribute(RESULTS,coverage.getResultSets()); + } + + /** + * Writes a hit's default attributes like 'type', 'source', 'relevancy'.
+ */ + protected void renderHitAttributes(Hit hit,XMLWriter writer) throws IOException { + writer.attribute(TYPE,hit.getTypeString()); + if (hit.getRelevance() != null) { + writer.attribute(RELEVANCY, hit.getRelevance().toString()); + } + writer.attribute(SOURCE, hit.getSource()); + } + + /** Opens (but does not close) the group hit tag */ + protected void renderHitGroup(HitGroup hit, Context context, XMLWriter writer) throws IOException { + if (HitRenderer.renderHeader(hit, writer)) { + // empty: presumably HitRenderer has rendered the header itself when it returns true + } else if (hit.types().contains("grouphit")) { + // TODO Keep this? + renderHitGroupOfTypeGroupHit(context, hit, writer); + } else { + renderGroup(hit, writer); + } + } + + + /** + * Opens a group tag having the default hit attributes. The tag is left open. + */ + protected void renderGroup(HitGroup hit, XMLWriter writer) throws IOException { + writer.openTag(GROUP); + renderHitAttributes(hit, writer); + writer.closeStartTag(); + } + + // Can't name this renderGroupHit as GroupHit is a class having nothing to do with HitGroup. + // Confused yet? Good! + protected void renderHitGroupOfTypeGroupHit(Context context, HitGroup hit, XMLWriter writer) throws IOException { + writer.openTag(HIT); + renderHitAttributes(hit, writer); + renderId(hit.getId(), writer); + writer.closeStartTag(); + } + + + /** Writes an id element containing the string value of the uri. Does nothing if the uri is null */ + protected void renderId(URI uri, XMLWriter writer) throws IOException { + if (uri != null) { + writer.openTag(ID).content(uri.stringValue(),false).closeTag(); + } + } + + /** + * Renders the synthetic relevancy field, followed by all fields of a hit. + * Calls {@link #renderField(Context, Hit, java.util.Map.Entry, XMLWriter)} for every field.
+ */ + protected void renderHitFields(Context context, Hit hit, XMLWriter writer) throws IOException { + renderSyntheticRelevancyField(hit, writer); + for (Iterator<Map.Entry<String, Object>> it = hit.fieldIterator(); it.hasNext(); ) { + renderField(context, hit, it.next(), writer); + } + } + + /** Renders the relevance of the hit as a field named "relevancy", if the hit has a relevance and that field should be rendered */ + private void renderSyntheticRelevancyField(Hit hit, XMLWriter writer) throws IOException { + final String relevancyFieldName = "relevancy"; + final Relevance relevance = hit.getRelevance(); + + if (shouldRenderField(hit, relevancyFieldName) && relevance != null) { + renderSimpleField(relevancyFieldName, relevance, writer); + } + } + + /** Renders one field as a field element, unless shouldRenderField rejects it or its name starts with "$" */ + protected void renderField(Context context, Hit hit, Map.Entry<String, Object> entry, XMLWriter writer) throws IOException { + String fieldName = entry.getKey(); + + if (!shouldRenderField(hit, fieldName)) return; + if (fieldName.startsWith("$")) return; // Don't render fields that start with $ // TODO: Move to should render + + writeOpenFieldElement(fieldName, writer); + renderFieldContent(context, hit, fieldName, writer); + writeCloseFieldElement(writer); + } + + /** + * Opens a field element having the field name as its name attribute. + * The Utf8String form of the name is looked up in fieldNameMap, and created and stored there on first use. + */ + private void writeOpenFieldElement(String fieldName, XMLWriter writer) throws IOException { + Utf8String utf8 = fieldNameMap.get(fieldName); + if (utf8 == null) { + utf8 = new Utf8String(fieldName); + fieldNameMap.put(fieldName, utf8); + } + writer.openTag(FIELD).attribute(NAME, utf8); + writer.closeStartTag(); + } + + private void writeCloseFieldElement(XMLWriter writer) throws IOException { // TODO: Collapse + writer.closeTag(); + } + + protected void renderFieldContent(Context context, Hit hit, + String name, XMLWriter writer) + throws IOException { + + boolean dumpedRaw = false; + if (hit instanceof FastHit && ((FastHit)hit).fieldIsNotDecoded(name)) { + writer.closeStartTag(); + if ((writer.getWriter() instanceof ByteWriter) && context.isUtf8Output()) { + dumpedRaw = dumpBytes((ByteWriter) writer.getWriter(), (FastHit) hit, name); + } + if (dumpedRaw) { +
writer.content("",false); // let the xml writer note that this tag had content + } + } + if (!dumpedRaw) { + String xmlval = hit.getFieldXML(name); + if (xmlval == null) { + xmlval = "(null)"; + } + writer.escapedContent(xmlval,false); + } + } + + /** Renders a field element whose content is the string form of the given value */ + private void renderSimpleField(String fieldName, Object fieldValue, XMLWriter writer) throws IOException { + writeOpenFieldElement(fieldName, writer); + writer.content(fieldValue.toString(),false); + writeCloseFieldElement(writer); + } + + /** Returns whether a field should be rendered. This default implementation always returns true */ + protected boolean shouldRenderField(Hit hit, String fieldName) { + // subclasses may override this to skip fields, e.g. depending on hit type + return true; + } + + + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/FormattingOptions.java b/container-search/src/main/java/com/yahoo/prelude/templates/FormattingOptions.java new file mode 100644 index 00000000000..307378f2106 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/FormattingOptions.java @@ -0,0 +1,189 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.templates; + +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Set; + +/** + * Defines formatting options used with special kinds of hits. + * + * @author laboisse + */ +public class FormattingOptions { + + public static final String DEFAULT_TYPE_ATTRIBUTE_NAME = "type"; + + /** + * A structure that defines the tag name and attribute name for a field + * that should be formatted as a field with a subtype.
+ * @author laboisse + * + */ + static class SubtypeField { + String tagName; + String attributeName; + String attributeValue; + } + + static class SubtypeFieldWithPrefix extends SubtypeField { + + /* Note: attributeValue should always be null for instances of this class */ + + int prefixLength; + } + + private Map<String, String> fieldsAsAttributes = new LinkedHashMap<>(); + + private Map<String, SubtypeField> fieldsWithSubtypes = new LinkedHashMap<>(); + private Map<String, SubtypeFieldWithPrefix> prefixedFieldsWithSubtypes = new LinkedHashMap<>(); + + private Set<String> fieldsNotRendered = new LinkedHashSet<>(); + private Set<String> fieldsRendered = new LinkedHashSet<>(); + + /** + * Tells to format a field as an attribute of the hit's tag. + * + * For instance, field 'query-latency' could be rendered as an attribute 'latency' by + * invoking {@code formatFieldAsAttribute("query-latency", "latency")}. + * + * Output would be: + * <pre> + * <hit latency="100"></hit> + * </pre> + * instead of: + * <pre> + * <hit><latency>100</latency></hit> + * </pre> + */ + public void formatFieldAsAttribute(String fieldName, String attributeName) { + fieldsAsAttributes.put(fieldName, attributeName); + } + + /** Returns an unmodifiable view of the field name to attribute name entries set by formatFieldAsAttribute */ + public Set<Map.Entry<String, String>> fieldsAsAttributes() { + return Collections.unmodifiableSet(this.fieldsAsAttributes.entrySet()); + } + + /** Returns the attribute name set for this field by formatFieldAsAttribute, or null if none is set */ + public String getAttributeName(String fieldName) { + return this.fieldsAsAttributes.get(fieldName); + } + + /** + * Tells to format a field using a subtype. A subtype is used when there is kind of a grouping + * for a set of fields. + * + * For instance, fields 'latency-connect', 'latency-finish' all belong to the same 'latency' logical group. + * So invoking {@code formatFieldWithSubtype("latency-connect", "latency", "type", "connect")}, + * {@code formatFieldWithSubtype("latency-finish", "latency", "type", "finish")} and so on, + * allows to have a common 'latency' tag name for all fields of the same kind.
+ * Note that it does no collapsing on tags. + * + * Output would be: + * <pre> + * <latency type="connect">50</latency> + * <latency type="finish">250</latency> + * </pre> + * Instead of: + * <pre> + * <hit> + * <latency-connect>50</latency-connect> + * <latency-finish>250</latency-finish> + * </pre> + */ + public void formatFieldWithSubtype(String fieldName, String tagName, String typeAttributeName, String typeAttributeValue) { + SubtypeField names = new SubtypeField(); + names.attributeName = typeAttributeName; + names.attributeValue = typeAttributeValue; + names.tagName = tagName; + fieldsWithSubtypes.put(fieldName, names); + } + + /** Returns the subtype formatting set for exactly this field name, or null if none is set */ + public SubtypeField getSubtype(String fieldName) { + return this.fieldsWithSubtypes.get(fieldName); + } + + /** + * Same as {@link #formatFieldWithSubtype(String, String, String, String)} except that fields + * are selected based on the beginning of their name and the type attribute value is deduced + * from the rest of their name. So this may select many fields instead of only one. + * Invoking {@code formatFieldWithSubtype("latency-", "latency", "type")} only once allows to have a common 'latency' + * tag name for all fields that start with 'latency-'. Type attribute value will be 'start' for field 'latency-start'. + * Note that it does no collapsing on tags. + * + * This is mostly used when you don't know all field names ahead. + * + * Output would be: + * <pre> + * <latency type="connect">50</latency> + * <latency type="finish">250</latency> + * </pre> + * Instead of: + * <pre> + * <hit> + * <latency-connect>50</latency-connect> + * <latency-finish>250</latency-finish> + * </pre> + * + * Note: don't use this with prefixes that start with a common substring (e.g. 'http', 'http_proxy'): the lookup returns the first registered prefix the field name starts with.
+ */ + public void formatFieldWithSubtype(String fieldNamePrefix, String tagName, String typeAttributeName) { + SubtypeFieldWithPrefix names = new SubtypeFieldWithPrefix(); + names.attributeName = typeAttributeName; + names.tagName = tagName; + names.prefixLength = fieldNamePrefix.length(); + prefixedFieldsWithSubtypes.put(fieldNamePrefix, names); + } + + /** + * Returns the subtype formatting of the first registered prefix (in registration order) + * which the given field name starts with, or null if there is no such prefix. + */ + public SubtypeFieldWithPrefix getSubtypeWithPrefix(String fieldName) { + for(Map.Entry<String, SubtypeFieldWithPrefix> e : this.prefixedFieldsWithSubtypes.entrySet()) { + if(fieldName.startsWith(e.getKey())) + return e.getValue(); + } + return null; + } + + /** + * Tells whether a field should be rendered. + * A null name, or a name starting with "$", is never rendered. + * If any fields are set to render, exactly those fields are rendered. + * Otherwise every field which is not set not to render is rendered. + * + * @see #setFieldNotToRender(String) + * @see #setFieldToRender(String) + */ + public boolean shouldRenderField(String fieldName) { + if(fieldName == null) + return false; + if (fieldName.startsWith("$")) { + return false; + } + if(!this.fieldsRendered.isEmpty()) + return this.fieldsRendered.contains(fieldName); + return !this.fieldsNotRendered.contains(fieldName); + } + + /** + * Tells a field should be rendered. + * + * <p> + * Note: if at least one field is set to render, then only + * these fields should be rendered. Use {@link #setFieldNotToRender(String)} + * to only exclude specific fields. + */ + public void setFieldToRender(String fieldName) { + this.fieldsRendered.add(fieldName); + } + + /** + * Tells a field should not be rendered. + * This has no effect if any field is set to render by {@link #setFieldToRender(String)}. + * + * <p> + * Note: all other fields should be rendered. Use {@link #setFieldToRender(String)} + * to only include specific fields.
+ */ + public void setFieldNotToRender(String fieldName) { + this.fieldsNotRendered.add(fieldName); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/GenericTemplateSet.java b/container-search/src/main/java/com/yahoo/prelude/templates/GenericTemplateSet.java new file mode 100644 index 00000000000..fd43fc83a12 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/GenericTemplateSet.java @@ -0,0 +1,155 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.templates; + +import com.yahoo.protect.Validator; +import com.yahoo.search.Query; + +import java.io.Writer; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Properties; + +/** + * Superclass of a set of templates for rendering (serializing) results + */ +// TODO: Deprecate everything having to do with Templates - we should only support Renderers to serialize a Result. +public class GenericTemplateSet { + + public static final String DEFAULT_MIMETYPE = "text/xml"; + public static final String DEFAULT_ENCODING = "utf-8"; + + /** The templates of this set, by template name */ + private HashMap<String, Template<?
extends Writer>> templates; + + /** The MIME type this template set returns, e.g. text/xml */ + private String mimeType; + + /** The charset encoding this template should have */ + private String encoding; + + private String boldOpenTag = null; + private String boldCloseTag = null; + private String separatorTag = null; + + /** + * Document summary class for this template + */ + private String summaryClass = null; + + /** + * The unique name of this template set + */ + private final String name; + + /** + * Creates a template set containing no templates + */ + public GenericTemplateSet(String name, String mimeType, String encoding) { + this.mimeType = mimeType; + this.encoding = encoding; + this.name = name; + + templates = new LinkedHashMap<>(); + } + + + public String getName() { + return name; + } + + /** + * Returns the MIME type of this template set + */ + public String getMimeType() { return mimeType; } + + /** + * Returns the text encoding + */ + public String getEncoding() { return encoding; } + + /** Returns the encoding of the query, or the encoding given by the template if none is set */ + public final String getRequestedEncoding(Query query) { + String encoding = query.getModel().getEncoding(); + if (encoding != null) return encoding; + return getEncoding(); + } + + /** + * Returns the template set under the given name + * + * @return the template, or null if no template is set under this name + */ + public Template<? extends Writer> getTemplate(String templateName) { + return templates.get(templateName); + } + + /** + * Sets the template of the given name. + * No null check is made by this method; a null template is stored as-is. + */ + public void setTemplate(String templateName, Template<? extends Writer> template) { + templates.put(templateName,template); + } + + /** + * Sets the template of the given name, after validating that it is not null + * + * @throws NullPointerException if the given template is null + */ + public void setTemplateNotNull(String templateName, Template<?
extends Writer> template) { + Validator.ensureNotNull("Template "+templateName,template); + templates.put(templateName,template); + } + + + /** + * Sets the highlighting marks for this template + * + * @param start the highlighting start mark + * @param end the highlighting end mark + * @param sep the highlighting separator mark + */ + public void setHighlightTags(String start, String end, String sep) { + boldOpenTag = start; + boldCloseTag = end; + separatorTag = sep; + } + + // may return null + public String getBoldOpenTag() { + return boldOpenTag; + } + + // may return null + public String getBoldCloseTag() { + return boldCloseTag; + } + + // may return null + public String getSeparatorTag() { + return separatorTag; + } + + + /** + * Sets the default summary class to use with this template. + */ + public void setSummaryClass(String summaryClass) { + this.summaryClass = summaryClass; + } + + /** + * Type safe accessor to get the default document summary class for this + * template set. This is also here to insulate the rest of the code + * against changes in the naming of the properties in the property file. + * + * @return the summary class, or null if it is not set or is empty + */ + public String getSummaryClass() { + if (summaryClass != null && ! summaryClass.isEmpty()) { + return summaryClass; + } else { + return null; + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/HitContext.java b/container-search/src/main/java/com/yahoo/prelude/templates/HitContext.java new file mode 100644 index 00000000000..745a5ad85fe --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/HitContext.java @@ -0,0 +1,144 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.templates; + +import com.yahoo.prelude.hitfield.HitField; +import com.yahoo.prelude.hitfield.JSONString; +import com.yahoo.prelude.hitfield.XMLString; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.StructuredData; +import com.yahoo.search.result.FeatureData; +import com.yahoo.text.XML; + +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; + +/** + * A context providing all the fields of a hit, which falls back to the given fallback context for all other keys. + * Bindings are made in and removed from the fallback context only. + * + * @author tonytv + */ +public class HitContext extends Context { + + private final Hit hit; + private final Context fallbackContext; + + public HitContext(Hit hit, Context fallbackContext) { + this.hit = hit; + this.fallbackContext = fallbackContext; + } + + @Override + public Object put(String key, Object value) { + return fallbackContext.put(key, value); + } + + /** Returns the normalized value of the hit field having this name, or the value from the fallback context if the hit has no such field */ + @Override + public Object get(String key) { + Object value = normalizedHitProperty(key); + return value != null ? + value : + fallbackContext.get(key); + } + + @Override + public Object remove(Object key) { + return fallbackContext.remove(key); + } + + @Override + public Collection<?
extends Object> getKeys() { + Set<Object> keys = new HashSet<>(fallbackContext.getKeys()); + keys.addAll(hit.fieldKeys()); + return keys; + } + + /* The tag, encoding and escaping settings below are all delegated to the fallback context */ + + @Override + public void setBoldOpenTag(String boldOpenTag) { + fallbackContext.setBoldOpenTag(boldOpenTag); + } + + @Override + public void setBoldCloseTag(String boldCloseTag) { + fallbackContext.setBoldCloseTag(boldCloseTag); + } + + @Override + public void setSeparatorTag(String separatorTag) { + fallbackContext.setSeparatorTag(separatorTag); + } + + @Override + public String getBoldOpenTag() { + return fallbackContext.getBoldOpenTag(); + } + + @Override + public String getBoldCloseTag() { + return fallbackContext.getBoldCloseTag(); + } + + @Override + public String getSeparatorTag() { + return fallbackContext.getSeparatorTag(); + } + + @Override + //TVT: TODO: Make this package private again. + public boolean isUtf8Output() { + return fallbackContext.isUtf8Output(); + } + + @Override + //TODO: TVT: make this package private again + public void setUtf8Output(boolean utf8Output) { + fallbackContext.setUtf8Output(utf8Output); + } + + @Override + public void setXmlEscape(boolean xmlEscape) { + fallbackContext.setXmlEscape(xmlEscape); + } + + @Override + public boolean getXmlEscape() { + return fallbackContext.getXmlEscape(); + } + + @Override + protected Object normalizeValue(Object value) { + return fallbackContext.normalizeValue(value); + } + + /** Returns the normalized value of the hit field having the given name, or null if the hit has no such field */ + private Object normalizedHitProperty(String key) { + Object value = hit.getField(key); + return value == null ?
+ null : + normalizeHitFieldValue(value); + } + + /** + * Converts a non-null hit field value to the form returned from this context: + * A HitField is rendered with the bold and separator tags, quoted if xml escaping is on. + * StructuredData, XMLString and JSONString values are returned as their string form without escaping. + * Any other value is returned as its string form, xml-escaped if xml escaping is on. + */ + private Object normalizeHitFieldValue(Object value) { + if (value instanceof HitField) { + HitField hf = (HitField) value; + if (getXmlEscape()) { + return hf.quotedContent(getBoldOpenTag(), + getBoldCloseTag(), + getSeparatorTag(), + true); + } else { + return hf.getContent(getBoldOpenTag(), + getBoldCloseTag(), + getSeparatorTag()); + } + } else if (value instanceof StructuredData) { + return value.toString(); + } else if (value instanceof XMLString || value instanceof JSONString) { + return value.toString(); + } else if (getXmlEscape()) { + return XML.xmlEscape(value.toString(), true, null); + } else { + return value.toString(); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/LogExceptionUserTemplateDelegator.java b/container-search/src/main/java/com/yahoo/prelude/templates/LogExceptionUserTemplateDelegator.java new file mode 100644 index 00000000000..49163e8fa90 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/LogExceptionUserTemplateDelegator.java @@ -0,0 +1,196 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.templates; + +import com.yahoo.log.LogLevel; +import com.yahoo.yolean.Exceptions; + +import java.io.IOException; +import java.io.Writer; +import java.util.Properties; +import java.util.logging.Logger; + +/** + * Delegates to another UserTemplate, but handles any exceptions (except IOException) by logging them.
+ * @author tonytv + */ +public class LogExceptionUserTemplateDelegator<T extends Writer> extends UserTemplate<T> { + + /** The logger which exceptions thrown by the delegate are logged to */ + private static final Logger log = Logger.getLogger(LogExceptionUserTemplateDelegator.class.getName()); + private final UserTemplate<T> delegate; + + /** Creates a delegator which forwards to the given template. The name passed to the superclass is the simple name of this class */ + public LogExceptionUserTemplateDelegator(UserTemplate<T> delegate) { + super(LogExceptionUserTemplateDelegator.class.getSimpleName()); + this.delegate = delegate; + } + + @Override + public Context createContext() { + return delegate.createContext(); + } + + @Override + public T wrapWriter(Writer writer) { + return delegate.wrapWriter(writer); + } + + @Override + public boolean isDefaultTemplateSet() { + return delegate.isDefaultTemplateSet(); + } + + @Override + public String getSummaryClass() { + return delegate.getSummaryClass(); + } + + @Override + public String getBoldOpenTag() { + return delegate.getBoldOpenTag(); + } + + @Override + public String getBoldCloseTag() { + return delegate.getBoldCloseTag(); + } + + @Override + public String getSeparatorTag() { + return delegate.getSeparatorTag(); + } + + @Override + public void setSummaryClass(String summaryClass) { + delegate.setSummaryClass(summaryClass); + } + + @Override + public void setHighlightTags(String start, String end, String sep) { + delegate.setHighlightTags(start, end, sep); + } + + /** Returns the name of the delegate, not the name given to the superclass constructor */ + @Override + public String getName() { + return delegate.getName(); + } + + @Override + public String getMimeType() { + return delegate.getMimeType(); + } + + @Override + public String getEncoding() { + return delegate.getEncoding(); + } + + /* Getting and setting individual templates is not supported by this delegator */ + + @Override + public Template<T> getTemplate(String templateName) { + throw new UnsupportedOperationException(); + } + + @Override + public void setTemplate(String templateName, Template<? extends Writer> template) { + throw new UnsupportedOperationException(); + } + + @Override + public void setTemplateNotNull(String templateName, Template<?
extends Writer> template) {
+        throw new UnsupportedOperationException();
+    }
+
+    /*** Template
+
+    @Override
+    public void <methodName>(Context context, T writer) throws IOException {
+        try {
+            delegate.<methodName>(context, writer);
+        } catch (Exception e) {
+            handleException(e);
+        }
+    }
+
+    ***/
+
+    /*** Begin expanded template for
+         header, footer, hit, hitFooter, error, noHits, queryContext,
+         Thanks java, for giving me the opportunity to use copy-paste ***/
+
+
+    /** Forwards to the delegate; any exception thrown is passed to handleException */
+    @Override
+    public void header(Context context, T writer) throws IOException {
+        try {
+            delegate.header(context, writer);
+        } catch (Exception e) {
+            handleException(e);
+        }
+    }
+
+    /** Forwards to the delegate; any exception thrown is passed to handleException */
+    @Override
+    public void footer(Context context, T writer) throws IOException {
+        try {
+            delegate.footer(context, writer);
+        } catch (Exception e) {
+            handleException(e);
+        }
+    }
+
+    /** Forwards to the delegate; any exception thrown is passed to handleException */
+    @Override
+    public void hit(Context context, T writer) throws IOException {
+        try {
+            delegate.hit(context, writer);
+        } catch (Exception e) {
+            handleException(e);
+        }
+    }
+
+    /** Forwards to the delegate; any exception thrown is passed to handleException */
+    @Override
+    public void hitFooter(Context context, T writer) throws IOException {
+        try {
+            delegate.hitFooter(context, writer);
+        } catch (Exception e) {
+            handleException(e);
+        }
+    }
+
+    /** Forwards to the delegate; any exception thrown is passed to handleException */
+    @Override
+    public void error(Context context, T writer) throws IOException {
+        try {
+            delegate.error(context, writer);
+        } catch (Exception e) {
+            handleException(e);
+        }
+    }
+
+    /** Forwards to the delegate; any exception thrown is passed to handleException */
+    @Override
+    public void noHits(Context context, T writer) throws IOException {
+        try {
+            delegate.noHits(context, writer);
+        } catch (Exception e) {
+            handleException(e);
+        }
+    }
+
+    /** Forwards to the delegate; any exception thrown is passed to handleException */
+    @Override
+    public void queryContext(Context context, T writer) throws IOException {
+        try {
+            delegate.queryContext(context, writer);
+        } catch (Exception e) {
+            handleException(e);
+        }
+    }
+
+    /*** End expanded template.
***/ + + private void handleException(Exception e) throws IOException { + if (e instanceof IOException) { + throw (IOException) e; + } else { + log.log(LogLevel.WARNING, "Exception thrown in " + getName() + + ": " + Exceptions.toMessageString(e), e); + } + } + + UserTemplate<T> getDelegate() { + return delegate; + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/MapContext.java b/container-search/src/main/java/com/yahoo/prelude/templates/MapContext.java new file mode 100644 index 00000000000..328faee5c29 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/MapContext.java @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.templates; + +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; + +/** A context having a map as secondary storage */ +public class MapContext extends Context { + + private Map<String, Object> map = new LinkedHashMap<>(); + + @Override + public Object get(String key) { + return normalizeValue(map.get(key)); + } + + public Object put(String name, Object value) { + return map.put(name, value); + } + + public Object remove(Object name) { + return map.remove(name); + } + + @Override + public Collection<? extends Object> getKeys() { + return map.keySet(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/PageTemplateSet.java b/container-search/src/main/java/com/yahoo/prelude/templates/PageTemplateSet.java new file mode 100644 index 00000000000..26b51187954 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/PageTemplateSet.java @@ -0,0 +1,72 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.prelude.templates; + +import com.yahoo.search.Result; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; +import com.yahoo.text.XMLWriter; + +import java.io.IOException; +import java.io.Writer; + +/** + * A template implementing the 'page' format. + * This is a variant of the tiled template set - see that class for details. + * + * @author bratseth + */ +public class PageTemplateSet extends TiledTemplateSet { + + public PageTemplateSet() { + super("page"); + } + + @Override + /** Uses an XML writer in this */ + public XMLWriter wrapWriter(Writer writer) { return new XMLWriter(super.wrapWriter(writer)); } + + @Override + public void header(Context context,XMLWriter writer) throws IOException { + Result result=(Result)context.get("result"); + writer.xmlHeader(getRequestedEncoding(result.getQuery())); + writer.openTag("page").attribute("version","1.0").attribute("layout",result.hits().getField("layout")); + renderCoverageAttributes(result.getCoverage(false), writer); + writer.closeStartTag(); + renderSectionContent(result.hits(),writer); + } + + @Override + public void footer(Context context,XMLWriter writer) throws IOException { + if (writer.isIn("content")) + writer.closeTag(); + super.footer(context,writer); + } + + @Override + protected void renderSection(HitGroup hit, XMLWriter writer) throws IOException { + writer.openTag("section"); + writer.attribute("id",hit.getDisplayId()); + writer.attribute("layout",hit.getField("layout")); + writer.attribute("region",hit.getField("region")); + writer.closeStartTag(); + renderSectionContent(hit,writer); + } + + @Override + public void hit(Context context, XMLWriter writer) throws IOException { + Hit hit = (Hit) context.get("hit"); + if (!hit.isMeta() && !writer.isIn("content")) + writer.openTag("content"); + super.hit(context,writer); + } + + @Override + public void hitFooter(Context context, XMLWriter writer) throws IOException { + if (writer.isIn("content")) + 
writer.closeTag(); + super.hitFooter(context, writer); + } + + public String toString() { return "page template"; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/SearchRendererAdaptor.java b/container-search/src/main/java/com/yahoo/prelude/templates/SearchRendererAdaptor.java new file mode 100644 index 00000000000..ca9dba6fc0f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/SearchRendererAdaptor.java @@ -0,0 +1,256 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.templates; + +import com.yahoo.prelude.fastsearch.GroupingListHit; +import com.yahoo.search.Result; +import com.yahoo.search.query.context.QueryContext; +import com.yahoo.search.rendering.Renderer; +import com.yahoo.search.result.*; +import com.yahoo.search.result.ErrorHit; +import com.yahoo.processing.request.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.text.XMLWriter; + +import java.io.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; + +/** + * Renders a search result using the old templates API. + * + * @author tonytv + */ +@SuppressWarnings({ "rawtypes", "deprecation", "unchecked" }) +public final class SearchRendererAdaptor extends Renderer { + + private final LogExceptionUserTemplateDelegator templates; + + //Per instance members, must be created at rendering time, not construction time due to cloning. + private Context context; + + public SearchRendererAdaptor(UserTemplate userTemplate) { + templates = new LogExceptionUserTemplateDelegator(userTemplate); + } + + @Override + public void init() { + super.init(); + context = templates.createContext(); + } + + /** A legacy test utility - do not use. 
*/ + public static void callRender(OutputStream stream, Result result) throws IOException { + Renderer rendererAdaptor = new SearchRendererAdaptor(result.getTemplating().getTemplates()); + rendererAdaptor.init(); + result.getTemplating().setRenderer(rendererAdaptor); + rendererAdaptor.render(stream, result, result.getQuery().getModel().getExecution(), result.getQuery()); + } + + @Override + public String getEncoding() { + return templates.getEncoding(); + } + + @Override + public String getMimeType() { + return templates.getMimeType(); + } + + @Override + public String getDefaultSummaryClass() { + return templates.getSummaryClass(); + } + + /** + * Renders this result + */ + public void render(Writer writer, Result result) throws java.io.IOException { + Writer wrappedWriter = wrapWriter(writer); + + beginResult(wrappedWriter, result); + + if (result.hits().getError() != null || result.hits().getQuery().errors().size() > 0) { + error(wrappedWriter, Collections.unmodifiableCollection( + all(result.hits().getQuery().errors(), result.hits().getError()))); + } + + if (result.getConcreteHitCount() == 0) { + emptyResult(wrappedWriter, result); + } + + if (result.getContext(false) != null) { + queryContext(wrappedWriter, result.getContext(false)); + } + + renderHitGroup(wrappedWriter, result.hits(), result.hits().getQuery().getOffset() + 1); + + endResult(wrappedWriter, result); + } + + + private <T> Collection<T> all(Collection<T> collection, T extra) { + Collection<T> result = new ArrayList<>(collection); + result.add(extra); + return result; + } + + + public Writer wrapWriter(Writer writer) { + return templates.wrapWriter(writer); + } + + + public void beginResult(Writer writer, Result result) throws IOException { + context.put("context", context); + context.put("result", result); + context.setBoldOpenTag(templates.getBoldOpenTag()); + context.setBoldCloseTag(templates.getBoldCloseTag()); + context.setSeparatorTag(templates.getSeparatorTag()); + + 
templates.header(context, writer); + } + + public void endResult(Writer writer, Result result) throws IOException { + templates.footer(context, writer); + } + + public void error(Writer writer, Collection<ErrorMessage> errorMessages) throws IOException { + templates.error(context, writer); + } + + + public void emptyResult(Writer writer, Result result) throws IOException { + templates.noHits(context, writer); + } + + public void queryContext(Writer writer, QueryContext queryContext) throws IOException { + templates.queryContext(context, writer); + } + + private void renderHitGroup(Writer writer, HitGroup hitGroup, int hitnumber) + throws IOException { + boolean defaultTemplate = templates.isDefaultTemplateSet(); + for (Hit hit : hitGroup.asList()) { + if (!defaultTemplate && hit instanceof ErrorHit) continue; // TODO: Stop doing this + + renderHit(writer, hit, hitnumber); + if (!hit.isAuxiliary()) + hitnumber++; + } + } + + + /** + * Renders this hit as xml. The default implementation will call the simpleRender() + * hook. If it returns true, nothing more is done, otherwise the + * given template set will be used for rendering. 
+ * + * + * @param writer the writer to append this hit to + * @throws java.io.IOException if rendering fails + */ + public void renderHit(Writer writer, Hit hit, int hitno) throws IOException { + renderRegularHit(writer, hit, hitno); + } + + private void renderRegularHit(Writer writer, Hit hit, int hitno) throws IOException { + boolean renderedSimple = simpleRenderHit(writer, hit); + + if (renderedSimple) { + return; + } + + HitContext hitContext = new HitContext(hit, context); + hitContext.put("hit", hit); + hitContext.put("hitno", new Integer(hitno)); + hitContext.put("relevancy",hit.getRelevance()); + templates.hit(hitContext, writer); + + if (hit instanceof HitGroup) + renderHitGroup(writer, (HitGroup) hit, hitno); + + // Put these back - may have been changed by nested rendering + hitContext.put("hit", hit); + hitContext.put("hitno", new Integer(hitno)); + templates.hitFooter(hitContext, writer); + + + hitContext.remove("hit"); + hitContext.remove("hitno"); + } + + private boolean simpleRenderHit(Writer writer, Hit hit) throws IOException { + if (hit instanceof DefaultErrorHit) { + return simpleRenderDefaultErrorHit(writer, (DefaultErrorHit) hit); + } else if (hit instanceof GroupingListHit) { + return true; + } else { + return false; + } + } + + public static boolean simpleRenderDefaultErrorHit(Writer writer, ErrorHit defaultErrorHit) throws IOException { + XMLWriter xmlWriter=(writer instanceof XMLWriter) ? 
(XMLWriter)writer : new XMLWriter(writer,10,-1); + xmlWriter.openTag("errordetails"); + for (Iterator i = defaultErrorHit.errorIterator(); i.hasNext();) { + ErrorMessage error = (ErrorMessage) i.next(); + renderMessageDefaultErrorHit(xmlWriter, error); + } + xmlWriter.closeTag(); + return true; + } + + public static void renderMessageDefaultErrorHit(XMLWriter writer, ErrorMessage error) throws IOException { + writer.openTag("error"); + if (error instanceof com.yahoo.search.result.ErrorMessage) + writer.attribute("source",((com.yahoo.search.result.ErrorMessage)error).getSource()); + writer.attribute("error",error.getMessage()); + writer.attribute("code",Integer.toString(error.getCode())); + writer.content(error.getDetailedMessage(),false); + if (error.getCause()!=null) { + writer.openTag("cause"); + writer.content("\n",true); + StringWriter stackTrace=new StringWriter(); + error.getCause().printStackTrace(new PrintWriter(stackTrace)); + writer.content(stackTrace.toString(),true); + writer.closeTag(); + } + writer.closeTag(); + } + + /** + * Renders this hit as XML, disregarding the given template. + * The main error will be rendered first, the all the following errors. + */ + public boolean simpleRenderErrorHit(Writer writer, com.yahoo.search.result.ErrorHit errorHit) throws IOException { + XMLWriter xmlWriter=(writer instanceof XMLWriter) ? 
(XMLWriter)writer : new XMLWriter(writer,10,-1);
+        xmlWriter.openTag("errordetails");
+        for (Iterator i = errorHit.errorIterator(); i.hasNext();) {
+            ErrorMessage error = (ErrorMessage) i.next();
+            rendererErrorHitMessageMessage(xmlWriter, errorHit, error);
+        }
+        xmlWriter.closeTag();
+        return true;
+    }
+
+    /**
+     * Writes one error message as an error tag with the message and code as attributes and
+     * the detailed message as content. The source attribute is taken from the error hit, and
+     * only written when the error hit is also a Hit.
+     *
+     * @param writer the writer to write the tag to
+     * @param errorHit the hit the message belongs to
+     * @param error the message to write
+     * @throws IOException if writing fails
+     */
+    public static void rendererErrorHitMessageMessage(XMLWriter writer, com.yahoo.search.result.ErrorHit errorHit, ErrorMessage error) throws IOException {
+        writer.openTag("error");
+        if (errorHit instanceof Hit) {
+            writer.attribute("source", ((Hit) errorHit).getSource());
+        }
+        writer.attribute("error",error.getMessage());
+        writer.attribute("code",Integer.toString(error.getCode()));
+        writer.content(error.getDetailedMessage(),false);
+        writer.closeTag();
+    }
+
+    /**
+     * For internal use only
+     *
+     * @return the user template wrapped by this adaptor
+     */
+    public UserTemplate getAdaptee() {
+        return templates.getDelegate();
+    }
+
+}
diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/Template.java b/container-search/src/main/java/com/yahoo/prelude/templates/Template.java
new file mode 100644
index 00000000000..7052671a584
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/prelude/templates/Template.java
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.templates;
+
+import java.io.Writer;
+
+
+/**
+ * A template turns a template string and some state into
+ * an instantiated string. Add support for a particular
+ * template mechanism by subclassing this.
+ *
+ * @author bratseth
+ */
+public abstract class Template<T extends Writer> {
+
+    /**
+     * Renders this template
+     *
+     * @param context the context to evaluate in
+     * @param writer the writer to render to
+     * @throws java.io.IOException declared so that implementations may propagate write failures
+     */
+    public abstract void render(Context context,T writer)
+        throws java.io.IOException;
+
+
+    /**
+     * Get template name
+     *
+     * @return template name
+     */
+    public abstract String getName();
+
+}
diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/TemplateSet.java b/container-search/src/main/java/com/yahoo/prelude/templates/TemplateSet.java
new file mode 100644
index 00000000000..6cf6ee640a7
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/prelude/templates/TemplateSet.java
@@ -0,0 +1,214 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.templates;
+
+import com.yahoo.search.Result;
+import com.yahoo.search.result.Hit;
+import com.yahoo.text.GenericWriter;
+import com.yahoo.text.XMLWriter;
+
+import java.io.IOException;
+import java.io.Writer;
+
+/**
+ * <p>A template set contains instances of the various templates
+ * required to render a result.</p>
+ *
+ * <p>Normal usage is to create an instance and populate it with templates,
+ * but this class also supports subclassing to refine the behaviour,
+ * like returning different templates for different hit types.</p>
+ *
+ * @author bratseth
+ */
+public class TemplateSet<T extends Writer> extends UserTemplate<T> {
+
+    /** The name the query context template is stored under */
+    private static final String queryContextTemplateName = "queryContext";
+
+    /** The instance returned by getDefault() */
+    private static final DefaultTemplateSet defaultTemplateSet=new DefaultTemplateSet();
+
+    /**
+     * Creates a template set containing no templates
+     *
+     * @param name the unique name of this template set, used for
+     *        referring to it by clients
+     * @param mimeType passed on to the superclass constructor
+     * @param encoding passed on to the superclass constructor
+     */
+    public TemplateSet(String name,
+                       String mimeType,
+                       String encoding) {
+        super(name, mimeType,encoding);
+    }
+
+    /**
+     * Returns the default template
set. This is a template set which renders in
+     * the default xml format
+     */
+    public static UserTemplate<XMLWriter> getDefault() {
+        return defaultTemplateSet;
+    }
+
+    /**
+     * Returns the result header template
+     *
+     * @param result the result which will use the template; not read by this implementation
+     * @return the template stored under "header"
+     */
+    @SuppressWarnings("unchecked")
+    public Template<T> getHeader(Result result) { return (Template<T>) getTemplate("header"); }
+
+    /**
+     * Sets the header template
+     *
+     * @param header the template to use for rendering headers
+     * @throws NullPointerException if the given template is null
+     */
+    public void setHeader(Template<T> header) {
+        setTemplateNotNull("header",header);
+    }
+
+    /**
+     * Returns the result footer template
+     *
+     * @param result the result which will use the template; not read by this implementation
+     * @return the template stored under "footer"
+     */
+    @SuppressWarnings("unchecked")
+    public Template<T> getFooter(Result result) { return (Template<T>) getTemplate("footer"); }
+
+    /**
+     * Sets the footer template
+     *
+     * @param footer the template to use for rendering footers
+     * @throws NullPointerException if the given template is null
+     */
+    public void setFooter(Template<T> footer) {
+        setTemplateNotNull("footer",footer);
+    }
+
+    /**
+     * Returns the empty body template
+     *
+     * @param result the result which will use the template; not read by this implementation
+     * @return the template stored under "nohits"
+     */
+    @SuppressWarnings("unchecked")
+    public Template<T> getNohits(Result result) { return (Template<T>) getTemplate("nohits"); }
+
+
+    /**
+     * @param result the result which will use the template; not read by this implementation
+     * @return the template for rendering the query context, stored under "queryContext"
+     */
+    @SuppressWarnings("unchecked")
+    public Template<T> getQueryContext(Result result) {
+        return (Template<T>) getTemplate(queryContextTemplateName);
+    }
+
+    /**
+     * @param template The template to be used for rendering query contexts, never null.
+     */
+    public void setQueryContext(Template<T> template) {
+        setTemplateNotNull(queryContextTemplateName, template);
+    }
+
+    /**
+     * Sets the nohits template
+     *
+     * @param nohits the template to use for rendering empty results
+     * @throws NullPointerException if the given template is null
+     */
+    public void setNohits(Template<T> nohits) {
+        setTemplateNotNull("nohits",nohits);
+    }
+
+    /**
+     * Returns the error body template
+     *
+     * @param result the result which will use the template; not read by this implementation
+     * @return the template stored under "error"
+     */
+    @SuppressWarnings("unchecked")
+    public Template<T> getError(Result result) { return (Template<T>) getTemplate("error"); }
+
+    /**
+     * Sets the error template
+     *
+     * @param error the template to use for rendering errors
+     * @throws NullPointerException if the given template is null
+     */
+    public void setError(Template<T> error) {
+        setTemplateNotNull("error",error);
+    }
+
+    /**
+     * Returns the hit template
+     *
+     * @param resultHit the hit which will use the template; not read by this implementation
+     * @return the template stored under "hit"
+     */
+    @SuppressWarnings("unchecked")
+    public Template<T> getHit(Hit resultHit) { return (Template<T>) getTemplate("hit"); }
+
+    /**
+     * Sets the hit template
+     *
+     * @param hit the template to use for rendering hits
+     * @throws NullPointerException if the given template is null
+     */
+    public void setHit(Template<T> hit) {
+        setTemplateNotNull("hit",hit);
+    }
+
+    /**
+     * Returns the hit footer template
+     *
+     * @param hit the hit which will use the template; not read by this implementation
+     * @return the template to use, or null if no hit footer is used
+     */
+    @SuppressWarnings("unchecked")
+    public Template<T> getHitFooter(Hit hit) { return (Template<T>) getTemplate("hitfooter"); }
+
+    /** Describes this set by name, mime type and five of its templates; the hit footer and query context templates are not listed */
+    public String toString() {
+        return "template set " + getName() + " of type " + getMimeType() +
+            " [header=" + getTemplate("header") +
+            ",footer=" + getTemplate("footer") +
+            ",nohits=" + getTemplate("nohits") +
+            ",error=" + getTemplate("error") +
+            ",hit=" +
getTemplate("hit") + "]";
+    }
+
+    // The render methods below look up their template with a null result or hit argument
+    // and, except for hitFooter, call render on it without a null check.
+
+    /** Renders the header template */
+    @Override
+    public void header(Context context, T writer) throws IOException {
+        getHeader(null).render(context, writer);
+    }
+
+    /** Renders the footer template */
+    @Override
+    public void footer(Context context, T writer) throws IOException {
+        getFooter(null).render(context, writer);
+    }
+
+    /** Renders the hit template */
+    @Override
+    public void hit(Context context, T writer) throws IOException {
+        getHit(null).render(context, writer);
+    }
+
+    /** Renders the error template */
+    @Override
+    public void error(Context context, T writer) throws IOException {
+        getError(null).render(context, writer);
+    }
+
+    /** Renders the hit footer template if there is one, otherwise does nothing */
+    @Override
+    public void hitFooter(Context context, T writer) throws IOException {
+        Template<T> hitFooter = getHitFooter(null);
+        if (hitFooter != null)
+            hitFooter.render(context, writer);
+    }
+
+    /** Renders the nohits template */
+    @Override
+    public void noHits(Context context, T writer) throws IOException {
+        getNohits(null).render(context, writer);
+    }
+
+    /** Renders the query context template */
+    @Override
+    public void queryContext(Context context, T writer) throws IOException {
+        getQueryContext(null).render(context, writer);
+    }
+
+}
diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/TiledTemplateSet.java b/container-search/src/main/java/com/yahoo/prelude/templates/TiledTemplateSet.java
new file mode 100644
index 00000000000..b2564beeb7a
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/prelude/templates/TiledTemplateSet.java
@@ -0,0 +1,337 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.templates; + +import com.yahoo.container.ConfigHack; +import com.yahoo.prelude.templates.FormattingOptions.SubtypeFieldWithPrefix; +import com.yahoo.search.Result; +import com.yahoo.search.pagetemplates.model.Renderer; +import com.yahoo.search.pagetemplates.model.Source; +import com.yahoo.search.pagetemplates.result.SectionHitGroup; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; +import com.yahoo.text.XMLWriter; + +import java.io.IOException; +import java.io.Writer; +import java.util.Iterator; +import java.util.Map; + +/** + * A template set which implements the 'tiled' format. + * + * This template implementation requires a few rules to be observed for it to work properly: + * <ul> + * <li>As hit fields are rendered as XML tag names, their name must be compatible with XML tag names.</li> + * <li>Results sections, meta section, provider tags are rendered based on hits having specific types (as in {@link Hit#types()}, + * see table below for a list of hit types that are needed in order for hits to render properly.</li> + * <li>Some fields inside hits corresponding to provider tags (/result/meta/provider) are formatted in a specific way, see provider fields formatting options + * below. 
Other fields are rendered the usual way.</li> + * </ul> + * + * <p>Hit types required for proper rendering</p> + * <table summary="Hit types required for proper rendering"> + * <tr><td>XML tag path</td><td>Required hit type</td></tr> + * <tr><td>/result/section</td><td>A hit group and have a "section" type</td></tr> + * <tr><td>/result/meta</td><td>A hit group and have a "meta" type</td></tr> + * <tr><td>/result/meta/provider</td><td>A hit that has a "logging" type</td></tr> + * </table> + * + * <p>Provider fields formatting options</p> + * <table summary="Provider fields formatting options"> + * <tr><td>Field</td><td>Formatting</td><td>Field type</td></tr> + * <tr><td>provider</td><td>name attribute of <provider> tag</td><td>Provided by container</td></tr> + * <tr><td>scheme</td><td>scheme attribute of <provider> tag</td><td>Provided by container</td></tr> + * <tr><td>host</td><td>host attribute of <provider> tag</td><td>Provided by container</td></tr> + * <tr><td>port</td><td>port attribute of <provider> tag</td><td>Provided by container</td></tr> + * <tr><td>path</td><td>path attribute of <provider> tag</td><td>Provided by container</td></tr> + * <tr><td>status</td><td>result attribute of <provider> tag</td><td>Provided by container</td></tr> + * <tr><td>latency_connect</td><td><latency type="connect"> tag</td><td>Provided by container</td></tr> + * <tr><td>latency_start</td><td><latency type="start"> tag</td><td>Provided by container</td></tr> + * <tr><td>latency_finish</td><td><latency type="finish"> tag</td><td>Provided by container</td></tr> + * <tr><td>query_param_*</td><td><parameter name="..."> tag</td><td>Provided by container</td></tr> + * <tr><td>header_*</td><td><header name="..."> tag</td><td>Provided by container</td></tr> + * <tr><td>response_header_*</td><td><response-header name="..."> tag</td><td>Provided by container</td></tr> + * <tr><td>count_first</td><td><count type="first"> tag</td><td>Provided by container</td></tr> + * 
<tr><td>count_last</td><td><count type="last"> tag</td><td>Provided by container</td></tr> + * <tr><td>count_total</td><td><count type="total"> tag</td><td>Provided by container</td></tr> + * <tr><td>count_deep</td><td><count type="deep"> tag</td><td>Provided by container</td></tr> + * <tr><td>queryattrs_xorronum</td><td><queryattrs name="xorronum"> tag</td><td>Provided by YST searcher</td></tr> + * <tr><td>queryattrs_RankFeaturesRewriterAttr</td><td><queryattrs name="RankFeaturesRewriterAttr"> tag</td><td>Provided by YST searcher</td></tr> + * <tr><td>queryattrs_intlannotator</td><td><queryattrs name="intlannotator"> tag</td><td>Provided by YST searcher</td></tr> + * <tr><td>queryattrs_category</td><td><queryattrs name="category"> tag</td><td>Provided by YST searcher</td></tr> + * <tr><td>wordcounts_*</td><td><wordcounts word="..."> tag</td><td>Provided by YST searcher</td></tr> + * </table> + * + * @author bratseth + * @author laboisse + */ +public class TiledTemplateSet extends DefaultTemplateSet { + + private FormattingOptions hitOptionsForProvider; + private FormattingOptions hitOptions; + + public TiledTemplateSet() { + this(ConfigHack.TILED_TEMPLATE); + } + + public TiledTemplateSet(String templateName) { + super(templateName); + + // Define formatting options that will be used by various rendering methods + hitOptions = new FormattingOptions(); + // Render provider field as an attribute, not as a regular field + hitOptions.formatFieldAsAttribute("provider", "provider"); + hitOptions.setFieldNotToRender("provider"); + + + // Define formatting options that will be used by various rendering methods, for /result/meta/provider tags + hitOptionsForProvider = new FormattingOptions(); + hitOptionsForProvider.formatFieldAsAttribute("provider", "name"); // Provider name is rendered a provider/@name + // hitOptionsForProvider.formatFieldAsAttribute("uri", "query"); // FIXME Issue with attribute formatting, keeping as regular field for now + 
hitOptionsForProvider.formatFieldAsAttribute("scheme", "scheme"); + hitOptionsForProvider.formatFieldAsAttribute("host", "host"); + hitOptionsForProvider.formatFieldAsAttribute("port", "port"); + hitOptionsForProvider.formatFieldAsAttribute("path", "path"); + hitOptionsForProvider.formatFieldAsAttribute("status", "result"); + // Latency fields are not defined using prefixes as we know all the field names and prefixes are expensive + hitOptionsForProvider.formatFieldWithSubtype("latency_connect", "latency", "type", "connect"); + hitOptionsForProvider.formatFieldWithSubtype("latency_start", "latency", "type", "start"); + hitOptionsForProvider.formatFieldWithSubtype("latency_finish", "latency", "type", "finish"); + // Must use prefix for query parameters + hitOptionsForProvider.formatFieldWithSubtype("query_param_", "parameter", "name"); + // Must use prefix for getHeaders + hitOptionsForProvider.formatFieldWithSubtype("header_", "header", "name"); + // Must use prefix for response getHeaders + hitOptionsForProvider.formatFieldWithSubtype("response_header_", "response-header", "name"); + // Count fields are not defined using prefixes as we know all the field names and prefixes are expensive + hitOptionsForProvider.formatFieldWithSubtype("count_first", "count", "type", "first"); + hitOptionsForProvider.formatFieldWithSubtype("count_last", "count", "type", "last"); + hitOptionsForProvider.formatFieldWithSubtype("count_total", "count", "type", "total"); + hitOptionsForProvider.formatFieldWithSubtype("count_deep", "count", "type", "deep"); + + hitOptionsForProvider.formatFieldWithSubtype("queryattrs_xorronum", "queryattrs", "name", "xorronum"); + hitOptionsForProvider.formatFieldWithSubtype("queryattrs_RankFeaturesRewriterAttr", "queryattrs", "name", "RankFeaturesRewriterAttr"); + hitOptionsForProvider.formatFieldWithSubtype("queryattrs_intlannotator", "queryattrs", "name", "intlannotator"); + hitOptionsForProvider.formatFieldWithSubtype("queryattrs_category", 
"queryattrs", "name", "category"); + + hitOptionsForProvider.formatFieldWithSubtype("wordcounts_", "wordcounts", "word"); + // Provider field should not be rendered in logging hits as we already have <provider name="..."> + hitOptionsForProvider.setFieldNotToRender("provider"); + } + + @Override + /** Uses an XML writer in this template */ + public XMLWriter wrapWriter(Writer writer) { return new XMLWriter(super.wrapWriter(writer)); } + + @Override + public void header(Context context,XMLWriter writer) throws IOException { + Result result=(Result)context.get("result"); + writer.xmlHeader(getRequestedEncoding(result.getQuery())); + writer.openTag("result").attribute("version","1.0"); + writer.attribute("layout", result.hits().getField("layout")); + renderCoverageAttributes(result.getCoverage(false), writer); + writer.closeStartTag(); + renderSectionContent(result.hits(),writer); + } + + /** + * Augments default hit attributes rendering with formatting options. + * There's also a hacky part: if hit is actually a hit group, tries to use + * the 'type' field in place of the hit's type, to avoid having the 'group' hit type. 
+ */ + @Override + protected void renderHitAttributes(Hit hit, XMLWriter writer) throws IOException { + if (hit instanceof HitGroup) { + String type = hit.getTypeString(); // TODO: This logic is somewhat crazy + if("group".equals(type)) + type = String.valueOf(hit.getField("type")); + writer.attribute("type", type); + } + else { + writer.attribute("type", hit.getTypeString()); + } + + if (hit.getRelevance() != null) + writer.attribute("relevance", hit.getRelevance()); + writer.attribute("source", hit.getSource()); + + for (Map.Entry<String, String> attr : hitOptions.fieldsAsAttributes()) { + Object val = hit.getField(attr.getKey()); + if (val != null) + writer.attribute(attr.getValue(), String.valueOf(val)); + } + } + + /** Renders one field as a tag named after the field, unless {@link #shouldRenderField(Hit, String)} rejects the field */ + @Override + protected void renderField(Context context, Hit hit, Map.Entry<String, Object> entry, XMLWriter writer) throws IOException { + String fieldName = entry.getKey(); + + if ( !shouldRenderField(hit, fieldName)) return; + + writer.openTag(fieldName); + renderFieldContent(context, hit, fieldName, writer); + writer.closeTag(); + } + + /** Renders the hit id, then every field of the hit which the formatting options accept */ + @Override + protected void renderHitFields(Context context, Hit hit, XMLWriter writer) throws IOException { + renderId(hit.getId(), writer); + for (Iterator<Map.Entry<String, Object>> it = hit.fieldIterator(); it.hasNext(); ) { + Map.Entry<String, Object> entry = it.next(); + // Exclude fields that should not be rendered + if (hitOptions.shouldRenderField(entry.getKey())) + renderField(context, hit, entry, writer); + } + } + + /** Returns false for the "relevancy" and "collapseId" fields and true for every other field name */ + @Override + protected boolean shouldRenderField(Hit hit, String fieldName) { + if (fieldName.equals("relevancy")) return false; + if (fieldName.equals("collapseId")) return false; + return true; + } + + /** + * Overrides {@link DefaultTemplateSet#hit(Context, Writer)} + * to print 'logging' type meta hits as /result/meta/provider tags. + * Falls back to {@code super.hit(context, writer)} in other cases. 
+ */ + @Override + public void hit(Context context, XMLWriter writer) throws IOException { + Hit hit = (Hit) context.get("hit"); + if (hit.isMeta() && hit.types().contains("logging")) + renderProvider(context, hit, writer); + else + super.hit(context, writer); + } + + /** + * Overrides {@link DefaultTemplateSet#renderHitGroup(HitGroup, Context, XMLWriter)} + * for /result/section and /result/meta hit groups. + * Falls back to {@code super.renderHitGroup(hit, context, writer)} otherwise. + */ + @Override + protected void renderHitGroup(HitGroup hit, Context context, XMLWriter writer) throws IOException { + if (hit.types().contains("section")) { + renderSection(hit, writer); // Renders /result/section + } + else if (hit.types().contains("meta")) { + writer.openTag("meta"); // renders /result/meta + writer.closeStartTag(); + } + else { + super.renderHitGroup(hit, context, writer); + } + } + + /** + * Renders /result/section. + * Doesn't use {@link #renderHitAttributes(Hit, XMLWriter)}. + */ + protected void renderSection(HitGroup hit, XMLWriter writer) throws IOException { + writer.openTag("section"); + writer.attribute("id",hit.getDisplayId()); + writer.attribute("layout",hit.getField("layout")); + writer.attribute("region",hit.getField("region")); + writer.attribute("placement",hit.getField("placement")); // deprecated in 5.0 + writer.closeStartTag(); + renderSectionContent(hit,writer); + } + + /** + * Renders the source and renderer tags, each with its parameters, of a {@link SectionHitGroup}. + * Writes nothing when the hit group is of any other class. 
+ */ + protected void renderSectionContent(HitGroup hit,XMLWriter writer) throws IOException { + if (hit instanceof SectionHitGroup) { // render additional information + SectionHitGroup sectionGroup=(SectionHitGroup)hit; + for (Source source : sectionGroup.sources()) { + writer.openTag("source").attribute("url",source.getUrl()); + renderParameters(source.parameters(),writer); + writer.closeTag(); + } + for (Renderer renderer : sectionGroup.renderers()) { + writer.openTag("renderer").attribute("for",renderer.getRendererFor()).attribute("name",renderer.getName()); + 
renderParameters(renderer.parameters(),writer); + writer.closeTag(); + } + } + } + + /** Renders each map entry as a parameter tag with the key as its name attribute and the value as its content */ + private void renderParameters(Map<String,String> parameters,XMLWriter writer) throws IOException { + // Render content + for (Map.Entry<String, String> parameter : parameters.entrySet()) + writer.openTag("parameter").attribute("name",parameter.getKey()).content(parameter.getValue(),false).closeTag(); + } + + /** + * Renders /result/meta/provider. + * Uses {@link #renderProviderHitAttributes(Hit, XMLWriter)} instead of the default {@link #renderHitAttributes(Hit, XMLWriter)}. + * @see #renderProviderHitAttributes(Hit, XMLWriter) + * @see #renderProviderHitFields(Context, Hit, XMLWriter) + */ + protected void renderProvider(Context context, Hit hit, XMLWriter writer) + throws IOException { + writer.openTag("provider"); + renderProviderHitAttributes(hit, writer); + writer.closeStartTag(); + renderProviderHitFields(context, hit, writer); + } + + /** + * Specific hit attributes rendering for 'provider' meta hits under /result/meta. + */ + protected void renderProviderHitAttributes(Hit hit, XMLWriter writer) throws IOException { + // Browse through fields that should be rendered as attributes + for (Map.Entry<String, String> attr : hitOptionsForProvider.fieldsAsAttributes()) + writer.attribute(attr.getValue(),hit.getField(attr.getKey())); + } + + + /** + * Renders fields under /result/meta/provider. 
+ * + * @see #renderProviderField(Context, Hit, java.util.Map.Entry, XMLWriter) + */ + protected void renderProviderHitFields(Context context, Hit hit, XMLWriter writer) + throws IOException { + renderId(hit.getId(), writer); + for (Iterator<Map.Entry<String, Object>> it = hit.fieldIterator(); it.hasNext(); ) { + Map.Entry<String, Object> entry = it.next(); + // Exclude fields that have already been rendered as attributes and + // fields that should not be rendered + if (hitOptionsForProvider.getAttributeName(entry.getKey()) == null + && hitOptionsForProvider.shouldRenderField(entry.getKey())) + renderProviderField(context, hit, entry, writer); + } + } + + /** + * Renders one field under /result/meta/provider. + * A subtype is looked up by the exact field name first and by prefix second. + * When one is found the field is written under the subtype's tag name, with the subtype + * attribute set to the configured value or, for a prefix subtype without a configured value, + * to the part of the field name following the prefix. + * Any other field is written under its own name. + */ + protected void renderProviderField(Context context, Hit hit, + Map.Entry<String, Object> entry, XMLWriter writer) throws IOException { + + String name = entry.getKey(); + FormattingOptions.SubtypeField subtypeField = hitOptionsForProvider.getSubtype(name); + if (subtypeField == null) + subtypeField = hitOptionsForProvider.getSubtypeWithPrefix(name); + + if (subtypeField != null) { + writer.openTag(subtypeField.tagName); + if (subtypeField.attributeValue != null) { + writer.attribute(subtypeField.attributeName,subtypeField.attributeValue); + } + else if (subtypeField instanceof SubtypeFieldWithPrefix) { + // This is a subtype field that was defined using a prefix + // get the remaining part of the field name + writer.attribute(subtypeField.attributeName, + name.substring(((SubtypeFieldWithPrefix)subtypeField).prefixLength)); + } + } else { + writer.openTag(name); + } + writer.escapedContent(hit.getFieldXML(name),false).closeTag(); + } + + + public String toString() { return "tiled result template"; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/UserTemplate.java b/container-search/src/main/java/com/yahoo/prelude/templates/UserTemplate.java new file mode 100644 index 00000000000..bcc3b3c6390 --- /dev/null +++ 
b/container-search/src/main/java/com/yahoo/prelude/templates/UserTemplate.java @@ -0,0 +1,323 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.templates; + +import com.yahoo.io.ByteWriter; +import com.yahoo.prelude.fastsearch.FastHit; +import com.yahoo.prelude.fastsearch.XMLField; +import com.yahoo.search.Result; +import com.yahoo.text.Utf8String; +import com.yahoo.text.XMLWriter; + +import java.io.IOException; +import java.io.Writer; +import java.util.logging.Logger; + + +/** + * A wrapper for a template set, suitable for subclassing. + * + * <p> + * A subclass of UserTemplate must implement header(), footer(), hit(), + * hitFooter(), error() and noHits(). + * + * @author Steinar Knutsen + */ +@SuppressWarnings("deprecation") +public abstract class UserTemplate<T extends Writer> extends GenericTemplateSet { + + // & + private static final byte[] ampersand = new byte[] { 38, 97, 109, 112, 59 }; + + // < + private static final byte[] lessThan = new byte[] { 38, 108, 116, 59 }; + // > + private static final byte[] greaterThan = new byte[] { 38, 103, 116, 59 }; + + // \\u00 + private static final byte[] quotePrefix = new byte[] { 92, 117, 48, 48 }; + + private static final Logger log = Logger.getLogger(UserTemplate.class.getName()); + + /** + * The signature of this constructor is the one which is invoked + * in a production setting. + */ + public UserTemplate(String name, String mimeType, + String encoding) { + super(name, mimeType, encoding); + } + + public UserTemplate(String name) { + this(name, + DEFAULT_MIMETYPE, + DEFAULT_ENCODING + ); + } + + /** + * This is called once before each result is rendered using this template. + * The returned writer is used in all subsequent calls. Use this if another (wrapper) + * writer of the raw incoming writer is desired in the implementation of this template. 
+ * The class of the returned type must be given as a type argument to the template class, + * to be able to implement methods taking this wrapper writer as the argument type. + * This default implementation returns an XMLWriter. + */ + @SuppressWarnings("unchecked") + public T wrapWriter(Writer writer) { + //FIXME: Hack + return (T) XMLWriter.from(writer, 10, -1); + } + + /** + * Creates a new context suitable for this template. + * The context may be reused for several evaluations, but not multiple + * concurrent evaluations + */ + public Context createContext() { + return new MapContext(); + } + + + /** + * For internal use only + * TODO: get rid of this method * + */ + public boolean isDefaultTemplateSet() { + return getClass().equals(TemplateSet.getDefault().getClass()); + } + + /** + * Render the result set header. + * + * <p> + * The result set is available in the context object under the name + * "result". + * + * @param context + * wrapper which will contain, among other thing, the result + * set instance + * @param writer + * the destination for rendering the result + * @throws IOException + * may be propagated from the writer + */ + public abstract void header(Context context, T writer) + throws IOException; + + /** + * Render the result set footer. + * + * <p> + * The result set is available in the context object under the name + * "result". + * + * @param context + * wrapper which will contain, among other thing, the result + * set instance + * @param writer + * the destination for rendering the result + * @throws IOException + * may be propagated from the writer + */ + public abstract void footer(Context context, T writer) + throws IOException; + + /** + * Render a single top level hit. + * + * <p> + * The result set is available in the context object under the name + * "result". The hit itself as "hit", the index of the hit as "hitno", and + * all the fields under their normal names. 
+ * + * @param context + * wrapper which will contain, among other thing, the hit + * instance + * @param writer + * the destination for rendering the hit + * @throws IOException + * may be propagated from the writer + */ + public abstract void hit(Context context, T writer) throws IOException; + + /** + * Render a footer for a single top level hit. A typical implementation may + * do nothing. + * + * <p> + * The result set is available in the context object under the name + * "result". The hit itself as "hit", the index of the hit as "hitno", and + * all the fields under their normal names. + * + * @param context + * wrapper which will contain, among other thing, the hit + * instance + * @param writer + * the destination for rendering the hit + * @throws IOException + * may be propagated from the writer + */ + public abstract void hitFooter(Context context, T writer) + throws IOException; + + /** + * Render the error message for a result set. + * + * <p> + * The result set is available in the context object under the name + * "result". + * + * @param context + * wrapper which will contain, among other thing, main error + * and result set instances. + * @param writer + * the destination for rendering the hit + * @throws IOException + * may be propagated from the writer + */ + public abstract void error(Context context, T writer) + throws IOException; + + /** + * Invoked when the result set has no hits. + * + * <p> + * The result set is available in the context object under the name + * "result". + * + * @param context + * wrapper which will contain, among other thing, the result + * set instance + * @param writer + * the destination for rendering the hit + * @throws IOException + * may be propagated from the writer + */ + public abstract void noHits(Context context, T writer) + throws IOException; + + /** + * Override this to add custom rendering for the query context of the result. + * Only called when the query context is present. 
+ * + * <p> + * The result set is available in the context object under the name + * "result". The query context is retrieved from the result by calling + * result.getQuery.getContext(false) + * + * @param context + * wrapper which will contain, among other things, the result + * set instance + * @param writer + * the destination for rendering the hit + * @throws IOException + * may be propagated from the writer + */ + public void queryContext(Context context, T writer) throws IOException { + Result result = (Result) context.get("result"); + result.getContext(false).render(writer); + } + + /** + * Dump UTF-8 byte array to writer, but escape low ASCII codes except + * TAB, NL and CR, and escape ampersand, less than and greater than. + * + * <p> + * It is presumed the writer is buffered (which is the case in normal + * result rendering), as the method may perform a large number of write + * operations. + * + * <p> + * public only for testing. + */ + public static void dumpAndXMLQuoteUTF8(ByteWriter writer, byte[] utf) throws java.io.IOException { + int startDump = 0; + + for (int i = 0; i < utf.length; ++i) { + byte b = utf[i]; + if (b < 0) { + // Not ASCII, above character 127 + // Don't try to do something smart with UNICODE characters, + // just pass them through. 
+ } else if (b < 32) { + switch (b) { + case 9: + case 10: + case 13: + break; + default: + writer.append(utf, startDump, i - startDump); + startDump = i + 1; + quoteByte(writer, b); + break; + } + } else { + // printable ASCII + // quote special characters, otherwise do nothing + switch (b) { + // case 34: // double quote + // writer.append(utf, startDump, i - startDump); + // startDump = i + 1; + // writer.append(doubleQuote); + // break; + case 38: // ampersand + writer.append(utf, startDump, i - startDump); + startDump = i + 1; + writer.append(ampersand); + break; + case 60: // less than + writer.append(utf, startDump, i - startDump); + startDump = i + 1; + writer.append(lessThan); + break; + case 62: // greater than + writer.append(utf, startDump, i - startDump); + startDump = i + 1; + writer.append(greaterThan); + break; + } + } + } + if (startDump < utf.length) { + writer.append(utf, startDump, utf.length - startDump); + } + } + + /** + * If the field is available as a UTF-8 byte array, + * dump it to the writer. 
+ */ + public static boolean dumpBytes(ByteWriter writer, + FastHit hit, + String fieldName) throws java.io.IOException { + FastHit.RawField asBytes; + try { + asBytes = hit.fetchFieldAsUtf8(fieldName); + } catch (RuntimeException e) { + asBytes = null; + } + if (asBytes != null) { + if (asBytes.needXmlEscape()) { + dumpAndXMLQuoteUTF8(writer, asBytes.getUtf8()); + } else { + writer.append(asBytes.getUtf8()); + } + return true; + } + return false; + } + + private static void quoteByte(ByteWriter writer, byte b) throws java.io.IOException { + byte[] quoted = new byte[2]; + writer.append(quotePrefix); + quoted[0] = (byte) ((b >>> 4) + 0x30); + if (quoted[0] > 0x39) { + quoted[0] = (byte) (quoted[0] + 7); + } + quoted[1] = (byte) ((b & 0x0f) + 0x30); + if (quoted[1] > 0x39) { + quoted[1] = (byte) (quoted[1] + 7); + } + writer.append(quoted); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/templates/package-info.java b/container-search/src/main/java/com/yahoo/prelude/templates/package-info.java new file mode 100644 index 00000000000..7a273c6415f --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/templates/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +@PublicApi +package com.yahoo.prelude.templates; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; |