// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.prelude.query; import com.yahoo.collections.CopyOnWriteHashMap; import com.yahoo.compress.IntegerCompressor; import com.yahoo.language.Language; import com.yahoo.prelude.query.textualrepresentation.Discloser; import com.yahoo.search.query.QueryTree; import com.yahoo.text.Utf8; import java.nio.ByteBuffer; import java.util.Objects; import java.util.Optional; /** *
A term of the query language. As "term" is also the common term (sorry) * for a literal to be found (or not) in a search index, the term item * is used for query language terms.
 * *The query is represented as a composite tree of * Item subclasses. This allows arbitrarily complex combinations of ands, * nots, phrases and so on.
* *Items are in general mutable and not thread safe.
* * @author bratseth * @author havardpe */ public abstract class Item implements Cloneable { /** * The definitions in Item.ItemType must match the ones in * searchlib/src/vespa/searchlib/parsequery/parse.h */ public static enum ItemType { OR(0), AND(1), NOT(2), RANK(3), WORD(4), INT(5), PHRASE(6), PAREN(7), // TODO not used - remove on Vespa 8 PREFIX(8), SUBSTRING(9), NEAR(11), ONEAR(12), SUFFIX(13), EQUIV(14), WEIGHTEDSET(15), WEAK_AND(16), EXACT(17), SAME_ELEMENT(18), PURE_WEIGHTED_STRING(19), PURE_WEIGHTED_INTEGER(20), DOTPRODUCT(21), WAND(22), PREDICATE_QUERY(23), REGEXP(24), WORD_ALTERNATIVES(25), NEAREST_NEIGHBOR(26), GEO_LOCATION_TERM(27); public final int code; private ItemType(int code) { this.code = code; } } public static final int DEFAULT_WEIGHT = 100; /** The relative importance of this term in the query. Default is 100 */ private int weight = DEFAULT_WEIGHT; /** * The definitions in Item.ItemCreator must match the ones in * searchlib/src/searchlib/parsequery/parse.h */ public enum ItemCreator { ORIG(0), FILTER(1); public final int code; ItemCreator(int code) { this.code = code; } } private boolean fromSpecialToken = false; private ItemCreator creator = ItemCreator.ORIG; /** The parent in the query tree, or null if this is a root */ private CompositeItem parent = null; /** The annotations made on this item */ private CopyOnWriteHashMapReturns the canonical query language string of this item.
 * *The canonical language represents an item by the string *
* ([itemName] [body]) ** where the body may recursively be other items. * *
* TODO: Change the output query language into a canonical form of the input * query language */ @Override public String toString() { StringBuilder buffer = new StringBuilder(); if (shouldParenthize()) { buffer.append("("); } if (isFilter()) { buffer.append("|"); } appendHeadingString(buffer); appendBodyString(buffer); if (shouldParenthize()) { buffer.append(")"); } if (weight != DEFAULT_WEIGHT) { buffer.append("!"); buffer.append(weight); } return buffer.toString(); } /** * Returns whether or not this item should be parethized when printed. * Default is false - no parentheses */ protected boolean shouldParenthize() { return false; } /** Appends the heading of this string. As default getName() followed by a space. */ protected void appendHeadingString(StringBuilder buffer) { buffer.append(getName()); buffer.append(" "); } /** * Override to append the item body in the canonical query language of this item. * An item is usually represented by the string *
* ([itemName] [body]) ** The body must be appended appended by this method. */ protected abstract void appendBodyString(StringBuilder buffer); /** Returns a deep copy of this item */ @Override public Item clone() { try { Item clone = (Item)super.clone(); if (this.annotations != null) clone.annotations = this.annotations.clone(); // note: connectedItem and connectedBacklink references are corrected in CompositeItem.clone() return clone; } catch (CloneNotSupportedException e) { throw new RuntimeException("Someone made Item unclonable"); } } /** * Returns whether this item is of the same class and * contains the same state as the given item */ @Override public boolean equals(Object object) { if (object == null) { return false; } if (object.getClass() != this.getClass()) { return false; } // Fails on different c.l.'s Item other = (Item) object; if (this.creator != other.creator) { return false; } if (this.weight != other.weight) { return false; } return true; } @Override public int hashCode() { return weight * 29 + creator.code; } protected boolean hasUniqueID() { return hasUniqueID; } protected void setHasUniqueID(boolean hasUniqueID) { this.hasUniqueID = hasUniqueID; } /** * Label this item with a symbolic name which can later be used by * the back-end to identify specific items for ranking purposes. * * @param label label for this item **/ public void setLabel(String label) { setHasUniqueID(true); this.label = label; } /** Returns the label for this item. This method will return null if no label has been set. */ public String getLabel() { return label; } /** * Sets whether or not this term item should affect ranking. * If set to false this term is not exposed to the ranking framework in the search backend. */ public void setRanked(boolean isRanked) { this.isRanked = isRanked; } /** Returns whether or not this item should affect ranking. 
*/ public boolean isRanked() { return isRanked; } /** * Sets whether or not position data should be used when ranking this term item. * If set to false the search backend uses fast bit vector data structures when matching on this term * and only a few simple ranking features will be available when ranking this term. * Note that setting this to false also saves a lot of CPU during matching as bit vector data structures are used. */ public void setPositionData(boolean usePositionData) { this.usePositionData = usePositionData; } /** Returns whether or not position data should be used when ranking this item */ public boolean usePositionData() { return usePositionData; } public void disclose(Discloser discloser) { discloser.addProperty("connectivity", connectivity); discloser.addProperty("connectedItem", connectedItem); //reference discloser.addProperty("creator", creator); discloser.addProperty("explicitSignificance", explicitSignificance); discloser.addProperty("isRanked", isRanked); discloser.addProperty("usePositionData", usePositionData); discloser.addProperty("significance", significance); discloser.addProperty("weight", weight); if (label != null) { discloser.addProperty("label", label); } if (hasUniqueID) { discloser.addProperty("uniqueID", uniqueID); } } public boolean isFromSpecialToken() { return fromSpecialToken; } public void setFromSpecialToken(boolean fromSpecialToken) { this.fromSpecialToken = fromSpecialToken; } /** Returns the language of any natural language text below this item, or Language.UNKNOWN if not set. */ public Language getLanguage() { return language; } /** * Sets the language of any natural language text below this item. 
* This cannot be set to null but can be set to Language.UNKNOWN */ public void setLanguage(Language language) { Objects.requireNonNull(language, "Language cannot be null"); this.language = language; } /** * DO NOT USE */ public boolean hasConnectivityBackLink() { return connectedBacklink != null; } /** Returns true if this is the root item - that is if the parent is the QueryTree (or null for legacy reasons)*/ public boolean isRoot() { if (getParent()==null) return true; if (getParent() instanceof QueryTree) return true; return false; } }