diff options
Diffstat (limited to 'container-search/src/main/java/com/yahoo/prelude/query')
8 files changed, 146 insertions, 30 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/CompositeItem.java b/container-search/src/main/java/com/yahoo/prelude/query/CompositeItem.java index eee9949d831..b9e8d700cd6 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/CompositeItem.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/CompositeItem.java @@ -9,6 +9,7 @@ import com.yahoo.search.query.QueryTree; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.ListIterator; @@ -162,16 +163,19 @@ public abstract class CompositeItem extends Item { return removed; } - /** Returns the number of direct ancestors of this item */ + /** Returns the number of direct children of this item */ public int getItemCount() { return subitems.size(); } - /** Returns a modifiable list iterator */ + /** Returns a modifiable list iterator of the immediate children of this */ public ListIterator<Item> getItemIterator() { return new ListIteratorWrapper(this); } + /** Returns a read only list of the immediate children of this */ + public List<Item> items() { return Collections.unmodifiableList(subitems); } + public int encode(ByteBuffer buffer) { encodeThis(buffer); int itemCount = 1; diff --git a/container-search/src/main/java/com/yahoo/prelude/query/CompositeTaggableItem.java b/container-search/src/main/java/com/yahoo/prelude/query/CompositeTaggableItem.java index 7d9ae751930..b1912e4128d 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/CompositeTaggableItem.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/CompositeTaggableItem.java @@ -68,4 +68,5 @@ public abstract class CompositeTaggableItem extends CompositeItem implements Tag public boolean hasUniqueID() { return super.hasUniqueID(); } + } diff --git a/container-search/src/main/java/com/yahoo/prelude/query/Item.java b/container-search/src/main/java/com/yahoo/prelude/query/Item.java index d8b5fc9451a..9d8ccce1b76 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/Item.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/Item.java @@ -373,6 +373,7 @@ public abstract class Item implements Cloneable { protected abstract void appendBodyString(StringBuilder buffer); /** Returns a deep copy of this item */ + @Override public Item clone() { try { Item clone = (Item)super.clone(); @@ -389,6 +390,7 @@ public abstract class Item implements Cloneable { * Returns whether this item is of the same class and * contains the same state as the given item */ + @Override public boolean equals(Object object) { if (object == null) { return false; @@ -405,11 +407,11 @@ public abstract class Item implements Cloneable { if (this.weight != other.weight) { return false; } - // if (this.termIndex!=other.termIndex) return false; return true; } + @Override public int hashCode() { return weight * 29 + creator.code; } @@ -433,12 +435,7 @@ public abstract class Item implements Cloneable { this.label = label; } - /** - * Obtain the label for this item. This method will return null if - * no label has been set. - * - * @return label for this item - **/ + /** Returns the label for this item. This method will return null if no label has been set. */ public String getLabel() { return label; } diff --git a/container-search/src/main/java/com/yahoo/prelude/query/MarkerWordItem.java b/container-search/src/main/java/com/yahoo/prelude/query/MarkerWordItem.java index 9fa530eb5ea..4e7036e3481 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/MarkerWordItem.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/MarkerWordItem.java @@ -9,32 +9,31 @@ import com.yahoo.prelude.query.textualrepresentation.Discloser; * Special words known by the index used for marking things. * The reserved word itself is not public, while a symbol representation is. * - * @author bratseth + * @author bratseth */ public class MarkerWordItem extends WordItem { - /** Creates a special word item which marks the start of a host name */ - public static WordItem createStartOfHost() { - return new MarkerWordItem("^", UrlTokenizer.TERM_STARTHOST); - } - - /** Creates a special word item which marks the end of a host name */ - public static WordItem createEndOfHost() { - return new MarkerWordItem("$", UrlTokenizer.TERM_ENDHOST); - } + private final String markerWord; - private String markerWord; + private final static String startAnchor = "^"; + private final static String endAnchor = "$"; - private MarkerWordItem(String publicSymbol, String markerWord) { - super(publicSymbol); + private MarkerWordItem(String publicSymbol, String markerWord, String indexName) { + super(publicSymbol, indexName); this.markerWord = markerWord; } + public boolean isStartAnchor() { return getWord().equals(startAnchor); } + + public boolean isEndAnchor() { return getWord().equals(endAnchor); } + /** Returns the marker word for encoding */ + @Override protected String getEncodedWord() { return markerWord; } + @Override public boolean equals(Object o) { if (!super.equals(o)) { return false; @@ -48,6 +47,7 @@ public class MarkerWordItem extends WordItem { return markerWord.equals(other.markerWord); } + @Override public int hashCode() { return super.hashCode() + 499 * markerWord.hashCode(); } @@ -57,4 +57,25 @@ public class MarkerWordItem extends WordItem { super.disclose(discloser); discloser.addProperty("markerWord", markerWord); } + + /** Creates a special word item which marks the start of a host name */ + public static MarkerWordItem createStartOfHost(String indexName) { + return new MarkerWordItem(startAnchor, UrlTokenizer.TERM_STARTHOST, indexName); + } + + /** Creates a special word item which marks the start of a host name, matching the default index */ + public static MarkerWordItem createStartOfHost() { + return createStartOfHost(""); + } + + /** Creates a special word item which marks the end of a host name */ + public static MarkerWordItem createEndOfHost(String indexName) { + return new MarkerWordItem(endAnchor, UrlTokenizer.TERM_ENDHOST, indexName); + } + + /** Creates a special word item which marks the end of a host name matching the default index */ + public static MarkerWordItem createEndOfHost() { + return createEndOfHost(""); + } + } diff --git a/container-search/src/main/java/com/yahoo/prelude/query/PhraseItem.java b/container-search/src/main/java/com/yahoo/prelude/query/PhraseItem.java index d7aec40bcf9..26da5eec7eb 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/PhraseItem.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/PhraseItem.java @@ -32,10 +32,12 @@ public class PhraseItem extends CompositeIndexedItem { } } + @Override public ItemType getItemType() { return ItemType.PHRASE; } + @Override public String getName() { return "PHRASE"; } @@ -162,10 +164,12 @@ public class PhraseItem extends CompositeIndexedItem { return (BlockItem) getItem(index); } + @Override protected void encodeThis(ByteBuffer buffer) { super.encodeThis(buffer); // takes care of index bytes } + @Override public int encode(ByteBuffer buffer) { encodeThis(buffer); int itemCount = 1; @@ -186,13 +190,16 @@ public class PhraseItem extends CompositeIndexedItem { } /** Returns false, no parenthezes for phrases */ + @Override protected boolean shouldParenthize() { return false; } /** Phrase items uses a empty heading instead of "PHRASE " */ + @Override protected void appendHeadingString(StringBuilder buffer) { } + @Override protected void appendBodyString(StringBuilder buffer) { appendIndexString(buffer); diff --git a/container-search/src/main/java/com/yahoo/prelude/query/UriItem.java b/container-search/src/main/java/com/yahoo/prelude/query/UriItem.java new file mode 100644 index 00000000000..b8d7ea7aa11 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/UriItem.java @@ -0,0 +1,81 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.util.ArrayList; +import java.util.List; +import java.util.ListIterator; +import java.util.stream.Collectors; + +/** + * An item matching a uri field. + * This is syntactic sugar for a phrase with anchor items. + * + * @author bratseth + */ +public class UriItem extends PhraseItem { + + // Fields here are auxiliary information not needed for semantics but used preserve a nice canonical form + private boolean startAnchorDefault = false; + private boolean endAnchorDefault = false; + private String sourceString; + + public UriItem() { + super(); + } + + public UriItem(String indexName) { + super(indexName); + } + + /** + * Adds a start anchor as the *current* first item. + * This does not enforce that the items stays at the start if further items are added. + * */ + public void addStartAnchorItem() { + addItem(0, MarkerWordItem.createStartOfHost(getIndexName())); + } + + /** + * Adds an end anchor as the *current* last item. + * This does not enforce that the items stays at the end if further items are added. + */ + public void addEndAnchorItem() { + addItem(MarkerWordItem.createEndOfHost(getIndexName())); + } + + /** Returns whether the first item of this is a start anchor */ + public boolean hasStartAnchor() { + return getItem(0) instanceof MarkerWordItem + && ((MarkerWordItem)getItem(0)).isStartAnchor(); + } + + /** Returns whether the last item of this is an end anchor */ + public boolean hasEndAnchor() { + return getItem(getItemCount()-1) instanceof MarkerWordItem + && ((MarkerWordItem)getItem(getItemCount()-1)).isEndAnchor(); + } + + public boolean isStartAnchorDefault() { return startAnchorDefault; } + public void setStartAnchorDefault(boolean startAnchorDefault) { this.startAnchorDefault = startAnchorDefault; } + + public boolean isEndAnchorDefault() { return endAnchorDefault; } + public void setEndAnchorDefault(boolean endAnchorDefault) { this.endAnchorDefault = endAnchorDefault; } + + public void setSourceString(String sourceString) { this.sourceString = sourceString; } + + /** + * Returns the canonical form of the tokens of this: Either the source string, or if none + * each token except the start and end anchor separated by space + */ + public String getArgumentString() { + if (sourceString != null) return sourceString; + + List<Item> items = new ArrayList<>(items()); + if (hasStartAnchor()) + items.remove(0); + if (hasEndAnchor()) + items.remove(items.size() - 1); + return items.stream().map(item -> ((WordItem)item).getWord()).collect(Collectors.joining(" ")); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java index 9652caf8d58..1923fdbc50d 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java @@ -87,14 +87,10 @@ public abstract class AbstractParser implements CustomParser { } /** - * Returns whether we are in a mode which allows explicit anchoring - * markers, ^ and $ - * - * @return True if we are doing explicit anchoring. + * Returns whether we are in a mode which allows explicit anchoring markers, ^ and $ */ - public boolean explicitAnchoring() { - return site; - } + public boolean explicitAnchoring() { return site; } + } /** diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java index 8ecd4d8f81c..8c77b3d2130 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java @@ -557,7 +557,16 @@ abstract class StructuredParser extends AbstractParser { if (phrase != null) { phrase.addItem(word); } else if (firstWord != null) { - phrase = new PhraseItem(); + if (submodes.site || submodes.url) { + UriItem uriItem = new UriItem(); + if (submodes.site) + uriItem.setEndAnchorDefault(true); + phrase = uriItem; + } + else { + phrase = new PhraseItem(); + } + if (quoted || submodes.site || submodes.url) { phrase.setExplicit(true); } |