From 330a92e516e992f870ce83b66c6fe7f9335c5b0c Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Thu, 23 May 2019 14:50:58 +0200 Subject: Support URI matching in YQL --- .../java/com/yahoo/searchdefinition/Search.java | 18 ++- container-search/abi-spec.json | 33 +++++- .../main/java/com/yahoo/prelude/IndexModel.java | 6 + .../com/yahoo/prelude/query/CompositeItem.java | 8 +- .../yahoo/prelude/query/CompositeTaggableItem.java | 1 + .../main/java/com/yahoo/prelude/query/Item.java | 11 +- .../com/yahoo/prelude/query/MarkerWordItem.java | 47 +++++--- .../java/com/yahoo/prelude/query/PhraseItem.java | 7 ++ .../main/java/com/yahoo/prelude/query/UriItem.java | 81 ++++++++++++++ .../yahoo/prelude/query/parser/AbstractParser.java | 10 +- .../prelude/query/parser/StructuredParser.java | 11 +- .../com/yahoo/search/yql/MinimalQueryInserter.java | 5 +- .../java/com/yahoo/search/yql/VespaSerializer.java | 46 ++++++++ .../main/java/com/yahoo/search/yql/YqlParser.java | 99 ++++++++++------ .../test/ExactMatchAndDefaultIndexTestCase.java | 2 +- .../prelude/query/parser/test/ParseTestCase.java | 6 +- .../query/parser/test/TokenizerTestCase.java | 10 +- .../com/yahoo/prelude/test/IndexFactsTestCase.java | 4 +- .../java/com/yahoo/prelude/test/QueryTestCase.java | 2 +- .../search/querytransform/LowercasingTestCase.java | 2 +- .../querytransform/WandSearcherTestCase.java | 2 +- .../querytransform/test/NGramSearcherTestCase.java | 2 +- .../test/RangeQueryOptimizerTestCase.java | 2 +- .../test/SortingDegraderTestCase.java | 2 +- .../java/com/yahoo/search/test/QueryTestCase.java | 2 +- .../com/yahoo/search/yql/YqlParserTestCase.java | 124 ++++++++++++++++++++- 26 files changed, 442 insertions(+), 101 deletions(-) create mode 100644 container-search/src/main/java/com/yahoo/prelude/query/UriItem.java diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/Search.java b/config-model/src/main/java/com/yahoo/searchdefinition/Search.java index a99300cfd32..c4c33eab462 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/Search.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/Search.java @@ -55,32 +55,30 @@ public class Search implements Serializable, ImmutableSearch { return RESERVED_NAMES.contains(name); } - // Field sets private FieldSets fieldSets = new FieldSets(); - // The unique name of this search definition. + /** The unique name of this search definition */ private String name; - // True if this doesn't define a search, just some documents. + /** True if this doesn't define a search, just a document type */ private boolean documentsOnly = false; - // The stemming setting of this search definition. Default is BEST. + /** The stemming setting of this search definition. Default is BEST. */ private Stemming stemming = Stemming.BEST; - // Documents contained in this definition. + /** Documents contained in this definition */ private SDDocumentType docType; - // The extra fields of this search definition. + /** The extra fields of this search definition */ private Map fields = new LinkedHashMap<>(); - // The explicitly defined indices of this search definition. + /** The explicitly defined indices of this search definition */ private Map indices = new LinkedHashMap<>(); - // The explicitly defined summaries of this search definition. - // _Must_ preserve order + /** The explicitly defined summaries of this search definition. _Must_ preserve order. */ private Map summaries = new LinkedHashMap<>(); - // Ranking constants of this + /** Ranking constants of this */ private RankingConstants rankingConstants = new RankingConstants(); private Optional temporaryImportedFields = Optional.of(new TemporaryImportedFields()); diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index fb50da7bff1..33ccc8a46fa 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -424,6 +424,7 @@ "public boolean removeItem(com.yahoo.prelude.query.Item)", "public int getItemCount()", "public java.util.ListIterator getItemIterator()", + "public java.util.List items()", "public int encode(java.nio.ByteBuffer)", "protected void encodeThis(java.nio.ByteBuffer)", "protected int encodingArity()", @@ -808,12 +809,16 @@ "public" ], "methods": [ - "public static com.yahoo.prelude.query.WordItem createStartOfHost()", - "public static com.yahoo.prelude.query.WordItem createEndOfHost()", + "public boolean isStartAnchor()", + "public boolean isEndAnchor()", "protected java.lang.String getEncodedWord()", "public boolean equals(java.lang.Object)", "public int hashCode()", - "public void disclose(com.yahoo.prelude.query.textualrepresentation.Discloser)" + "public void disclose(com.yahoo.prelude.query.textualrepresentation.Discloser)", + "public static com.yahoo.prelude.query.MarkerWordItem createStartOfHost(java.lang.String)", + "public static com.yahoo.prelude.query.MarkerWordItem createStartOfHost()", + "public static com.yahoo.prelude.query.MarkerWordItem createEndOfHost(java.lang.String)", + "public static com.yahoo.prelude.query.MarkerWordItem createEndOfHost()" ], "fields": [] }, @@ -1523,6 +1528,28 @@ ], "fields": [] }, + "com.yahoo.prelude.query.UriItem": { + "superClass": "com.yahoo.prelude.query.PhraseItem", + "interfaces": [], + "attributes": [ + "public" + ], + "methods": [ + "public void ()", + "public void (java.lang.String)", + "public void addStartAnchorItem()", + "public void addEndAnchorItem()", + "public boolean hasStartAnchor()", + "public boolean hasEndAnchor()", + "public boolean isStartAnchorDefault()", + "public void setStartAnchorDefault(boolean)", + "public boolean isEndAnchorDefault()", + "public void setEndAnchorDefault(boolean)", + "public void setSourceString(java.lang.String)", + "public java.lang.String getArgumentString()" + ], + "fields": [] + }, "com.yahoo.prelude.query.WandItem": { "superClass": "com.yahoo.prelude.query.WeightedSetItem", "interfaces": [], diff --git a/container-search/src/main/java/com/yahoo/prelude/IndexModel.java b/container-search/src/main/java/com/yahoo/prelude/IndexModel.java index dd2bd1ee2f1..c83294efed7 100644 --- a/container-search/src/main/java/com/yahoo/prelude/IndexModel.java +++ b/container-search/src/main/java/com/yahoo/prelude/IndexModel.java @@ -3,6 +3,7 @@ package com.yahoo.prelude; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -28,6 +29,11 @@ public final class IndexModel { private Map searchDefinitions; private SearchDefinition unionSearchDefinition; + /** Create an index model for a single search definition */ + public IndexModel(SearchDefinition searchDefinition) { + this(Collections.emptyMap(), Collections.singleton(searchDefinition)); + } + /** * Create an index model. */ diff --git a/container-search/src/main/java/com/yahoo/prelude/query/CompositeItem.java b/container-search/src/main/java/com/yahoo/prelude/query/CompositeItem.java index eee9949d831..b9e8d700cd6 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/CompositeItem.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/CompositeItem.java @@ -9,6 +9,7 @@ import com.yahoo.search.query.QueryTree; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.ListIterator; @@ -162,16 +163,19 @@ public abstract class CompositeItem extends Item { return removed; } - /** Returns the number of direct ancestors of this item */ + /** Returns the number of direct children of this item */ public int getItemCount() { return subitems.size(); } - /** Returns a modifiable list iterator */ + /** Returns a modifiable list iterator of the immediate children of this */ public ListIterator getItemIterator() { return new ListIteratorWrapper(this); } + /** Returns a read only list of the immediate children of this */ + public List items() { return Collections.unmodifiableList(subitems); } + public int encode(ByteBuffer buffer) { encodeThis(buffer); int itemCount = 1; diff --git a/container-search/src/main/java/com/yahoo/prelude/query/CompositeTaggableItem.java b/container-search/src/main/java/com/yahoo/prelude/query/CompositeTaggableItem.java index 7d9ae751930..b1912e4128d 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/CompositeTaggableItem.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/CompositeTaggableItem.java @@ -68,4 +68,5 @@ public abstract class CompositeTaggableItem extends CompositeItem implements Tag public boolean hasUniqueID() { return super.hasUniqueID(); } + } diff --git a/container-search/src/main/java/com/yahoo/prelude/query/Item.java b/container-search/src/main/java/com/yahoo/prelude/query/Item.java index d8b5fc9451a..9d8ccce1b76 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/Item.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/Item.java @@ -373,6 +373,7 @@ public abstract class Item implements Cloneable { protected abstract void appendBodyString(StringBuilder buffer); /** Returns a deep copy of this item */ + @Override public Item clone() { try { Item clone = (Item)super.clone(); @@ -389,6 +390,7 @@ public abstract class Item implements Cloneable { * Returns whether this item is of the same class and * contains the same state as the given item */ + @Override public boolean equals(Object object) { if (object == null) { return false; @@ -405,11 +407,11 @@ public abstract class Item implements Cloneable { if (this.weight != other.weight) { return false; } - // if (this.termIndex!=other.termIndex) return false; return true; } + @Override public int hashCode() { return weight * 29 + creator.code; } @@ -433,12 +435,7 @@ public abstract class Item implements Cloneable { this.label = label; } - /** - * Obtain the label for this item. This method will return null if - * no label has been set. - * - * @return label for this item - **/ + /** Returns the label for this item. This method will return null if no label has been set. */ public String getLabel() { return label; } diff --git a/container-search/src/main/java/com/yahoo/prelude/query/MarkerWordItem.java b/container-search/src/main/java/com/yahoo/prelude/query/MarkerWordItem.java index 9fa530eb5ea..4e7036e3481 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/MarkerWordItem.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/MarkerWordItem.java @@ -9,32 +9,31 @@ import com.yahoo.prelude.query.textualrepresentation.Discloser; * Special words known by the index used for marking things. * The reserved word itself is not public, while a symbol representation is. * - * @author bratseth + * @author bratseth */ public class MarkerWordItem extends WordItem { - /** Creates a special word item which marks the start of a host name */ - public static WordItem createStartOfHost() { - return new MarkerWordItem("^", UrlTokenizer.TERM_STARTHOST); - } - - /** Creates a special word item which marks the end of a host name */ - public static WordItem createEndOfHost() { - return new MarkerWordItem("$", UrlTokenizer.TERM_ENDHOST); - } + private final String markerWord; - private String markerWord; + private final static String startAnchor = "^"; + private final static String endAnchor = "$"; - private MarkerWordItem(String publicSymbol, String markerWord) { - super(publicSymbol); + private MarkerWordItem(String publicSymbol, String markerWord, String indexName) { + super(publicSymbol, indexName); this.markerWord = markerWord; } + public boolean isStartAnchor() { return getWord().equals(startAnchor); } + + public boolean isEndAnchor() { return getWord().equals(endAnchor); } + /** Returns the marker word for encoding */ + @Override protected String getEncodedWord() { return markerWord; } + @Override public boolean equals(Object o) { if (!super.equals(o)) { return false; @@ -48,6 +47,7 @@ public class MarkerWordItem extends WordItem { return markerWord.equals(other.markerWord); } + @Override public int hashCode() { return super.hashCode() + 499 * markerWord.hashCode(); } @@ -57,4 +57,25 @@ public class MarkerWordItem extends WordItem { super.disclose(discloser); discloser.addProperty("markerWord", markerWord); } + + /** Creates a special word item which marks the start of a host name */ + public static MarkerWordItem createStartOfHost(String indexName) { + return new MarkerWordItem(startAnchor, UrlTokenizer.TERM_STARTHOST, indexName); + } + + /** Creates a special word item which marks the start of a host name, matching the default index */ + public static MarkerWordItem createStartOfHost() { + return createStartOfHost(""); + } + + /** Creates a special word item which marks the end of a host name */ + public static MarkerWordItem createEndOfHost(String indexName) { + return new MarkerWordItem(endAnchor, UrlTokenizer.TERM_ENDHOST, indexName); + } + + /** Creates a special word item which marks the end of a host name matching the default index */ + public static MarkerWordItem createEndOfHost() { + return createEndOfHost(""); + } + } diff --git a/container-search/src/main/java/com/yahoo/prelude/query/PhraseItem.java b/container-search/src/main/java/com/yahoo/prelude/query/PhraseItem.java index d7aec40bcf9..26da5eec7eb 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/PhraseItem.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/PhraseItem.java @@ -32,10 +32,12 @@ public class PhraseItem extends CompositeIndexedItem { } } + @Override public ItemType getItemType() { return ItemType.PHRASE; } + @Override public String getName() { return "PHRASE"; } @@ -162,10 +164,12 @@ public class PhraseItem extends CompositeIndexedItem { return (BlockItem) getItem(index); } + @Override protected void encodeThis(ByteBuffer buffer) { super.encodeThis(buffer); // takes care of index bytes } + @Override public int encode(ByteBuffer buffer) { encodeThis(buffer); int itemCount = 1; @@ -186,13 +190,16 @@ public class PhraseItem extends CompositeIndexedItem { } /** Returns false, no parenthezes for phrases */ + @Override protected boolean shouldParenthize() { return false; } /** Phrase items uses a empty heading instead of "PHRASE " */ + @Override protected void appendHeadingString(StringBuilder buffer) { } + @Override protected void appendBodyString(StringBuilder buffer) { appendIndexString(buffer); diff --git a/container-search/src/main/java/com/yahoo/prelude/query/UriItem.java b/container-search/src/main/java/com/yahoo/prelude/query/UriItem.java new file mode 100644 index 00000000000..b8d7ea7aa11 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/UriItem.java @@ -0,0 +1,81 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.util.ArrayList; +import java.util.List; +import java.util.ListIterator; +import java.util.stream.Collectors; + +/** + * An item matching a uri field. + * This is syntactic sugar for a phrase with anchor items. + * + * @author bratseth + */ +public class UriItem extends PhraseItem { + + // Fields here are auxiliary information not needed for semantics but used preserve a nice canonical form + private boolean startAnchorDefault = false; + private boolean endAnchorDefault = false; + private String sourceString; + + public UriItem() { + super(); + } + + public UriItem(String indexName) { + super(indexName); + } + + /** + * Adds a start anchor as the *current* first item. + * This does not enforce that the items stays at the start if further items are added. + * */ + public void addStartAnchorItem() { + addItem(0, MarkerWordItem.createStartOfHost(getIndexName())); + } + + /** + * Adds an end anchor as the *current* last item. + * This does not enforce that the items stays at the end if further items are added. + */ + public void addEndAnchorItem() { + addItem(MarkerWordItem.createEndOfHost(getIndexName())); + } + + /** Returns whether the first item of this is a start anchor */ + public boolean hasStartAnchor() { + return getItem(0) instanceof MarkerWordItem + && ((MarkerWordItem)getItem(0)).isStartAnchor(); + } + + /** Returns whether the last item of this is an end anchor */ + public boolean hasEndAnchor() { + return getItem(getItemCount()-1) instanceof MarkerWordItem + && ((MarkerWordItem)getItem(getItemCount()-1)).isEndAnchor(); + } + + public boolean isStartAnchorDefault() { return startAnchorDefault; } + public void setStartAnchorDefault(boolean startAnchorDefault) { this.startAnchorDefault = startAnchorDefault; } + + public boolean isEndAnchorDefault() { return endAnchorDefault; } + public void setEndAnchorDefault(boolean endAnchorDefault) { this.endAnchorDefault = endAnchorDefault; } + + public void setSourceString(String sourceString) { this.sourceString = sourceString; } + + /** + * Returns the canonical form of the tokens of this: Either the source string, or if none + * each token except the start and end anchor separated by space + */ + public String getArgumentString() { + if (sourceString != null) return sourceString; + + List items = new ArrayList<>(items()); + if (hasStartAnchor()) + items.remove(0); + if (hasEndAnchor()) + items.remove(items.size() - 1); + return items.stream().map(item -> ((WordItem)item).getWord()).collect(Collectors.joining(" ")); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java index 9652caf8d58..1923fdbc50d 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AbstractParser.java @@ -87,14 +87,10 @@ public abstract class AbstractParser implements CustomParser { } /** - * Returns whether we are in a mode which allows explicit anchoring - * markers, ^ and $ - * - * @return True if we are doing explicit anchoring. + * Returns whether we are in a mode which allows explicit anchoring markers, ^ and $ */ - public boolean explicitAnchoring() { - return site; - } + public boolean explicitAnchoring() { return site; } + } /** diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java index 8ecd4d8f81c..8c77b3d2130 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/StructuredParser.java @@ -557,7 +557,16 @@ abstract class StructuredParser extends AbstractParser { if (phrase != null) { phrase.addItem(word); } else if (firstWord != null) { - phrase = new PhraseItem(); + if (submodes.site || submodes.url) { + UriItem uriItem = new UriItem(); + if (submodes.site) + uriItem.setEndAnchorDefault(true); + phrase = uriItem; + } + else { + phrase = new PhraseItem(); + } + if (quoted || submodes.site || submodes.url) { phrase.setExplicit(true); } diff --git a/container-search/src/main/java/com/yahoo/search/yql/MinimalQueryInserter.java b/container-search/src/main/java/com/yahoo/search/yql/MinimalQueryInserter.java index 91992c3e29e..940d6d51975 100644 --- a/container-search/src/main/java/com/yahoo/search/yql/MinimalQueryInserter.java +++ b/container-search/src/main/java/com/yahoo/search/yql/MinimalQueryInserter.java @@ -38,9 +38,8 @@ public class MinimalQueryInserter extends Searcher { @Override public Result search(Query query, Execution execution) { - if (query.properties().get(YQL) == null) { - return execution.search(query); - } + if (query.properties().get(YQL) == null) return execution.search(query); + ParserEnvironment env = ParserEnvironment.fromExecutionContext(execution.context()); YqlParser parser = (YqlParser) ParserFactory.newInstance(Query.Type.YQL, env); parser.setQueryParser(false); diff --git a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java index c27c047c899..896d1cf5c9c 100644 --- a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java +++ b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java @@ -13,6 +13,7 @@ import static com.yahoo.search.yql.YqlParser.CONNECTION_WEIGHT; import static com.yahoo.search.yql.YqlParser.CONNECTIVITY; import static com.yahoo.search.yql.YqlParser.DISTANCE; import static com.yahoo.search.yql.YqlParser.DOT_PRODUCT; +import static com.yahoo.search.yql.YqlParser.END_ANCHOR; import static com.yahoo.search.yql.YqlParser.EQUIV; import static com.yahoo.search.yql.YqlParser.FILTER; import static com.yahoo.search.yql.YqlParser.HIT_LIMIT; @@ -33,12 +34,14 @@ import static com.yahoo.search.yql.YqlParser.RANKED; import static com.yahoo.search.yql.YqlParser.SAME_ELEMENT; import static com.yahoo.search.yql.YqlParser.SCORE_THRESHOLD; import static com.yahoo.search.yql.YqlParser.SIGNIFICANCE; +import static com.yahoo.search.yql.YqlParser.START_ANCHOR; import static com.yahoo.search.yql.YqlParser.STEM; import static com.yahoo.search.yql.YqlParser.SUBSTRING; import static com.yahoo.search.yql.YqlParser.SUFFIX; import static com.yahoo.search.yql.YqlParser.TARGET_NUM_HITS; import static com.yahoo.search.yql.YqlParser.THRESHOLD_BOOST_FACTOR; import static com.yahoo.search.yql.YqlParser.UNIQUE_ID; +import static com.yahoo.search.yql.YqlParser.URI; import static com.yahoo.search.yql.YqlParser.USE_POSITION_DATA; import static com.yahoo.search.yql.YqlParser.WAND; import static com.yahoo.search.yql.YqlParser.WEAK_AND; @@ -89,6 +92,7 @@ import com.yahoo.prelude.query.SuffixItem; import com.yahoo.prelude.query.TaggableItem; import com.yahoo.prelude.query.ToolBox; import com.yahoo.prelude.query.ToolBox.QueryVisitor; +import com.yahoo.prelude.query.UriItem; import com.yahoo.prelude.query.WandItem; import com.yahoo.prelude.query.WeakAndItem; import com.yahoo.prelude.query.WeightedSetItem; @@ -271,6 +275,47 @@ public class VespaSerializer { } + private static class UriSerializer extends Serializer { + @Override + void onExit(StringBuilder destination, Item item) { } + + @Override + boolean serialize(StringBuilder destination, Item item) { + UriItem uriItem = (UriItem) item; + String annotations = uriAnnotations(uriItem); + + destination.append(uriItem.getIndexName()).append(" contains "); + if (annotations.length() > 0) + destination.append('(').append(annotations); + destination.append(URI).append("(\""); + destination.append(uriItem.getArgumentString()); + destination.append("\")"); + if (annotations.length() > 0) + destination.append(')'); + return false; + } + + static String uriAnnotations(UriItem item) { + if (item.hasStartAnchor() == item.isStartAnchorDefault() && + item.hasEndAnchor() == item.isEndAnchorDefault()) + return ""; + + StringBuilder b = new StringBuilder(); + b.append("[{"); + if (item.hasStartAnchor() != item.isStartAnchorDefault()) { + b.append("\"" + START_ANCHOR + "\": " + item.hasStartAnchor()); + } + if (item.hasEndAnchor() != item.isEndAnchorDefault()) { + if (b.length() > 2) + b.append(", "); + b.append("\"" + END_ANCHOR + "\": " + item.hasEndAnchor()); + } + b.append("}]"); + return b.toString(); + } + + } + private static class NotSerializer extends Serializer { @Override @@ -1111,6 +1156,7 @@ public class VespaSerializer { dispatchBuilder.put(WeightedSetItem.class, new WeightedSetSerializer()); dispatchBuilder.put(WordItem.class, new WordSerializer()); dispatchBuilder.put(RegExpItem.class, new RegExpSerializer()); + dispatchBuilder.put(UriItem.class, new UriSerializer()); dispatch = ImmutableMap.copyOf(dispatchBuilder); } diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java index 23dcaba0b3b..097ed86bd9e 100644 --- a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java +++ b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java @@ -55,6 +55,7 @@ import com.yahoo.prelude.query.TaggableItem; import com.yahoo.prelude.query.TermItem; import com.yahoo.prelude.query.ToolBox; import com.yahoo.prelude.query.ToolBox.QueryVisitor; +import com.yahoo.prelude.query.UriItem; import com.yahoo.prelude.query.WandItem; import com.yahoo.prelude.query.WeakAndItem; import com.yahoo.prelude.query.WeightedSetItem; @@ -91,7 +92,6 @@ import edu.umd.cs.findbugs.annotations.NonNull; * @author Stian Kristoffersen * @author Simon Thoresen Hult */ -@Beta public class YqlParser implements Parser { private static final String DESCENDING_HITS_ORDER = "descending"; @@ -127,6 +127,8 @@ public class YqlParser implements Parser { private static final String USER_INPUT = "userInput"; private static final String USER_QUERY = "userQuery"; private static final String NON_EMPTY = "nonEmpty"; + public static final String START_ANCHOR = "startAnchor"; + public static final String END_ANCHOR = "endAnchor"; public static final String SORTING_FUNCTION = "function"; public static final String SORTING_LOCALE = "locale"; @@ -176,6 +178,7 @@ public class YqlParser implements Parser { static final String WEAK_AND = "weakAnd"; static final String WEIGHTED_SET = "weightedSet"; static final String WEIGHT = "weight"; + static final String URI = "uri"; private final IndexFacts indexFacts; private final List connectedItems = new ArrayList<>(); @@ -316,7 +319,6 @@ public class YqlParser implements Parser { } } - @NonNull private Item convertExpression(OperatorNode ast) { try { annotationStack.addFirst(ast); @@ -354,7 +356,6 @@ public class YqlParser implements Parser { } } - @NonNull private Item buildFunctionCall(OperatorNode ast) { List names = ast.getArgument(0); Preconditions.checkArgument(names.size() == 1, "Expected 1 name, got %s.", names.size()); @@ -481,8 +482,8 @@ public class YqlParser implements Parser { WandItem out = new WandItem(getIndex(args.get(0)), getAnnotation(ast, TARGET_NUM_HITS, Integer.class, DEFAULT_TARGET_NUM_HITS, "desired number of hits to accumulate in wand")); - Double scoreThreshold = getAnnotation(ast, SCORE_THRESHOLD, - Double.class, null, "min score for hit inclusion"); + Double scoreThreshold = getAnnotation(ast, SCORE_THRESHOLD, Double.class, null, + "min score for hit inclusion"); if (scoreThreshold != null) { out.setScoreThreshold(scoreThreshold); } @@ -1056,7 +1057,7 @@ public class YqlParser implements Parser { private Item buildTermSearch(OperatorNode ast) { assertHasOperator(ast, ExpressionOperator.CONTAINS); - String field = getIndex(ast.>getArgument(0)); + String field = getIndex(ast.getArgument(0)); if (userQuery != null && indexFactsSession.getIndex(field).isAttribute()) { userQuery.trace("Field '" + field + "' is an attribute, 'contains' will only match exactly", 1); } @@ -1065,11 +1066,11 @@ public class YqlParser implements Parser { private Item buildRegExpSearch(OperatorNode ast) { assertHasOperator(ast, ExpressionOperator.MATCHES); - String field = getIndex(ast.>getArgument(0)); + String field = getIndex(ast.getArgument(0)); if (userQuery != null && !indexFactsSession.getIndex(field).isAttribute()) { userQuery.trace("Field '" + field + "' is indexed, non-literal regular expressions will not be matched", 1); } - OperatorNode ast1 = ast.> getArgument(1); + OperatorNode ast1 = ast.getArgument(1); String wordData = getStringContents(ast1); RegExpItem regExp = new RegExpItem(field, true, wordData); return leafStyleSettings(ast1, regExp); @@ -1079,7 +1080,7 @@ public class YqlParser implements Parser { assertHasOperator(spec, ExpressionOperator.CALL); assertHasFunctionName(spec, RANGE); - IntItem range = instantiateRangeItem(spec.>> getArgument(1), spec); + IntItem range = instantiateRangeItem(spec.getArgument(1), spec); return leafStyleSettings(spec, range); } @@ -1101,16 +1102,15 @@ public class YqlParser implements Parser { } } - private IntItem instantiateRangeItem( - List> args, - OperatorNode spec) { + private IntItem instantiateRangeItem(List> args, + OperatorNode spec) { Preconditions.checkArgument(args.size() == 3, "Expected 3 arguments, got %s.", args.size()); Number lowerArg = getBound(args.get(1)); Number upperArg = getBound(args.get(2)); String bounds = getAnnotation(spec, BOUNDS, String.class, null, - "whether bounds should be open or closed"); + "whether bounds should be open or closed"); // TODO: add support for implicit transforms if (bounds == null) { return new RangeItem(lowerArg, upperArg, getIndex(args.get(0))); @@ -1127,8 +1127,7 @@ public class YqlParser implements Parser { from = new Limit(lowerArg, true); to = new Limit(upperArg, false); } else { - throw newUnexpectedArgumentException(bounds, BOUNDS_OPEN, - BOUNDS_LEFT_OPEN, BOUNDS_RIGHT_OPEN); + throw newUnexpectedArgumentException(bounds, BOUNDS_OPEN, BOUNDS_LEFT_OPEN, BOUNDS_RIGHT_OPEN); } return new IntItem(from, to, getIndex(args.get(0))); } @@ -1179,11 +1178,36 @@ public class YqlParser implements Parser { return instantiateEquivItem(field, ast); case ALTERNATIVES: return instantiateWordAlternativesItem(field, ast); + case URI: + return instantiateUriItem(field, ast); default: - throw newUnexpectedArgumentException(names.get(0), EQUIV, NEAR, ONEAR, PHRASE, SAME_ELEMENT); + throw newUnexpectedArgumentException(names.get(0), EQUIV, NEAR, ONEAR, PHRASE, SAME_ELEMENT, URI); } } + private Item instantiateEquivItem(String field, OperatorNode ast) { + List> args = ast.getArgument(1); + Preconditions.checkArgument(args.size() >= 2, "Expected 2 or more arguments, got %s.", args.size()); + + EquivItem equiv = new EquivItem(); + equiv.setIndexName(field); + for (OperatorNode arg : args) { + switch (arg.getOperator()) { + case LITERAL: + equiv.addItem(instantiateWordItem(field, arg, equiv.getClass())); + break; + case CALL: + assertHasFunctionName(arg, PHRASE); + equiv.addItem(instantiatePhraseItem(field, arg)); + break; + default: + throw newUnexpectedArgumentException(arg.getOperator(), + ExpressionOperator.CALL, ExpressionOperator.LITERAL); + } + } + return leafStyleSettings(ast, equiv); + } + private Item instantiateWordAlternativesItem(String field, OperatorNode ast) { List> args = ast.getArgument(1); Preconditions.checkArgument(args.size() >= 1, "Expected 1 or more arguments, got %s.", args.size()); @@ -1208,27 +1232,30 @@ public class YqlParser implements Parser { return leafStyleSettings(ast, new WordAlternativesItem(field, isFromQuery, origin, terms)); } - private Item instantiateEquivItem(String field, OperatorNode ast) { - List> args = ast.getArgument(1); - Preconditions.checkArgument(args.size() >= 2, "Expected 2 or more arguments, got %s.", args.size()); + private UriItem instantiateUriItem(String field, OperatorNode ast) { + UriItem uriItem = new UriItem(field); - EquivItem equiv = new EquivItem(); - equiv.setIndexName(field); - for (OperatorNode arg : args) { - switch (arg.getOperator()) { - case LITERAL: - equiv.addItem(instantiateWordItem(field, arg, equiv.getClass())); - break; - case CALL: - assertHasFunctionName(arg, PHRASE); - equiv.addItem(instantiatePhraseItem(field, arg)); - break; - default: - throw newUnexpectedArgumentException(arg.getOperator(), - ExpressionOperator.CALL, ExpressionOperator.LITERAL); - } - } - return leafStyleSettings(ast, equiv); + boolean startAnchorDefault = false; + boolean endAnchorDefault = indexFactsSession.getIndex(field).isHostIndex(); + + if (getAnnotation(ast, START_ANCHOR, Boolean.class, startAnchorDefault, + "whether uri matching should be anchored to the start")) + uriItem.addStartAnchorItem(); + + String uriString = ast.>> getArgument(1).get(0).getArgument(0); + for (String token : segmenter.segment(uriString, Language.ENGLISH)) + uriItem.addItem(new WordItem(token, field, true)); + + if (getAnnotation(ast, END_ANCHOR, Boolean.class, endAnchorDefault, + "whether uri matching should be anchored to the end")) + uriItem.addEndAnchorItem(); + + // Aux info to preserve minimal and expected canonical form + uriItem.setStartAnchorDefault(startAnchorDefault); + uriItem.setEndAnchorDefault(endAnchorDefault); + uriItem.setSourceString(uriString); + + return uriItem; } private Item instantiateWordItem(String field, OperatorNode ast, Class parent) { diff --git a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ExactMatchAndDefaultIndexTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ExactMatchAndDefaultIndexTestCase.java index fb43fa0421f..5cae40bd10d 100644 --- a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ExactMatchAndDefaultIndexTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ExactMatchAndDefaultIndexTestCase.java @@ -28,7 +28,7 @@ public class ExactMatchAndDefaultIndexTestCase { Index index = new Index("testexact"); index.setExact(true, null); sd.addIndex(index); - IndexFacts facts = new IndexFacts(new IndexModel(Collections.emptyMap(), Collections.singleton(sd))); + IndexFacts facts = new IndexFacts(new IndexModel(sd)); Query q = new Query("?query=" + enc("a/b foo.com") + "&default-index=testexact"); q.getModel().setExecution(new Execution(new Execution.Context(null, facts, null, null, null))); diff --git a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java index dc2f990431a..0fdad1a1f9c 100644 --- a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java @@ -2091,7 +2091,7 @@ public class ParseTestCase { index2.setExact(true, "()/aa*::*&"); sd.addIndex(index2); - IndexFacts indexFacts = new IndexFacts(new IndexModel(Collections.emptyMap(), Collections.singleton(sd))); + IndexFacts indexFacts = new IndexFacts(new IndexModel(sd)); ParsingTester customTester = new ParsingTester(indexFacts); customTester.assertParsed("testexact1:/,%&#", "testexact1:/,%&#", Query.Type.ALL); @@ -2109,7 +2109,7 @@ public class ParseTestCase { index1.setExact(true, "*!*"); sd.addIndex(index1); - IndexFacts indexFacts = new IndexFacts(new IndexModel(Collections.emptyMap(), Collections.singleton(sd))); + IndexFacts indexFacts = new IndexFacts(new IndexModel(sd)); ParsingTester customTester = new ParsingTester(indexFacts); customTester.assertParsed("testexact1:_-_*!200","testexact1:_-_*!**!!",Query.Type.ALL); @@ -2124,7 +2124,7 @@ public class ParseTestCase { index1.setExact(true, "*"); sd.addIndex(index1); - IndexFacts indexFacts = new IndexFacts(new IndexModel(Collections.emptyMap(), Collections.singleton(sd))); + IndexFacts indexFacts = new IndexFacts(new IndexModel(sd)); ParsingTester customTester = new ParsingTester(indexFacts); customTester.assertParsed("testexact1:_-_*!200","testexact1:_-_**!!",Query.Type.ALL); diff --git a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java index 70fc5d56ab9..12d993e8d41 100644 --- a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java @@ -316,7 +316,7 @@ public class TokenizerTestCase { index2.setExact(true, "()/aa*::*&"); sd.addIndex(index2); - IndexFacts facts = new IndexFacts(new IndexModel(Collections.emptyMap(), Collections.singleton(sd))); + IndexFacts facts = new IndexFacts(new IndexModel(sd)); IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet()); Tokenizer tokenizer=new Tokenizer(new SimpleLinguistics()); List tokens=tokenizer.tokenize("normal a:b (normal testexact1:/,%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*& b:c", "default", session); @@ -360,7 +360,7 @@ public class TokenizerTestCase { index2.setExact(true, "()/aa*::*&"); sd.addIndex(index2); - IndexFacts facts = new IndexFacts(new IndexModel(Collections.emptyMap(), Collections.singleton(sd))); + IndexFacts facts = new IndexFacts(new IndexModel(sd)); Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics()); IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet()); List tokens = tokenizer.tokenize("normal a:b (normal testexact1:/,%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*&", session); @@ -397,7 +397,7 @@ public class TokenizerTestCase { index2.setExact(true, "()/aa*::*&"); sd.addIndex(index2); - IndexFacts facts = new IndexFacts(new IndexModel(Collections.emptyMap(), Collections.singleton(sd))); + IndexFacts facts = new IndexFacts(new IndexModel(sd)); Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics()); IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet()); List tokens = tokenizer.tokenize("normal a:b (normal testexact1:/,%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*", session); @@ -434,7 +434,7 @@ public class TokenizerTestCase { index2.setExact(true, "()/aa*::*&"); sd.addIndex(index2); - IndexFacts facts = new IndexFacts(new IndexModel(Collections.emptyMap(), Collections.singleton(sd))); + IndexFacts facts = new IndexFacts(new IndexModel(sd)); Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics()); IndexFacts.Session session = facts.newSession(Collections.emptySet(), Collections.emptySet()); List tokens = tokenizer.tokenize("normal a:b (normal testexact1:!/%#%&+-+ ) testexact2:ho_/&%&/()/aa*::*&b:", session); @@ -472,7 +472,7 @@ public class TokenizerTestCase { index2.setExact(true, "()/aa*::*&"); sd.addIndex(index2); - IndexFacts indexFacts = new IndexFacts(new IndexModel(Collections.emptyMap(), Collections.singleton(sd))); + IndexFacts indexFacts = new IndexFacts(new IndexModel(sd)); IndexFacts.Session facts = indexFacts.newSession(Collections.emptySet(), Collections.emptySet()); Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics()); diff --git a/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java b/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java index e3a5ce76ffb..82a5a0c7a24 100644 --- a/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/test/IndexFactsTestCase.java @@ -140,7 +140,7 @@ public class IndexFactsTestCase { index.setExact(true,"^^^"); sd.addIndex(index); - IndexFacts indexFacts = new IndexFacts(new IndexModel(Collections.emptyMap(), Collections.singleton(sd))); + IndexFacts indexFacts = new IndexFacts(new IndexModel(sd)); Query query = new Query(); query.getModel().getSources().add("artist"); assertTrue(indexFacts.newSession(query).getIndex(indexName).isExact()); @@ -187,7 +187,7 @@ public class IndexFactsTestCase { sd.addIndex(u_index); sd.addIndex(b_index); - IndexFacts indexFacts = new IndexFacts(new IndexModel(Collections.emptyMap(), Collections.singleton(sd))); + IndexFacts indexFacts = new IndexFacts(new IndexModel(sd)); Query query = new Query(); query.getModel().getSources().add("foobar"); IndexFacts.Session session = indexFacts.newSession(query); diff --git a/container-search/src/test/java/com/yahoo/prelude/test/QueryTestCase.java b/container-search/src/test/java/com/yahoo/prelude/test/QueryTestCase.java index 627dbb13cbe..78868f37718 100644 --- a/container-search/src/test/java/com/yahoo/prelude/test/QueryTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/test/QueryTestCase.java @@ -376,7 +376,7 @@ public class QueryTestCase { sd.addIndex(new Index("default")); sd.addIndex(new Index("keyword")); sd.addIndex(new Index("content")); - return new IndexFacts(new IndexModel(Collections.emptyMap(), Collections.singleton(sd))); + return new IndexFacts(new IndexModel(sd)); } private Query newQueryFromEncoded(String encodedQueryString, Language language, Linguistics linguistics) { diff --git a/container-search/src/test/java/com/yahoo/search/querytransform/LowercasingTestCase.java b/container-search/src/test/java/com/yahoo/search/querytransform/LowercasingTestCase.java index 663fe004b43..c081708e1a5 100644 --- a/container-search/src/test/java/com/yahoo/search/querytransform/LowercasingTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/querytransform/LowercasingTestCase.java @@ -59,7 +59,7 @@ public class LowercasingTestCase { sd.addIndex(defaultIndex); sd.addIndex(sarrBamse); sd.addIndex(sarrTeddy); - return new IndexFacts(new IndexModel(Collections.emptyMap(), Collections.singleton(sd))); + return new IndexFacts(new IndexModel(sd)); } private Execution createExecution() { diff --git a/container-search/src/test/java/com/yahoo/search/querytransform/WandSearcherTestCase.java b/container-search/src/test/java/com/yahoo/search/querytransform/WandSearcherTestCase.java index c52cedaaf0e..68d1fe57134 100644 --- a/container-search/src/test/java/com/yahoo/search/querytransform/WandSearcherTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/querytransform/WandSearcherTestCase.java @@ -45,7 +45,7 @@ public class WandSearcherTestCase { private IndexFacts buildIndexFacts() { SearchDefinition sd = new SearchDefinition("test"); sd.addIndex(new Index(VESPA_FIELD)); - return new IndexFacts(new IndexModel(Collections.emptyMap(), Collections.singleton(sd))); + return new IndexFacts(new IndexModel(sd)); } private Execution buildExec() { diff --git a/container-search/src/test/java/com/yahoo/search/querytransform/test/NGramSearcherTestCase.java b/container-search/src/test/java/com/yahoo/search/querytransform/test/NGramSearcherTestCase.java index 60abac599c4..a3135222880 100644 --- a/container-search/src/test/java/com/yahoo/search/querytransform/test/NGramSearcherTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/querytransform/test/NGramSearcherTestCase.java @@ -71,7 +71,7 @@ public class NGramSearcherTestCase { gram14.setDynamicSummary(true); sd.addIndex(gram14); - return new IndexFacts(new IndexModel(Collections.emptyMap(), Collections.singleton(sd))); + return new IndexFacts(new IndexModel(sd)); } private Searcher createSearcher() { diff --git a/container-search/src/test/java/com/yahoo/search/querytransform/test/RangeQueryOptimizerTestCase.java b/container-search/src/test/java/com/yahoo/search/querytransform/test/RangeQueryOptimizerTestCase.java index cb380b31030..720c94ff26d 100644 --- a/container-search/src/test/java/com/yahoo/search/querytransform/test/RangeQueryOptimizerTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/querytransform/test/RangeQueryOptimizerTestCase.java @@ -221,7 +221,7 @@ public class RangeQueryOptimizerTestCase { sd.addIndex(singleValue1); sd.addIndex(singleValue2); sd.addIndex(multiValue); - return new IndexFacts(new IndexModel(Collections.emptyMap(), Collections.singleton(sd))); + return new IndexFacts(new IndexModel(sd)); } } diff --git a/container-search/src/test/java/com/yahoo/search/querytransform/test/SortingDegraderTestCase.java b/container-search/src/test/java/com/yahoo/search/querytransform/test/SortingDegraderTestCase.java index 243bacc73b3..fd6f4c42d1d 100644 --- a/container-search/src/test/java/com/yahoo/search/querytransform/test/SortingDegraderTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/querytransform/test/SortingDegraderTestCase.java @@ -171,7 +171,7 @@ public class SortingDegraderTestCase { test.addIndex(fastSearchAttribute2); test.addIndex(nonFastSearchAttribute); test.addIndex(stringAttribute); - return new IndexFacts(new IndexModel(Collections.emptyMap(), Collections.singleton(test))); + return new IndexFacts(new IndexModel(test)); } } diff --git a/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java b/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java index da4b657aca2..b65359a711b 100644 --- a/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/test/QueryTestCase.java @@ -887,7 +887,7 @@ public class QueryTestCase { tokenIndex.setPlainTokens(true); sd.addIndex(tokenIndex); } - IndexFacts indexFacts = new IndexFacts(new IndexModel(Collections.emptyMap(), Collections.singleton(sd))); + IndexFacts indexFacts = new IndexFacts(new IndexModel(sd)); MockLinguistics mockLinguistics = new MockLinguistics(); q.getModel().setExecution(new Execution(Execution.Context.createContextStub(null, indexFacts, mockLinguistics))); q.getModel().getQueryTree(); // cause parsing diff --git a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java index 18fad47de37..70d50b23bed 100644 --- a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java @@ -1,18 +1,20 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.search.yql; -import com.yahoo.component.Version; import com.yahoo.component.chain.Chain; import com.yahoo.container.QrSearchersConfig; import com.yahoo.language.Language; import com.yahoo.language.simple.SimpleLinguistics; +import com.yahoo.prelude.Index; import com.yahoo.prelude.IndexFacts; import com.yahoo.prelude.IndexModel; +import com.yahoo.prelude.SearchDefinition; import com.yahoo.prelude.query.AndItem; import com.yahoo.prelude.query.BoolItem; import com.yahoo.prelude.query.IndexedItem; import com.yahoo.prelude.query.ExactStringItem; import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.MarkerWordItem; import com.yahoo.prelude.query.PhraseItem; import com.yahoo.prelude.query.PhraseSegmentItem; import com.yahoo.prelude.query.PrefixItem; @@ -934,6 +936,126 @@ public class YqlParserTestCase { } } + @Test + public void testUrlHostSearchingDefaultAnchors() { + // Simple query syntax, for reference + assertUrlQuery("urlfield.hostname", new Query("?query=urlfield.hostname:google.com"), false, true, true); + + // YQL query + Query yql = new Query(); + yql.properties().set("yql", "select * from sources * where urlfield.hostname contains uri(\"google.com\");"); + assertUrlQuery("urlfield.hostname", yql, false, true, true); + } + + @Test + public void testUrlHostSearchingNoAnchors() { + // Simple query syntax, for reference + assertUrlQuery("urlfield.hostname", new Query("?query=urlfield.hostname:google.com*"), false, false, true); + + // YQL query + Query yql = new Query(); + yql.properties().set("yql", "select * from sources * where urlfield.hostname contains ([{\"endAnchor\": false }]uri(\"google.com\"));"); + assertUrlQuery("urlfield.hostname", yql, false, false, true); + } + + @Test + public void testUrlHostSearchingBothAnchors() { + // Simple query syntax, for reference + assertUrlQuery("urlfield.hostname", new Query("?query=urlfield.hostname:%5Egoogle.com"), true, true, true); // %5E = ^ + + // YQL query + Query yql = new Query(); + yql.properties().set("yql", "select * from sources * where urlfield.hostname contains ([{\"startAnchor\": true }] uri(\"google.com\"));"); + assertUrlQuery("urlfield.hostname", yql, true, true, true); + } + + @Test + public void testUriNonHostDoesNotCreateAnchors() { + // Simple query syntax, for reference + assertUrlQuery("urlfield", new Query("?query=urlfield:google.com"), false, false, false); + + // YQL query + Query yql = new Query(); + yql.properties().set("yql", "select * from sources * where urlfield contains uri(\"google.com\");"); + assertUrlQuery("urlfield", yql, false, false, false); + } + + private void assertUrlQuery(String field, Query query, boolean startAnchor, boolean endAnchor, boolean endAnchorIsDefault) { + boolean startAnchorIsDefault = false; // Always + + // Set up + SearchDefinition test = new SearchDefinition("test"); + Index urlField = new Index("urlfield"); + urlField.setUriIndex(true); + test.addIndex(urlField); + Index hostField = new Index("urlfield.hostname"); + hostField.setHostIndex(true); + test.addIndex(hostField); + + Chain searchChain = new Chain<>(new MinimalQueryInserter()); + Execution.Context context = Execution.Context.createContextStub(null, + new IndexFacts(new IndexModel(test)), + new SimpleLinguistics()); + Execution execution = new Execution(searchChain, context); + execution.search(query); + + // Check parsing and serial forms + if (endAnchor && startAnchor) + assertEquals(field + ":\"^ google com $\"", query.getModel().getQueryTree().toString()); + else if (startAnchor) + assertEquals(field + ":\"^ google com\"", query.getModel().getQueryTree().toString()); + else if (endAnchor) + assertEquals(field + ":\"google com $\"", query.getModel().getQueryTree().toString()); + else + assertEquals(field + ":\"google com\"", query.getModel().getQueryTree().toString()); + + + boolean hasAnnotations = startAnchor != startAnchorIsDefault || endAnchor != endAnchorIsDefault; + StringBuilder expectedYql = new StringBuilder("select * from sources * where "); + expectedYql.append(field).append(" contains "); + if (hasAnnotations) + expectedYql.append("([{"); + if (startAnchor != startAnchorIsDefault) + expectedYql.append("\"startAnchor\": " + startAnchor); + if (endAnchor != endAnchorIsDefault) { + if (startAnchor != startAnchorIsDefault) + expectedYql.append(", "); + expectedYql.append("\"endAnchor\": " + endAnchor); + } + if (hasAnnotations) + expectedYql.append("}]"); + expectedYql.append("uri("); + if (query.properties().get("yql") != null) + expectedYql.append("\"google.com\")"); // source string is preserved when parsing YQL + else + expectedYql.append("\"google com\")"); // but not with the simple syntax + if (hasAnnotations) + expectedYql.append(")"); + expectedYql.append(";"); + assertEquals(expectedYql.toString(), query.yqlRepresentation()); + + assertTrue(query.getModel().getQueryTree().getRoot() instanceof PhraseItem); + PhraseItem root = (PhraseItem)query.getModel().getQueryTree().getRoot(); + int expectedLength = 2; + if (startAnchor) + expectedLength++; + if (endAnchor) + expectedLength++; + assertEquals(expectedLength, root.getNumWords()); + + if (startAnchor) + assertEquals(MarkerWordItem.createStartOfHost("urlfield.hostname"), root.getItem(0)); + if (endAnchor) + assertEquals(MarkerWordItem.createEndOfHost("urlfield.hostname"), root.getItem(expectedLength-1)); + + // Check YQL parser-serialization roundtrip + Query reserialized = new Query(); + reserialized.properties().set("yql", query.yqlRepresentation()); + execution = new Execution(searchChain, context); + execution.search(reserialized); + assertEquals(query.yqlRepresentation(), reserialized.yqlRepresentation()); + } + private void checkWordAlternativesContent(WordAlternativesItem alternatives) { boolean seenTree = false; boolean seenForest = false; -- cgit v1.2.3