diff options
Diffstat (limited to 'container-search')
9 files changed, 218 insertions, 3 deletions
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index b7aa1a8d0ef..6249988a5ee 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -541,6 +541,30 @@ ], "fields": [] }, + "com.yahoo.prelude.query.FuzzyItem": { + "superClass": "com.yahoo.prelude.query.TermItem", + "interfaces": [], + "attributes": [ + "public" + ], + "methods": [ + "public void <init>(java.lang.String, boolean, java.lang.String)", + "public void setValue(java.lang.String)", + "public java.lang.String getRawWord()", + "public boolean isWords()", + "public com.yahoo.prelude.query.Item$ItemType getItemType()", + "public java.lang.String getName()", + "public java.lang.String stringValue()", + "public boolean isStemmed()", + "public java.lang.String getIndexedString()", + "public int getNumWords()", + "public boolean equals(java.lang.Object)", + "public int hashCode()", + "public java.lang.String toString()", + "protected void encodeThis(java.nio.ByteBuffer)" + ], + "fields": [] + }, "com.yahoo.prelude.query.GeoLocationItem": { "superClass": "com.yahoo.prelude.query.TermItem", "interfaces": [], @@ -742,6 +766,7 @@ "public static final enum com.yahoo.prelude.query.Item$ItemType GEO_LOCATION_TERM", "public static final enum com.yahoo.prelude.query.Item$ItemType TRUE", "public static final enum com.yahoo.prelude.query.Item$ItemType FALSE", + "public static final enum com.yahoo.prelude.query.Item$ItemType FUZZY", "public final int code" ] }, diff --git a/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java new file mode 100644 index 00000000000..74c31a5d1f0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java @@ -0,0 +1,107 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.query; + +import java.nio.ByteBuffer; + +/** + * Fuzzy search term + * + * @author alexeyche + */ +public class FuzzyItem extends TermItem { + private String term; + + public FuzzyItem(String indexName, boolean isFromQuery, String term) { + super(indexName, isFromQuery, null); + setValue(term); + } + + @Override + public void setValue(String value) { + this.term = value; + } + + @Override + public String getRawWord() { + return stringValue(); + } + + @Override + public boolean isWords() { + return false; + } + + @Override + public ItemType getItemType() { + return ItemType.FUZZY; + } + + @Override + public String getName() { + return "FUZZY"; + } + + @Override + public String stringValue() { + return term; + } + + @Override + public boolean isStemmed() { + return false; + } + + @Override + public String getIndexedString() { + return stringValue(); + } + + @Override + public int getNumWords() { + return 1; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!super.equals(obj)) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + FuzzyItem other = (FuzzyItem) obj; + if (term == null) { + if (other.term != null) { + return false; + } + } else if (!term.equals(other.term)) { + return false; + } + return true; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + ((term == null) ? 0 : term.hashCode()); + return result; + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append("FuzzyItem [term=").append(term).append("]"); + return builder.toString(); + } + + @Override + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); + putString(getIndexedString(), buffer); + } +} + diff --git a/container-search/src/main/java/com/yahoo/prelude/query/Item.java b/container-search/src/main/java/com/yahoo/prelude/query/Item.java index 2e0c3cf8593..02b208b6ce1 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/Item.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/Item.java @@ -56,7 +56,8 @@ public abstract class Item implements Cloneable { NEAREST_NEIGHBOR(26), GEO_LOCATION_TERM(27), TRUE(28), - FALSE(29); + FALSE(29), + FUZZY(30); public final int code; diff --git a/container-search/src/main/java/com/yahoo/search/query/SelectParser.java b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java index 1805a11ff5e..320148ec01d 100644 --- a/container-search/src/main/java/com/yahoo/search/query/SelectParser.java +++ b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java @@ -16,6 +16,7 @@ import com.yahoo.prelude.query.CompositeItem; import com.yahoo.prelude.query.DotProductItem; import com.yahoo.prelude.query.EquivItem; import com.yahoo.prelude.query.ExactStringItem; +import com.yahoo.prelude.query.FuzzyItem; import com.yahoo.prelude.query.IntItem; import com.yahoo.prelude.query.Item; import com.yahoo.prelude.query.Limit; @@ -82,6 +83,7 @@ import static com.yahoo.search.yql.YqlParser.DISTANCE_THRESHOLD; import static com.yahoo.search.yql.YqlParser.DOT_PRODUCT; import static com.yahoo.search.yql.YqlParser.EQUIV; import static com.yahoo.search.yql.YqlParser.FILTER; +import static com.yahoo.search.yql.YqlParser.FUZZY; import static com.yahoo.search.yql.YqlParser.GEO_LOCATION; import static com.yahoo.search.yql.YqlParser.HIT_LIMIT; import static com.yahoo.search.yql.YqlParser.HNSW_EXPLORE_ADDITIONAL_HITS; @@ -926,6 +928,8 @@ public class SelectParser implements Parser { return instantiateONearItem(field, key, value); case EQUIV: return instantiateEquivItem(field, key, value); + case FUZZY: + return instantiateFuzzyItem(field, key, value); case ALTERNATIVES: return instantiateWordAlternativesItem(field, key, value); default: @@ -1155,6 +1159,15 @@ public class SelectParser implements Parser { return leafStyleSettings(getAnnotations(value), equiv); } + private Item instantiateFuzzyItem(String field, String key, Inspector value) { + HashMap<Integer, Inspector> children = childMap(value); + Preconditions.checkArgument(children.size() == 1, "Expected 1 argument, got %s.", children.size()); + String wordData = children.get(0).asString(); + FuzzyItem fuzzy = new FuzzyItem(field, true, wordData); + + return leafStyleSettings(getAnnotations(value), fuzzy); + } + private Item instantiateWordAlternativesItem(String field, String key, Inspector value) { HashMap<Integer, Inspector> children = childMap(value); Preconditions.checkArgument(children.size() >= 1, "Expected 1 or more arguments, got %s.", children.size()); diff --git a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java index cc441eb0c3d..e778798b0e5 100644 --- a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java +++ b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java @@ -16,6 +16,7 @@ import static com.yahoo.search.yql.YqlParser.DOT_PRODUCT; import static com.yahoo.search.yql.YqlParser.END_ANCHOR; import static com.yahoo.search.yql.YqlParser.EQUIV; import static com.yahoo.search.yql.YqlParser.FILTER; +import static com.yahoo.search.yql.YqlParser.FUZZY; import static com.yahoo.search.yql.YqlParser.GEO_LOCATION; import static com.yahoo.search.yql.YqlParser.HIT_LIMIT; import static com.yahoo.search.yql.YqlParser.IMPLICIT_TRANSFORMS; @@ -70,6 +71,7 @@ import com.yahoo.prelude.query.BoolItem; import com.yahoo.prelude.query.DotProductItem; import com.yahoo.prelude.query.EquivItem; import com.yahoo.prelude.query.FalseItem; +import com.yahoo.prelude.query.FuzzyItem; import com.yahoo.prelude.query.ExactStringItem; import com.yahoo.prelude.query.IndexedItem; import com.yahoo.prelude.query.IntItem; @@ -517,6 +519,32 @@ public class VespaSerializer { } } + private static class FuzzySerializer extends Serializer<FuzzyItem> { + + @Override + void onExit(StringBuilder destination, FuzzyItem item) { } + + @Override + boolean serialize(StringBuilder destination, FuzzyItem fuzzy) { + String annotations = leafAnnotations(fuzzy); + destination.append(normalizeIndexName(fuzzy.getIndexName())).append(" contains "); + + if (annotations.length() > 0) { + destination.append('(').append(annotations); + } + + destination.append(FUZZY).append('('); + destination.append('"'); + escape(fuzzy.getIndexedString(), destination).append('"'); + destination.append(')'); + + if (annotations.length() > 0) { + destination.append(')'); + } + return false; + } + } + private static class ONearSerializer extends Serializer<ONearItem> { @Override @@ -1239,6 +1267,7 @@ public class VespaSerializer { dispatchBuilder.put(WordItem.class, new WordSerializer()); dispatchBuilder.put(RegExpItem.class, new RegExpSerializer()); dispatchBuilder.put(UriItem.class, new UriSerializer()); + dispatchBuilder.put(FuzzyItem.class, new FuzzySerializer()); dispatch = ImmutableMap.copyOf(dispatchBuilder); } diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java index 26508fec3c4..06ee0e706f3 100644 --- a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java +++ b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java @@ -34,6 +34,7 @@ import com.yahoo.prelude.query.CompositeItem; import com.yahoo.prelude.query.DotProductItem; import com.yahoo.prelude.query.EquivItem; import com.yahoo.prelude.query.FalseItem; +import com.yahoo.prelude.query.FuzzyItem; import com.yahoo.prelude.query.ExactStringItem; import com.yahoo.prelude.query.IntItem; import com.yahoo.prelude.query.Item; @@ -192,6 +193,7 @@ public class YqlParser implements Parser { public static final String WEAK_AND = "weakAnd"; public static final String WEIGHT = "weight"; public static final String WEIGHTED_SET = "weightedSet"; + public static final String FUZZY = "fuzzy"; private final IndexFacts indexFacts; private final List<ConnectedItem> connectedItems = new ArrayList<>(); @@ -1171,7 +1173,7 @@ public class YqlParser implements Parser { assertHasOperator(ast, ExpressionOperator.CONTAINS); String field = getIndex(ast.getArgument(0)); if (userQuery != null && indexFactsSession.getIndex(field).isAttribute()) { - userQuery.trace("Field '" + field + "' is an attribute, 'contains' will only match exactly", 2); + userQuery.trace("Field '" + field + "' is an attribute, 'contains' will only match exactly (unless fuzzy is used)", 2); } return instantiateLeafItem(field, ast.<OperatorNode<ExpressionOperator>> getArgument(1)); } @@ -1298,11 +1300,23 @@ public class YqlParser implements Parser { return instantiateWordAlternativesItem(field, ast); case URI: return instantiateUriItem(field, ast); + case FUZZY: + return instantiateFuzzyItem(field, ast); default: - throw newUnexpectedArgumentException(names.get(0), EQUIV, NEAR, ONEAR, PHRASE, SAME_ELEMENT, URI); + throw newUnexpectedArgumentException(names.get(0), EQUIV, NEAR, ONEAR, PHRASE, SAME_ELEMENT, URI, FUZZY); } } + private Item instantiateFuzzyItem(String field, OperatorNode<ExpressionOperator> ast) { + List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); + Preconditions.checkArgument(args.size() == 1, "Expected 1 argument, got %s.", args.size()); + + String wordData = getStringContents(args.get(0)); + + FuzzyItem fuzzy = new FuzzyItem(field, true, wordData); + return leafStyleSettings(ast, fuzzy); + } + private Item instantiateEquivItem(String field, OperatorNode<ExpressionOperator> ast) { List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); Preconditions.checkArgument(args.size() >= 2, "Expected 2 or more arguments, got %s.", args.size()); diff --git a/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java index 7057f996041..1269c2a5aef 100644 --- a/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java @@ -443,4 +443,10 @@ public class VespaSerializerTestCase { + "alternatives({\"trees\": 1.0, \"tree\": 0.7}))" + ")"); } + + @Test + public void testFuzzy() { + parseAndConfirm("foo contains fuzzy(\"a\")"); + } + } diff --git a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java index 15713dc1f97..f40e212adde 100644 --- a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java @@ -11,6 +11,7 @@ import com.yahoo.prelude.SearchDefinition; import com.yahoo.prelude.query.AndItem; import com.yahoo.prelude.query.BoolItem; import com.yahoo.prelude.query.ExactStringItem; +import com.yahoo.prelude.query.FuzzyItem; import com.yahoo.prelude.query.IndexedItem; import com.yahoo.prelude.query.Item; import com.yahoo.prelude.query.MarkerWordItem; @@ -382,6 +383,15 @@ public class YqlParserTestCase { } @Test + public void testFuzzy() { + QueryTree x = parse("select foo from bar where baz contains fuzzy(\"a b\")"); + Item root = x.getRoot(); + assertSame(FuzzyItem.class, root.getClass()); + assertEquals("baz", ((FuzzyItem) root).getIndexName()); + assertEquals("a b", ((FuzzyItem) root).stringValue()); + } + + @Test public void testStemming() { assertTrue(getRootWord("select foo from bar where baz contains " + "([ {stem: false} ]\"colors\")").isStemmed()); diff --git a/container-search/src/test/java/com/yahoo/select/SelectTestCase.java b/container-search/src/test/java/com/yahoo/select/SelectTestCase.java index 0dcfb8392ef..9b867be1484 100644 --- a/container-search/src/test/java/com/yahoo/select/SelectTestCase.java +++ b/container-search/src/test/java/com/yahoo/select/SelectTestCase.java @@ -6,6 +6,7 @@ import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.yahoo.prelude.query.AndItem; import com.yahoo.prelude.query.ExactStringItem; +import com.yahoo.prelude.query.FuzzyItem; import com.yahoo.prelude.query.Item; import com.yahoo.prelude.query.PhraseItem; import com.yahoo.prelude.query.PrefixItem; @@ -674,6 +675,15 @@ public class SelectTestCase { checkWordAlternativesContent(alternatives); } + @Test + public void testFuzzy() { + QueryTree x = parseWhere("{ \"contains\": [\"description\", { \"fuzzy\": [\"a b\"] }] }"); + Item root = x.getRoot(); + assertSame(FuzzyItem.class, root.getClass()); + assertEquals("description", ((FuzzyItem) root).getIndexName()); + assertEquals("a b", ((FuzzyItem) root).stringValue()); + } + //------------------------------------------------------------------- grouping tests @Test |