diff options
author | Geir Storli <geirst@yahooinc.com> | 2022-04-11 11:26:02 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-11 11:26:02 +0200 |
commit | 65dc21685f2286a30c82c7d14c9fe5fe5c42d412 (patch) | |
tree | c28d2a5dc1bff4dab1051c163042b84899d2bb2c /container-search | |
parent | 23841f2517967c1a59cf9826f1de953c5caa7199 (diff) | |
parent | 16aaf73dc37c63fa92a3298e6c9f8fa6ed32422a (diff) |
Merge pull request #21972 from alexeyche/alexeyche/fuzzy-query-annotations
Propagating annotations for fuzzy query [WIP]
Diffstat (limited to 'container-search')
7 files changed, 136 insertions, 24 deletions
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index fafb79ae6f6..303e0f65b0c 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -548,7 +548,11 @@ "public" ], "methods": [ - "public void <init>(java.lang.String, boolean, java.lang.String)", + "public void <init>(java.lang.String, boolean, java.lang.String, int, int)", + "public void setMaxEditDistance(int)", + "public void setPrefixLength(int)", + "public int getPrefixLength()", + "public int getMaxEditDistance()", "public void setValue(java.lang.String)", "public java.lang.String getRawWord()", "public boolean isWords()", @@ -560,10 +564,13 @@ "public int getNumWords()", "public boolean equals(java.lang.Object)", "public int hashCode()", - "public java.lang.String toString()", + "protected void appendHeadingString(java.lang.StringBuilder)", "protected void encodeThis(java.nio.ByteBuffer)" ], - "fields": [] + "fields": [ + "public static int DEFAULT_MAX_EDIT_DISTANCE", + "public static int DEFAULT_PREFIX_LENGTH" + ] }, "com.yahoo.prelude.query.GeoLocationItem": { "superClass": "com.yahoo.prelude.query.TermItem", diff --git a/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java index 74c31a5d1f0..b26205b74e9 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java @@ -1,7 +1,10 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.prelude.query; +import com.yahoo.compress.IntegerCompressor; + import java.nio.ByteBuffer; +import java.util.Objects; /** * Fuzzy search term @@ -11,9 +14,37 @@ import java.nio.ByteBuffer; public class FuzzyItem extends TermItem { private String term; - public FuzzyItem(String indexName, boolean isFromQuery, String term) { + private int maxEditDistance; + private int prefixLength; + + public static int DEFAULT_MAX_EDIT_DISTANCE = 2; + public static int DEFAULT_PREFIX_LENGTH = 0; + + public FuzzyItem(String indexName, boolean isFromQuery, String term, int maxEditDistance, int prefixLength) { super(indexName, isFromQuery, null); setValue(term); + setMaxEditDistance(maxEditDistance); + setPrefixLength(prefixLength); + } + + public void setMaxEditDistance(int maxEditDistance) { + if (maxEditDistance < 0) + throw new IllegalArgumentException("Can not use negative maxEditDistance " + maxEditDistance); + this.maxEditDistance = maxEditDistance; + } + + public void setPrefixLength(int prefixLength) { + if (prefixLength < 0) + throw new IllegalArgumentException("Can not use negative prefixLength " + prefixLength); + this.prefixLength = prefixLength; + } + + public int getPrefixLength() { + return this.prefixLength; + } + + public int getMaxEditDistance() { + return this.maxEditDistance; } @Override @@ -73,35 +104,36 @@ public class FuzzyItem extends TermItem { return false; } FuzzyItem other = (FuzzyItem) obj; - if (term == null) { - if (other.term != null) { - return false; - } - } else if (!term.equals(other.term)) { - return false; - } + if (!this.term.equals(other.term)) return false; + if (this.maxEditDistance != other.maxEditDistance) return false; + if (this.prefixLength != other.prefixLength) return false; return true; } @Override public int hashCode() { - final int prime = 31; - int result = super.hashCode(); - result = prime * result + ((term == null) ? 0 : term.hashCode()); - return result; + return Objects.hash(super.hashCode(), term, maxEditDistance, prefixLength); } @Override - public String toString() { - StringBuilder builder = new StringBuilder(); - builder.append("FuzzyItem [term=").append(term).append("]"); - return builder.toString(); + protected void appendHeadingString(StringBuilder buffer) { + buffer.append(getName()); + buffer.append("("); + buffer.append(this.term); + buffer.append(","); + buffer.append(this.maxEditDistance); + buffer.append(","); + buffer.append(this.prefixLength); + buffer.append(")"); + buffer.append(" "); } @Override protected void encodeThis(ByteBuffer buffer) { super.encodeThis(buffer); putString(getIndexedString(), buffer); + IntegerCompressor.putCompressedPositiveNumber(this.maxEditDistance, buffer); + IntegerCompressor.putCompressedPositiveNumber(this.prefixLength, buffer); } } diff --git a/container-search/src/main/java/com/yahoo/search/query/SelectParser.java b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java index 320148ec01d..ccf24a13f34 100644 --- a/container-search/src/main/java/com/yahoo/search/query/SelectParser.java +++ b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java @@ -61,6 +61,8 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import static com.yahoo.search.yql.YqlParser.MAX_EDIT_DISTANCE; +import static com.yahoo.search.yql.YqlParser.PREFIX_LENGTH; import static com.yahoo.slime.Type.ARRAY; import static com.yahoo.slime.Type.DOUBLE; import static com.yahoo.slime.Type.LONG; @@ -1161,9 +1163,16 @@ public class SelectParser implements Parser { private Item instantiateFuzzyItem(String field, String key, Inspector value) { HashMap<Integer, Inspector> children = childMap(value); + HashMap<String, Inspector> annotations = getAnnotationMap(value); + Preconditions.checkArgument(children.size() == 1, "Expected 1 argument, got %s.", children.size()); + String wordData = children.get(0).asString(); - FuzzyItem fuzzy = new FuzzyItem(field, true, wordData); + + Integer maxEditDistance = getIntegerAnnotation(MAX_EDIT_DISTANCE, annotations, FuzzyItem.DEFAULT_MAX_EDIT_DISTANCE); + Integer prefixLength = getIntegerAnnotation(PREFIX_LENGTH, annotations, FuzzyItem.DEFAULT_PREFIX_LENGTH); + + FuzzyItem fuzzy = new FuzzyItem(field, true, wordData, maxEditDistance, prefixLength); return leafStyleSettings(getAnnotations(value), fuzzy); } diff --git a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java index e778798b0e5..194863e3129 100644 --- a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java +++ b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java @@ -21,6 +21,7 @@ import static com.yahoo.search.yql.YqlParser.GEO_LOCATION; import static com.yahoo.search.yql.YqlParser.HIT_LIMIT; import static com.yahoo.search.yql.YqlParser.IMPLICIT_TRANSFORMS; import static com.yahoo.search.yql.YqlParser.LABEL; +import static com.yahoo.search.yql.YqlParser.MAX_EDIT_DISTANCE; import static com.yahoo.search.yql.YqlParser.NEAR; import static com.yahoo.search.yql.YqlParser.NEAREST_NEIGHBOR; import static com.yahoo.search.yql.YqlParser.NORMALIZE_CASE; @@ -31,6 +32,7 @@ import static com.yahoo.search.yql.YqlParser.ORIGIN_OFFSET; import static com.yahoo.search.yql.YqlParser.ORIGIN_ORIGINAL; import static com.yahoo.search.yql.YqlParser.PHRASE; import static com.yahoo.search.yql.YqlParser.PREFIX; +import static com.yahoo.search.yql.YqlParser.PREFIX_LENGTH; import static com.yahoo.search.yql.YqlParser.RANGE; import static com.yahoo.search.yql.YqlParser.RANK; import static com.yahoo.search.yql.YqlParser.RANKED; @@ -526,7 +528,8 @@ public class VespaSerializer { @Override boolean serialize(StringBuilder destination, FuzzyItem fuzzy) { - String annotations = leafAnnotations(fuzzy); + String annotations = fuzzyAnnotations(fuzzy); + destination.append(normalizeIndexName(fuzzy.getIndexName())).append(" contains "); if (annotations.length() > 0) { @@ -543,6 +546,30 @@ public class VespaSerializer { } return false; } + + static String fuzzyAnnotations(FuzzyItem fuzzyItem) { + boolean isMaxEditDistanceSet = fuzzyItem.getMaxEditDistance() != FuzzyItem.DEFAULT_MAX_EDIT_DISTANCE; + boolean isPrefixLengthSet = fuzzyItem.getPrefixLength() != FuzzyItem.DEFAULT_PREFIX_LENGTH; + boolean anyAnnotationSet = isMaxEditDistanceSet || isPrefixLengthSet; + + StringBuilder builder = new StringBuilder(); + if (anyAnnotationSet) { + builder.append("[{"); + } + if (isMaxEditDistanceSet) { + builder.append(MAX_EDIT_DISTANCE + ":").append(fuzzyItem.getMaxEditDistance()); + } + if (isMaxEditDistanceSet && isPrefixLengthSet) { + builder.append(","); + } + if (isPrefixLengthSet) { + builder.append(PREFIX_LENGTH + ":").append(fuzzyItem.getPrefixLength()); + } + if (anyAnnotationSet) { + builder.append("}]"); + } + return builder.toString(); + } } private static class ONearSerializer extends Serializer<ONearItem> { diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java index 06ee0e706f3..19d4e6d41ba 100644 --- a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java +++ b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java @@ -126,6 +126,8 @@ public class YqlParser implements Parser { private static final String RANKED_DESCRIPTION = "setting for whether to use term for ranking"; private static final String STEM_DESCRIPTION = "setting for whether to use stem if field implies it"; private static final String USE_POSITION_DATA_DESCRIPTION = "setting for whether to use position data for ranking this item"; + private static final String MAX_EDIT_DISTANCE_DESCRIPTION = "setting for an inclusive upper bound for a fuzzy edit-distance search"; + private static final String PREFIX_LENGTH_DESCRIPTION = "setting for a prefix length that is considered frozen for a fuzzy search"; private static final String USER_INPUT_ALLOW_EMPTY = "allowEmpty"; private static final String USER_INPUT_DEFAULT_INDEX = "defaultIndex"; private static final String USER_INPUT_GRAMMAR = "grammar"; @@ -194,6 +196,9 @@ public class YqlParser implements Parser { public static final String WEIGHT = "weight"; public static final String WEIGHTED_SET = "weightedSet"; public static final String FUZZY = "fuzzy"; + public static final String MAX_EDIT_DISTANCE = "maxEditDistance"; + public static final String PREFIX_LENGTH = "prefixLength"; + private final IndexFacts indexFacts; private final List<ConnectedItem> connectedItems = new ArrayList<>(); @@ -1313,7 +1318,21 @@ public class YqlParser implements Parser { String wordData = getStringContents(args.get(0)); - FuzzyItem fuzzy = new FuzzyItem(field, true, wordData); + Integer maxEditDistance = getAnnotation( + ast, + MAX_EDIT_DISTANCE, + Integer.class, + FuzzyItem.DEFAULT_MAX_EDIT_DISTANCE, + MAX_EDIT_DISTANCE_DESCRIPTION); + + Integer prefixLength = getAnnotation( + ast, + PREFIX_LENGTH, + Integer.class, + FuzzyItem.DEFAULT_PREFIX_LENGTH, + PREFIX_LENGTH_DESCRIPTION); + + FuzzyItem fuzzy = new FuzzyItem(field, true, wordData, maxEditDistance, prefixLength); return leafStyleSettings(ast, fuzzy); } diff --git a/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java index 1269c2a5aef..8a90d224003 100644 --- a/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java @@ -449,4 +449,9 @@ public class VespaSerializerTestCase { parseAndConfirm("foo contains fuzzy(\"a\")"); } + @Test + public void testFuzzyAnnotations() { + parseAndConfirm("foo contains ([{maxEditDistance:3,prefixLength:5}]fuzzy(\"a\"))"); + } + } diff --git a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java index f40e212adde..3a6641b0d7a 100644 --- a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java @@ -47,9 +47,7 @@ import com.yahoo.search.searchchain.Execution; import org.junit.Test; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -389,6 +387,21 @@ public class YqlParserTestCase { assertSame(FuzzyItem.class, root.getClass()); assertEquals("baz", ((FuzzyItem) root).getIndexName()); assertEquals("a b", ((FuzzyItem) root).stringValue()); + assertEquals(FuzzyItem.DEFAULT_MAX_EDIT_DISTANCE, ((FuzzyItem) root).getMaxEditDistance()); + assertEquals(FuzzyItem.DEFAULT_PREFIX_LENGTH, ((FuzzyItem) root).getPrefixLength()); + } + + @Test + public void testFuzzyAnnotations() { + QueryTree x = parse( + "select foo from bar where baz contains ({maxEditDistance: 3, prefixLength: 10}fuzzy(\"a b\"))" + ); + Item root = x.getRoot(); + assertSame(FuzzyItem.class, root.getClass()); + assertEquals("baz", ((FuzzyItem) root).getIndexName()); + assertEquals("a b", ((FuzzyItem) root).stringValue()); + assertEquals(3, ((FuzzyItem) root).getMaxEditDistance()); + assertEquals(10, ((FuzzyItem) root).getPrefixLength()); } @Test |