summary | refs | log | tree | commit | diff | stats
path: root/container-search
diff options
context:
space:
mode:
author: Alexey Chernyshev <aleksei@spotify.com>, 2022-04-04 16:23:07 +0200
committer: Alexey Chernyshev <aleksei@spotify.com>, 2022-04-07 14:44:30 +0200
commit: 7e9b33401201db9a9e22971dd419247e268bbfaa (patch)
tree: f5032a82e9fa74247b2fdeb3dcde4dc6cf98ce89 /container-search
parent: ad7cc1d11f0c19baa2344a643377576c559555f7 (diff)
Propagating annotations for fuzzy query
Diffstat (limited to 'container-search')
-rw-r--r-- container-search/abi-spec.json 13
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java 64
-rw-r--r-- container-search/src/main/java/com/yahoo/search/query/SelectParser.java 11
-rw-r--r-- container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java 29
-rw-r--r-- container-search/src/main/java/com/yahoo/search/yql/YqlParser.java 21
-rw-r--r-- container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java 5
-rw-r--r-- container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java 17
7 files changed, 136 insertions, 24 deletions
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json
index dab61dfd46d..958dfaf0c65 100644
--- a/container-search/abi-spec.json
+++ b/container-search/abi-spec.json
@@ -548,7 +548,11 @@
"public"
],
"methods": [
- "public void <init>(java.lang.String, boolean, java.lang.String)",
+ "public void <init>(java.lang.String, boolean, java.lang.String, int, int)",
+ "public void setMaxEditDistance(int)",
+ "public void setPrefixLength(int)",
+ "public int getPrefixLength()",
+ "public int getMaxEditDistance()",
"public void setValue(java.lang.String)",
"public java.lang.String getRawWord()",
"public boolean isWords()",
@@ -560,10 +564,13 @@
"public int getNumWords()",
"public boolean equals(java.lang.Object)",
"public int hashCode()",
- "public java.lang.String toString()",
+ "protected void appendHeadingString(java.lang.StringBuilder)",
"protected void encodeThis(java.nio.ByteBuffer)"
],
- "fields": []
+ "fields": [
+ "public static int DefaultMaxEditDistance",
+ "public static int DefaultPrefixLength"
+ ]
},
"com.yahoo.prelude.query.GeoLocationItem": {
"superClass": "com.yahoo.prelude.query.TermItem",
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java
index 74c31a5d1f0..fda96aa6ecc 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java
@@ -1,7 +1,10 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.query;
+import com.yahoo.compress.IntegerCompressor;
+
import java.nio.ByteBuffer;
+import java.util.Objects;
/**
* Fuzzy search term
@@ -11,9 +14,37 @@ import java.nio.ByteBuffer;
public class FuzzyItem extends TermItem {
private String term;
- public FuzzyItem(String indexName, boolean isFromQuery, String term) {
+ private int maxEditDistance;
+ private int prefixLength;
+
+ public static int DefaultMaxEditDistance = 2;
+ public static int DefaultPrefixLength = 0;
+
+ public FuzzyItem(String indexName, boolean isFromQuery, String term, int maxEditDistance, int prefixLength) {
super(indexName, isFromQuery, null);
setValue(term);
+ setMaxEditDistance(maxEditDistance);
+ setPrefixLength(prefixLength);
+ }
+
+ public void setMaxEditDistance(int maxEditDistance) {
+ if (maxEditDistance < 0)
+ throw new IllegalArgumentException("Can not use negative maxEditDistance " + maxEditDistance);
+ this.maxEditDistance = maxEditDistance;
+ }
+
+ public void setPrefixLength(int prefixLength) {
+ if (prefixLength < 0)
+ throw new IllegalArgumentException("Can not use negative prefixLength " + prefixLength);
+ this.prefixLength = prefixLength;
+ }
+
+ public int getPrefixLength() {
+ return this.prefixLength;
+ }
+
+ public int getMaxEditDistance() {
+ return this.maxEditDistance;
}
@Override
@@ -73,35 +104,36 @@ public class FuzzyItem extends TermItem {
return false;
}
FuzzyItem other = (FuzzyItem) obj;
- if (term == null) {
- if (other.term != null) {
- return false;
- }
- } else if (!term.equals(other.term)) {
- return false;
- }
+ if (!this.term.equals(other.term)) return false;
+ if (this.maxEditDistance != other.maxEditDistance) return false;
+ if (this.prefixLength != other.prefixLength) return false;
return true;
}
@Override
public int hashCode() {
- final int prime = 31;
- int result = super.hashCode();
- result = prime * result + ((term == null) ? 0 : term.hashCode());
- return result;
+ return Objects.hash(super.hashCode(), term, maxEditDistance, prefixLength);
}
@Override
- public String toString() {
- StringBuilder builder = new StringBuilder();
- builder.append("FuzzyItem [term=").append(term).append("]");
- return builder.toString();
+ protected void appendHeadingString(StringBuilder buffer) {
+ buffer.append(getName());
+ buffer.append("(");
+ buffer.append(this.term);
+ buffer.append(",");
+ buffer.append(this.maxEditDistance);
+ buffer.append(",");
+ buffer.append(this.prefixLength);
+ buffer.append(")");
+ buffer.append(" ");
}
@Override
protected void encodeThis(ByteBuffer buffer) {
super.encodeThis(buffer);
putString(getIndexedString(), buffer);
+ IntegerCompressor.putCompressedPositiveNumber(this.maxEditDistance, buffer);
+ IntegerCompressor.putCompressedPositiveNumber(this.prefixLength, buffer);
}
}
diff --git a/container-search/src/main/java/com/yahoo/search/query/SelectParser.java b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java
index 320148ec01d..c1d1cdd566b 100644
--- a/container-search/src/main/java/com/yahoo/search/query/SelectParser.java
+++ b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java
@@ -61,6 +61,8 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
+import static com.yahoo.search.yql.YqlParser.MAX_EDIT_DISTANCE;
+import static com.yahoo.search.yql.YqlParser.PREFIX_LENGTH;
import static com.yahoo.slime.Type.ARRAY;
import static com.yahoo.slime.Type.DOUBLE;
import static com.yahoo.slime.Type.LONG;
@@ -1161,9 +1163,16 @@ public class SelectParser implements Parser {
private Item instantiateFuzzyItem(String field, String key, Inspector value) {
HashMap<Integer, Inspector> children = childMap(value);
+ HashMap<String, Inspector> annotations = getAnnotationMap(value);
+
Preconditions.checkArgument(children.size() == 1, "Expected 1 argument, got %s.", children.size());
+
String wordData = children.get(0).asString();
- FuzzyItem fuzzy = new FuzzyItem(field, true, wordData);
+
+ Integer maxEditDistance = getIntegerAnnotation(MAX_EDIT_DISTANCE, annotations, FuzzyItem.DefaultMaxEditDistance);
+ Integer prefixLength = getIntegerAnnotation(PREFIX_LENGTH, annotations, FuzzyItem.DefaultPrefixLength);
+
+ FuzzyItem fuzzy = new FuzzyItem(field, true, wordData, maxEditDistance, prefixLength);
return leafStyleSettings(getAnnotations(value), fuzzy);
}
diff --git a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java
index e778798b0e5..4b511df5e5f 100644
--- a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java
+++ b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java
@@ -21,6 +21,7 @@ import static com.yahoo.search.yql.YqlParser.GEO_LOCATION;
import static com.yahoo.search.yql.YqlParser.HIT_LIMIT;
import static com.yahoo.search.yql.YqlParser.IMPLICIT_TRANSFORMS;
import static com.yahoo.search.yql.YqlParser.LABEL;
+import static com.yahoo.search.yql.YqlParser.MAX_EDIT_DISTANCE;
import static com.yahoo.search.yql.YqlParser.NEAR;
import static com.yahoo.search.yql.YqlParser.NEAREST_NEIGHBOR;
import static com.yahoo.search.yql.YqlParser.NORMALIZE_CASE;
@@ -31,6 +32,7 @@ import static com.yahoo.search.yql.YqlParser.ORIGIN_OFFSET;
import static com.yahoo.search.yql.YqlParser.ORIGIN_ORIGINAL;
import static com.yahoo.search.yql.YqlParser.PHRASE;
import static com.yahoo.search.yql.YqlParser.PREFIX;
+import static com.yahoo.search.yql.YqlParser.PREFIX_LENGTH;
import static com.yahoo.search.yql.YqlParser.RANGE;
import static com.yahoo.search.yql.YqlParser.RANK;
import static com.yahoo.search.yql.YqlParser.RANKED;
@@ -526,7 +528,8 @@ public class VespaSerializer {
@Override
boolean serialize(StringBuilder destination, FuzzyItem fuzzy) {
- String annotations = leafAnnotations(fuzzy);
+ String annotations = fuzzyAnnotations(fuzzy);
+
destination.append(normalizeIndexName(fuzzy.getIndexName())).append(" contains ");
if (annotations.length() > 0) {
@@ -543,6 +546,30 @@ public class VespaSerializer {
}
return false;
}
+
+ static String fuzzyAnnotations(FuzzyItem fuzzyItem) {
+ boolean isMaxEditDistanceSet = fuzzyItem.getMaxEditDistance() != FuzzyItem.DefaultMaxEditDistance;
+ boolean isPrefixLengthSet = fuzzyItem.getPrefixLength() != FuzzyItem.DefaultPrefixLength;
+ boolean anyAnnotationSet = isMaxEditDistanceSet || isPrefixLengthSet;
+
+ StringBuilder builder = new StringBuilder();
+ if (anyAnnotationSet) {
+ builder.append("[{");
+ }
+ if (isMaxEditDistanceSet) {
+ builder.append(MAX_EDIT_DISTANCE + ":").append(fuzzyItem.getMaxEditDistance());
+ }
+ if (isMaxEditDistanceSet && isPrefixLengthSet) {
+ builder.append(",");
+ }
+ if (isPrefixLengthSet) {
+ builder.append(PREFIX_LENGTH + ":").append(fuzzyItem.getPrefixLength());
+ }
+ if (anyAnnotationSet) {
+ builder.append("}]");
+ }
+ return builder.toString();
+ }
}
private static class ONearSerializer extends Serializer<ONearItem> {
diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
index 06ee0e706f3..fcb19dde10d 100644
--- a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
+++ b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
@@ -126,6 +126,8 @@ public class YqlParser implements Parser {
private static final String RANKED_DESCRIPTION = "setting for whether to use term for ranking";
private static final String STEM_DESCRIPTION = "setting for whether to use stem if field implies it";
private static final String USE_POSITION_DATA_DESCRIPTION = "setting for whether to use position data for ranking this item";
+ private static final String MAX_EDIT_DISTANCE_DESCRIPTION = "setting for an inclusive upper bound for a fuzzy edit-distance search";
+ private static final String PREFIX_LENGTH_DESCRIPTION = "setting for a prefix length that is considered frozen for a fuzzy search";
private static final String USER_INPUT_ALLOW_EMPTY = "allowEmpty";
private static final String USER_INPUT_DEFAULT_INDEX = "defaultIndex";
private static final String USER_INPUT_GRAMMAR = "grammar";
@@ -194,6 +196,9 @@ public class YqlParser implements Parser {
public static final String WEIGHT = "weight";
public static final String WEIGHTED_SET = "weightedSet";
public static final String FUZZY = "fuzzy";
+ public static final String MAX_EDIT_DISTANCE = "maxEditDistance";
+ public static final String PREFIX_LENGTH = "prefixLength";
+
private final IndexFacts indexFacts;
private final List<ConnectedItem> connectedItems = new ArrayList<>();
@@ -1313,7 +1318,21 @@ public class YqlParser implements Parser {
String wordData = getStringContents(args.get(0));
- FuzzyItem fuzzy = new FuzzyItem(field, true, wordData);
+ Integer maxEditDistance = getAnnotation(
+ ast,
+ MAX_EDIT_DISTANCE,
+ Integer.class,
+ FuzzyItem.DefaultMaxEditDistance,
+ MAX_EDIT_DISTANCE_DESCRIPTION);
+
+ Integer prefixLength = getAnnotation(
+ ast,
+ PREFIX_LENGTH,
+ Integer.class,
+ FuzzyItem.DefaultPrefixLength,
+ PREFIX_LENGTH_DESCRIPTION);
+
+ FuzzyItem fuzzy = new FuzzyItem(field, true, wordData, maxEditDistance, prefixLength);
return leafStyleSettings(ast, fuzzy);
}
diff --git a/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java
index 1269c2a5aef..8a90d224003 100644
--- a/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java
@@ -449,4 +449,9 @@ public class VespaSerializerTestCase {
parseAndConfirm("foo contains fuzzy(\"a\")");
}
+ @Test
+ public void testFuzzyAnnotations() {
+ parseAndConfirm("foo contains ([{maxEditDistance:3,prefixLength:5}]fuzzy(\"a\"))");
+ }
+
}
diff --git a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
index f40e212adde..0ee0597689f 100644
--- a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
@@ -47,9 +47,7 @@ import com.yahoo.search.searchchain.Execution;
import org.junit.Test;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collection;
-import java.util.Collections;
import java.util.HashSet;
import java.util.List;
@@ -389,6 +387,21 @@ public class YqlParserTestCase {
assertSame(FuzzyItem.class, root.getClass());
assertEquals("baz", ((FuzzyItem) root).getIndexName());
assertEquals("a b", ((FuzzyItem) root).stringValue());
+ assertEquals(FuzzyItem.DefaultMaxEditDistance, ((FuzzyItem) root).getMaxEditDistance());
+ assertEquals(FuzzyItem.DefaultPrefixLength, ((FuzzyItem) root).getPrefixLength());
+ }
+
+ @Test
+ public void testFuzzyAnnotations() {
+ QueryTree x = parse(
+ "select foo from bar where baz contains ({maxEditDistance: 3, prefixLength: 10}fuzzy(\"a b\"))"
+ );
+ Item root = x.getRoot();
+ assertSame(FuzzyItem.class, root.getClass());
+ assertEquals("baz", ((FuzzyItem) root).getIndexName());
+ assertEquals("a b", ((FuzzyItem) root).stringValue());
+ assertEquals(3, ((FuzzyItem) root).getMaxEditDistance());
+ assertEquals(10, ((FuzzyItem) root).getPrefixLength());
}
@Test