summaryrefslogtreecommitdiffstats
path: root/container-search
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@vespa.ai>2024-04-19 11:19:18 +0000
committerTor Brede Vekterli <vekterli@vespa.ai>2024-04-19 13:45:59 +0000
commit0afbf14df1ee158167f70016545e799af1e433dc (patch)
tree5af98617f61cb76fcfe897585c9c2712955de3b9 /container-search
parent433cb01e19f6bb51d6a2d029482a6e16431cb055 (diff)
Wire fuzzy prefix matching support through the query stack
Adds `prefix:[true|false]` annotation support to the `fuzzy` query operator in the YQL and JSON query languages. Fuzzy prefix matching semantics are wired through to the matcher implementations for both indexed and streaming search. Example usage: {maxEditDistance:1,prefix:true}fuzzy("foo") Will match `foo`, `foobar`, `foxtrot`, `zookeeper` and so on. It can be combined with the existing prefix locking feature: {maxEditDistance:1,prefixLength:2,prefix:true}fuzzy("foo") Which will match `foo`, `foobar`, `foxtrot` etc, but _not_ `zookeeper` since the locked prefix (`fo`) does not match. Due to the complexities involved with extending the legacy binary query stack representation, signalling prefix matching for the fuzzy term is done by pragmatically adding a new, generic "prefix matching" term-level flag. This is currently ignored for everything except fuzzy query items. Modernizing the query stack format to make it more extensible (i.e. move encoding to Protobuf) is on the backlog...!
Diffstat (limited to 'container-search')
-rw-r--r--container-search/abi-spec.json5
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java59
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/query/Item.java14
-rw-r--r--container-search/src/main/java/com/yahoo/search/query/SelectParser.java3
-rw-r--r--container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java25
-rw-r--r--container-search/src/main/java/com/yahoo/search/yql/YqlParser.java9
-rw-r--r--container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java23
-rw-r--r--container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java5
-rw-r--r--container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java22
-rw-r--r--container-search/src/test/java/com/yahoo/select/SelectTestCase.java35
10 files changed, 144 insertions, 56 deletions
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json
index 07f0449e61a..5e66e1bb746 100644
--- a/container-search/abi-spec.json
+++ b/container-search/abi-spec.json
@@ -555,11 +555,15 @@
"public"
],
"methods" : [
+ "public void <init>(java.lang.String, boolean, java.lang.String, int, int, boolean)",
"public void <init>(java.lang.String, boolean, java.lang.String, int, int)",
"public void setMaxEditDistance(int)",
"public void setPrefixLength(int)",
"public int getPrefixLength()",
"public int getMaxEditDistance()",
+ "public boolean isPrefixMatch()",
+ "public void setPrefixMatch(boolean)",
+ "protected boolean hasPrefixMatchSemantics()",
"public void setValue(java.lang.String)",
"public java.lang.String getRawWord()",
"public boolean isWords()",
@@ -820,6 +824,7 @@
"public abstract java.lang.String getName()",
"public void setFilter(boolean)",
"public boolean isFilter()",
+ "protected boolean hasPrefixMatchSemantics()",
"public com.yahoo.prelude.query.Item$ItemCreator getCreator()",
"public void setCreator(com.yahoo.prelude.query.Item$ItemCreator)",
"public void setWeight(int)",
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java
index 3cf86a70985..b900dee20ba 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java
@@ -16,15 +16,21 @@ public class FuzzyItem extends TermItem {
private int maxEditDistance;
private int prefixLength;
+ private boolean prefixMatch;
public static int DEFAULT_MAX_EDIT_DISTANCE = 2;
public static int DEFAULT_PREFIX_LENGTH = 0;
- public FuzzyItem(String indexName, boolean isFromQuery, String term, int maxEditDistance, int prefixLength) {
+ public FuzzyItem(String indexName, boolean isFromQuery, String term, int maxEditDistance, int prefixLength, boolean prefixMatch) {
super(indexName, isFromQuery, null);
setValue(term);
setMaxEditDistance(maxEditDistance);
setPrefixLength(prefixLength);
+ setPrefixMatch(prefixMatch);
+ }
+
+ public FuzzyItem(String indexName, boolean isFromQuery, String term, int maxEditDistance, int prefixLength) {
+ this(indexName, isFromQuery, term, maxEditDistance, prefixLength, false);
}
public void setMaxEditDistance(int maxEditDistance) {
@@ -43,6 +49,19 @@ public class FuzzyItem extends TermItem {
return this.maxEditDistance;
}
+ public boolean isPrefixMatch() {
+ return this.prefixMatch;
+ }
+
+ public void setPrefixMatch(boolean prefixMatch) {
+ this.prefixMatch = prefixMatch;
+ }
+
+ @Override
+ protected boolean hasPrefixMatchSemantics() {
+ return this.prefixMatch;
+ }
+
@Override
public void setValue(String value) {
this.term = value;
@@ -89,43 +108,39 @@ public class FuzzyItem extends TermItem {
}
@Override
- public boolean equals(Object obj) {
- if (this == obj) {
- return true;
- }
- if (!super.equals(obj)) {
- return false;
- }
- if (getClass() != obj.getClass()) {
- return false;
- }
- FuzzyItem other = (FuzzyItem) obj;
- if (!this.term.equals(other.term)) return false;
- if (this.maxEditDistance != other.maxEditDistance) return false;
- if (this.prefixLength != other.prefixLength) return false;
- return true;
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ if (!super.equals(o)) return false;
+ FuzzyItem fuzzyItem = (FuzzyItem) o;
+ return maxEditDistance == fuzzyItem.maxEditDistance &&
+ prefixLength == fuzzyItem.prefixLength &&
+ prefixMatch == fuzzyItem.prefixMatch &&
+ Objects.equals(term, fuzzyItem.term);
}
@Override
public int hashCode() {
- return Objects.hash(super.hashCode(), term, maxEditDistance, prefixLength);
+ return Objects.hash(super.hashCode(), term, maxEditDistance, prefixLength, prefixMatch);
}
@Override
protected void appendHeadingString(StringBuilder buffer) {
buffer.append(getName());
- buffer.append("(");
+ buffer.append('(');
buffer.append(this.term);
- buffer.append(",");
+ buffer.append(',');
buffer.append(this.maxEditDistance);
- buffer.append(",");
+ buffer.append(',');
buffer.append(this.prefixLength);
- buffer.append(")");
- buffer.append(" ");
+ buffer.append(',');
+ buffer.append(this.prefixMatch);
+ buffer.append(") ");
}
@Override
protected void encodeThis(ByteBuffer buffer) {
+ // Prefix matching is communicated via term header flags
super.encodeThis(buffer);
putString(getIndexedString(), buffer);
IntegerCompressor.putCompressedPositiveNumber(this.maxEditDistance, buffer);
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/Item.java b/container-search/src/main/java/com/yahoo/prelude/query/Item.java
index f43b55424e6..099c546e3f0 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/Item.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/Item.java
@@ -161,6 +161,16 @@ public abstract class Item implements Cloneable {
}
/**
+ * Indicates that a query item that does not normally match with prefix semantics
+ * should do so for this particular query item instance.
+ *
+ * False by default; should be overridden by subclasses that want to signal this behavior.
+ */
+ protected boolean hasPrefixMatchSemantics() {
+ return false;
+ }
+
+ /**
* Returns the item creator value.
*
* @deprecated use isFilter(boolean)
@@ -286,6 +296,7 @@ public abstract class Item implements Cloneable {
byte FLAGS_SPECIALTOKEN = 0x02;
byte FLAGS_NOPOSITIONDATA = 0x04;
byte FLAGS_ISFILTER = 0x08;
+ byte FLAGS_PREFIX_MATCH = 0x10;
byte ret = 0;
if (!isRanked()) {
@@ -300,6 +311,9 @@ public abstract class Item implements Cloneable {
if (isFilter()) {
ret |= FLAGS_ISFILTER;
}
+ if (hasPrefixMatchSemantics()) {
+ ret |= FLAGS_PREFIX_MATCH;
+ }
return ret;
}
diff --git a/container-search/src/main/java/com/yahoo/search/query/SelectParser.java b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java
index c897afe144c..c90612425fa 100644
--- a/container-search/src/main/java/com/yahoo/search/query/SelectParser.java
+++ b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java
@@ -1150,8 +1150,9 @@ public class SelectParser implements Parser {
Integer maxEditDistance = getIntegerAnnotation(MAX_EDIT_DISTANCE, annotations, FuzzyItem.DEFAULT_MAX_EDIT_DISTANCE);
Integer prefixLength = getIntegerAnnotation(PREFIX_LENGTH, annotations, FuzzyItem.DEFAULT_PREFIX_LENGTH);
+ boolean prefixMatch = getBoolAnnotation(PREFIX, annotations, Boolean.FALSE);
- FuzzyItem fuzzy = new FuzzyItem(field, true, wordData, maxEditDistance, prefixLength);
+ FuzzyItem fuzzy = new FuzzyItem(field, true, wordData, maxEditDistance, prefixLength, prefixMatch);
return leafStyleSettings(getAnnotations(value), fuzzy);
}
diff --git a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java
index 634163bf0c2..a354006aa9b 100644
--- a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java
+++ b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java
@@ -551,24 +551,31 @@ public class VespaSerializer {
static String fuzzyAnnotations(FuzzyItem fuzzyItem) {
boolean isMaxEditDistanceSet = fuzzyItem.getMaxEditDistance() != FuzzyItem.DEFAULT_MAX_EDIT_DISTANCE;
boolean isPrefixLengthSet = fuzzyItem.getPrefixLength() != FuzzyItem.DEFAULT_PREFIX_LENGTH;
- boolean anyAnnotationSet = isMaxEditDistanceSet || isPrefixLengthSet;
+ boolean isPrefixMatch = fuzzyItem.isPrefixMatch();
+ boolean anyAnnotationSet = isMaxEditDistanceSet || isPrefixLengthSet || isPrefixMatch;
- StringBuilder builder = new StringBuilder();
- if (anyAnnotationSet) {
- builder.append("{");
+ if (!anyAnnotationSet) {
+ return "";
}
+
+ StringBuilder builder = new StringBuilder();
+ builder.append("{");
if (isMaxEditDistanceSet) {
builder.append(MAX_EDIT_DISTANCE + ":").append(fuzzyItem.getMaxEditDistance());
- }
- if (isMaxEditDistanceSet && isPrefixLengthSet) {
- builder.append(",");
+ if (isPrefixLengthSet || isPrefixMatch) {
+ builder.append(",");
+ }
}
if (isPrefixLengthSet) {
builder.append(PREFIX_LENGTH + ":").append(fuzzyItem.getPrefixLength());
+ if (isPrefixMatch) {
+ builder.append(",");
+ }
}
- if (anyAnnotationSet) {
- builder.append("}");
+ if (isPrefixMatch) {
+ builder.append(PREFIX).append(':').append(fuzzyItem.isPrefixMatch());
}
+ builder.append("}");
return builder.toString();
}
}
diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
index e66cac5766c..fb4ec5ba872 100644
--- a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
+++ b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
@@ -1385,7 +1385,14 @@ public class YqlParser implements Parser {
FuzzyItem.DEFAULT_PREFIX_LENGTH,
PREFIX_LENGTH_DESCRIPTION);
- FuzzyItem fuzzy = new FuzzyItem(field, true, wordData, maxEditDistance, prefixLength);
+ boolean prefixMatch = getAnnotation(
+ ast,
+ PREFIX,
+ Boolean.class,
+ Boolean.FALSE,
+ "setting for whether to use prefix match of input data");
+
+ FuzzyItem fuzzy = new FuzzyItem(field, true, wordData, maxEditDistance, prefixLength, prefixMatch);
return leafStyleSettings(ast, fuzzy);
}
diff --git a/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java b/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java
index c4b8c9f2044..027152bfd69 100644
--- a/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java
@@ -55,14 +55,13 @@ public class ValidateFuzzySearcherTestCase {
searcher = new ValidateFuzzySearcher();
}
- private String makeQuery(String attribute, String query, int maxEditDistance, int prefixLength) {
- return "select * from sources * where " + attribute +
- " contains ({maxEditDistance:" + maxEditDistance + ", prefixLength:" + prefixLength +"}" +
- "fuzzy(\"" + query + "\"))";
+ private String makeQuery(String attribute, String query, int maxEditDistance, int prefixLength, boolean prefixMatch) {
+ return "select * from sources * where %s contains ({maxEditDistance:%d,prefixLength:%d,prefix:%b}fuzzy(\"%s\"))"
+ .formatted(attribute, maxEditDistance, prefixLength, prefixMatch, query);
}
private String makeQuery(String attribute, String query) {
- return makeQuery(attribute, query, 2, 0);
+ return makeQuery(attribute, query, 2, 0, false);
}
@@ -76,7 +75,7 @@ public class ValidateFuzzySearcherTestCase {
if (validAttributes.contains(attribute)) {
assertNull(r.hits().getError());
} else {
- assertErrMsg("FUZZY(fuzzy,2,0) " + attribute + ":fuzzy field is not a string attribute", r);
+ assertErrMsg("FUZZY(fuzzy,2,0,false) " + attribute + ":fuzzy field is not a string attribute", r);
}
}
}
@@ -85,28 +84,28 @@ public class ValidateFuzzySearcherTestCase {
void testInvalidEmptyStringQuery() {
String q = makeQuery("string_single", "");
Result r = doSearch(searcher, q);
- assertErrMsg("FUZZY(,2,0) string_single: fuzzy query must be non-empty", r);
+ assertErrMsg("FUZZY(,2,0,false) string_single: fuzzy query must be non-empty", r);
}
@Test
void testInvalidQueryWrongMaxEditDistance() {
- String q = makeQuery("string_single", "fuzzy", -1, 0);
+ String q = makeQuery("string_single", "fuzzy", -1, 0, false);
Result r = doSearch(searcher, q);
- assertErrMsg("FUZZY(fuzzy,-1,0) string_single:fuzzy has invalid maxEditDistance -1: Must be >= 0", r);
+ assertErrMsg("FUZZY(fuzzy,-1,0,false) string_single:fuzzy has invalid maxEditDistance -1: Must be >= 0", r);
}
@Test
void testInvalidQueryWrongPrefixLength() {
- String q = makeQuery("string_single", "fuzzy", 2, -1);
+ String q = makeQuery("string_single", "fuzzy", 2, -1, true);
Result r = doSearch(searcher, q);
- assertErrMsg("FUZZY(fuzzy,2,-1) string_single:fuzzy has invalid prefixLength -1: Must be >= 0", r);
+ assertErrMsg("FUZZY(fuzzy,2,-1,true) string_single:fuzzy has invalid prefixLength -1: Must be >= 0", r);
}
@Test
void testInvalidQueryWrongAttributeName() {
String q = makeQuery("wrong_name", "fuzzy");
Result r = doSearch(searcher, q);
- assertErrMsg("FUZZY(fuzzy,2,0) wrong_name:fuzzy field is not a string attribute", r);
+ assertErrMsg("FUZZY(fuzzy,2,0,false) wrong_name:fuzzy field is not a string attribute", r);
}
private static void assertErrMsg(String message, Result r) {
diff --git a/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java
index 20ca81234a6..b5e2839c4c0 100644
--- a/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java
@@ -464,7 +464,12 @@ public class VespaSerializerTestCase {
@Test
void testFuzzyAnnotations() {
+ parseAndConfirm("foo contains ({maxEditDistance:3}fuzzy(\"a\"))");
parseAndConfirm("foo contains ({maxEditDistance:3,prefixLength:5}fuzzy(\"a\"))");
+ parseAndConfirm("foo contains ({maxEditDistance:3,prefixLength:5,prefix:true}fuzzy(\"a\"))");
+ parseAndConfirm("foo contains ({prefixLength:5,prefix:true}fuzzy(\"a\"))");
+ parseAndConfirm("foo contains ({maxEditDistance:3,prefix:true}fuzzy(\"a\"))");
+ parseAndConfirm("foo contains ({prefix:true}fuzzy(\"a\"))");
}
@Test
diff --git a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
index 29a651aabf4..91f5984481a 100644
--- a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
@@ -437,23 +437,27 @@ public class YqlParserTestCase {
QueryTree x = parse("select foo from bar where baz contains fuzzy(\"a b\")");
Item root = x.getRoot();
assertSame(FuzzyItem.class, root.getClass());
- assertEquals("baz", ((FuzzyItem) root).getIndexName());
- assertEquals("a b", ((FuzzyItem) root).stringValue());
- assertEquals(FuzzyItem.DEFAULT_MAX_EDIT_DISTANCE, ((FuzzyItem) root).getMaxEditDistance());
- assertEquals(FuzzyItem.DEFAULT_PREFIX_LENGTH, ((FuzzyItem) root).getPrefixLength());
+ var fuzzy = (FuzzyItem) root;
+ assertEquals("baz", fuzzy.getIndexName());
+ assertEquals("a b", fuzzy.stringValue());
+ assertEquals(FuzzyItem.DEFAULT_MAX_EDIT_DISTANCE, fuzzy.getMaxEditDistance());
+ assertEquals(FuzzyItem.DEFAULT_PREFIX_LENGTH, fuzzy.getPrefixLength());
+ assertFalse(fuzzy.isPrefixMatch());
}
@Test
void testFuzzyAnnotations() {
QueryTree x = parse(
- "select foo from bar where baz contains ({maxEditDistance: 3, prefixLength: 10}fuzzy(\"a b\"))"
+ "select foo from bar where baz contains ({maxEditDistance: 3, prefixLength: 10, prefix: true}fuzzy(\"a b\"))"
);
Item root = x.getRoot();
assertSame(FuzzyItem.class, root.getClass());
- assertEquals("baz", ((FuzzyItem) root).getIndexName());
- assertEquals("a b", ((FuzzyItem) root).stringValue());
- assertEquals(3, ((FuzzyItem) root).getMaxEditDistance());
- assertEquals(10, ((FuzzyItem) root).getPrefixLength());
+ var fuzzy = (FuzzyItem) root;
+ assertEquals("baz", fuzzy.getIndexName());
+ assertEquals("a b", fuzzy.stringValue());
+ assertEquals(3, fuzzy.getMaxEditDistance());
+ assertEquals(10, fuzzy.getPrefixLength());
+ assertTrue(fuzzy.isPrefixMatch());
}
@Test
diff --git a/container-search/src/test/java/com/yahoo/select/SelectTestCase.java b/container-search/src/test/java/com/yahoo/select/SelectTestCase.java
index f4571f04a5d..f863816dab2 100644
--- a/container-search/src/test/java/com/yahoo/select/SelectTestCase.java
+++ b/container-search/src/test/java/com/yahoo/select/SelectTestCase.java
@@ -671,8 +671,39 @@ public class SelectTestCase {
QueryTree x = parseWhere("{ \"contains\": [\"description\", { \"fuzzy\": [\"a b\"] }] }");
Item root = x.getRoot();
assertSame(FuzzyItem.class, root.getClass());
- assertEquals("description", ((FuzzyItem) root).getIndexName());
- assertEquals("a b", ((FuzzyItem) root).stringValue());
+ var fuzzy = (FuzzyItem) root;
+ assertEquals("description", fuzzy.getIndexName());
+ assertEquals("a b", fuzzy.stringValue());
+ assertEquals(FuzzyItem.DEFAULT_MAX_EDIT_DISTANCE, fuzzy.getMaxEditDistance());
+ assertEquals(FuzzyItem.DEFAULT_PREFIX_LENGTH, fuzzy.getPrefixLength());
+ assertFalse(fuzzy.isPrefixMatch());
+ }
+
+ @Test
+ void fuzzy_with_annotations() {
+ var where = """
+ {
+ "contains": ["description", {
+ "fuzzy": {
+ "children": ["a b"],
+ "attributes": {
+ "maxEditDistance": 3,
+ "prefixLength": 10,
+ "prefix": true
+ }
+ }
+ }]
+ }
+ """;
+ QueryTree x = parseWhere(where);
+ Item root = x.getRoot();
+ assertSame(FuzzyItem.class, root.getClass());
+ var fuzzy = (FuzzyItem) root;
+ assertEquals("description", fuzzy.getIndexName());
+ assertEquals("a b", fuzzy.stringValue());
+ assertEquals(3, fuzzy.getMaxEditDistance());
+ assertEquals(10, fuzzy.getPrefixLength());
+ assertTrue(fuzzy.isPrefixMatch());
}
//------------------------------------------------------------------- grouping tests