aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/test/java/com/yahoo
diff options
context:
space:
mode:
author Tor Brede Vekterli <vekterli@vespa.ai> 2024-04-19 11:19:18 +0000
committer Tor Brede Vekterli <vekterli@vespa.ai> 2024-04-19 13:45:59 +0000
commit 0afbf14df1ee158167f70016545e799af1e433dc (patch)
tree 5af98617f61cb76fcfe897585c9c2712955de3b9 /container-search/src/test/java/com/yahoo
parent 433cb01e19f6bb51d6a2d029482a6e16431cb055 (diff)
Wire fuzzy prefix matching support through the query stack
Adds `prefix:[true|false]` annotation support to the `fuzzy` query operator in the YQL and JSON query languages. Fuzzy prefix matching semantics are wired through to the matcher implementations for both indexed and streaming search.

Example usage:

  {maxEditDistance:1,prefix:true}fuzzy("foo")

Will match `foo`, `foobar`, `foxtrot`, `zookeeper` and so on.

It can be combined with the existing prefix locking feature:

  {maxEditDistance:1,prefixLength:2,prefix:true}fuzzy("foo")

Which will match `foo`, `foobar`, `foxtrot` etc., but _not_ `zookeeper` since the locked prefix (`fo`) does not match.

Due to the complexities involved with extending the legacy binary query stack representation, signalling prefix matching for the fuzzy term is done by pragmatically adding a new, generic "prefix matching" term-level flag. This is currently ignored for everything except fuzzy query items. Modernizing the query stack format to make it more extensible (i.e. move encoding to Protobuf) is on the backlog...!
Diffstat (limited to 'container-search/src/test/java/com/yahoo')
-rw-r--r-- container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java 23
-rw-r--r-- container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java 5
-rw-r--r-- container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java 22
-rw-r--r-- container-search/src/test/java/com/yahoo/select/SelectTestCase.java 35
4 files changed, 62 insertions, 23 deletions
diff --git a/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java b/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java
index c4b8c9f2044..027152bfd69 100644
--- a/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java
@@ -55,14 +55,13 @@ public class ValidateFuzzySearcherTestCase {
searcher = new ValidateFuzzySearcher();
}
- private String makeQuery(String attribute, String query, int maxEditDistance, int prefixLength) {
- return "select * from sources * where " + attribute +
- " contains ({maxEditDistance:" + maxEditDistance + ", prefixLength:" + prefixLength +"}" +
- "fuzzy(\"" + query + "\"))";
+ private String makeQuery(String attribute, String query, int maxEditDistance, int prefixLength, boolean prefixMatch) {
+ return "select * from sources * where %s contains ({maxEditDistance:%d,prefixLength:%d,prefix:%b}fuzzy(\"%s\"))"
+ .formatted(attribute, maxEditDistance, prefixLength, prefixMatch, query);
}
private String makeQuery(String attribute, String query) {
- return makeQuery(attribute, query, 2, 0);
+ return makeQuery(attribute, query, 2, 0, false);
}
@@ -76,7 +75,7 @@ public class ValidateFuzzySearcherTestCase {
if (validAttributes.contains(attribute)) {
assertNull(r.hits().getError());
} else {
- assertErrMsg("FUZZY(fuzzy,2,0) " + attribute + ":fuzzy field is not a string attribute", r);
+ assertErrMsg("FUZZY(fuzzy,2,0,false) " + attribute + ":fuzzy field is not a string attribute", r);
}
}
}
@@ -85,28 +84,28 @@ public class ValidateFuzzySearcherTestCase {
void testInvalidEmptyStringQuery() {
String q = makeQuery("string_single", "");
Result r = doSearch(searcher, q);
- assertErrMsg("FUZZY(,2,0) string_single: fuzzy query must be non-empty", r);
+ assertErrMsg("FUZZY(,2,0,false) string_single: fuzzy query must be non-empty", r);
}
@Test
void testInvalidQueryWrongMaxEditDistance() {
- String q = makeQuery("string_single", "fuzzy", -1, 0);
+ String q = makeQuery("string_single", "fuzzy", -1, 0, false);
Result r = doSearch(searcher, q);
- assertErrMsg("FUZZY(fuzzy,-1,0) string_single:fuzzy has invalid maxEditDistance -1: Must be >= 0", r);
+ assertErrMsg("FUZZY(fuzzy,-1,0,false) string_single:fuzzy has invalid maxEditDistance -1: Must be >= 0", r);
}
@Test
void testInvalidQueryWrongPrefixLength() {
- String q = makeQuery("string_single", "fuzzy", 2, -1);
+ String q = makeQuery("string_single", "fuzzy", 2, -1, true);
Result r = doSearch(searcher, q);
- assertErrMsg("FUZZY(fuzzy,2,-1) string_single:fuzzy has invalid prefixLength -1: Must be >= 0", r);
+ assertErrMsg("FUZZY(fuzzy,2,-1,true) string_single:fuzzy has invalid prefixLength -1: Must be >= 0", r);
}
@Test
void testInvalidQueryWrongAttributeName() {
String q = makeQuery("wrong_name", "fuzzy");
Result r = doSearch(searcher, q);
- assertErrMsg("FUZZY(fuzzy,2,0) wrong_name:fuzzy field is not a string attribute", r);
+ assertErrMsg("FUZZY(fuzzy,2,0,false) wrong_name:fuzzy field is not a string attribute", r);
}
private static void assertErrMsg(String message, Result r) {
diff --git a/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java
index 20ca81234a6..b5e2839c4c0 100644
--- a/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java
@@ -464,7 +464,12 @@ public class VespaSerializerTestCase {
@Test
void testFuzzyAnnotations() {
+ parseAndConfirm("foo contains ({maxEditDistance:3}fuzzy(\"a\"))");
parseAndConfirm("foo contains ({maxEditDistance:3,prefixLength:5}fuzzy(\"a\"))");
+ parseAndConfirm("foo contains ({maxEditDistance:3,prefixLength:5,prefix:true}fuzzy(\"a\"))");
+ parseAndConfirm("foo contains ({prefixLength:5,prefix:true}fuzzy(\"a\"))");
+ parseAndConfirm("foo contains ({maxEditDistance:3,prefix:true}fuzzy(\"a\"))");
+ parseAndConfirm("foo contains ({prefix:true}fuzzy(\"a\"))");
}
@Test
diff --git a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
index 29a651aabf4..91f5984481a 100644
--- a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
@@ -437,23 +437,27 @@ public class YqlParserTestCase {
QueryTree x = parse("select foo from bar where baz contains fuzzy(\"a b\")");
Item root = x.getRoot();
assertSame(FuzzyItem.class, root.getClass());
- assertEquals("baz", ((FuzzyItem) root).getIndexName());
- assertEquals("a b", ((FuzzyItem) root).stringValue());
- assertEquals(FuzzyItem.DEFAULT_MAX_EDIT_DISTANCE, ((FuzzyItem) root).getMaxEditDistance());
- assertEquals(FuzzyItem.DEFAULT_PREFIX_LENGTH, ((FuzzyItem) root).getPrefixLength());
+ var fuzzy = (FuzzyItem) root;
+ assertEquals("baz", fuzzy.getIndexName());
+ assertEquals("a b", fuzzy.stringValue());
+ assertEquals(FuzzyItem.DEFAULT_MAX_EDIT_DISTANCE, fuzzy.getMaxEditDistance());
+ assertEquals(FuzzyItem.DEFAULT_PREFIX_LENGTH, fuzzy.getPrefixLength());
+ assertFalse(fuzzy.isPrefixMatch());
}
@Test
void testFuzzyAnnotations() {
QueryTree x = parse(
- "select foo from bar where baz contains ({maxEditDistance: 3, prefixLength: 10}fuzzy(\"a b\"))"
+ "select foo from bar where baz contains ({maxEditDistance: 3, prefixLength: 10, prefix: true}fuzzy(\"a b\"))"
);
Item root = x.getRoot();
assertSame(FuzzyItem.class, root.getClass());
- assertEquals("baz", ((FuzzyItem) root).getIndexName());
- assertEquals("a b", ((FuzzyItem) root).stringValue());
- assertEquals(3, ((FuzzyItem) root).getMaxEditDistance());
- assertEquals(10, ((FuzzyItem) root).getPrefixLength());
+ var fuzzy = (FuzzyItem) root;
+ assertEquals("baz", fuzzy.getIndexName());
+ assertEquals("a b", fuzzy.stringValue());
+ assertEquals(3, fuzzy.getMaxEditDistance());
+ assertEquals(10, fuzzy.getPrefixLength());
+ assertTrue(fuzzy.isPrefixMatch());
}
@Test
diff --git a/container-search/src/test/java/com/yahoo/select/SelectTestCase.java b/container-search/src/test/java/com/yahoo/select/SelectTestCase.java
index f4571f04a5d..f863816dab2 100644
--- a/container-search/src/test/java/com/yahoo/select/SelectTestCase.java
+++ b/container-search/src/test/java/com/yahoo/select/SelectTestCase.java
@@ -671,8 +671,39 @@ public class SelectTestCase {
QueryTree x = parseWhere("{ \"contains\": [\"description\", { \"fuzzy\": [\"a b\"] }] }");
Item root = x.getRoot();
assertSame(FuzzyItem.class, root.getClass());
- assertEquals("description", ((FuzzyItem) root).getIndexName());
- assertEquals("a b", ((FuzzyItem) root).stringValue());
+ var fuzzy = (FuzzyItem) root;
+ assertEquals("description", fuzzy.getIndexName());
+ assertEquals("a b", fuzzy.stringValue());
+ assertEquals(FuzzyItem.DEFAULT_MAX_EDIT_DISTANCE, fuzzy.getMaxEditDistance());
+ assertEquals(FuzzyItem.DEFAULT_PREFIX_LENGTH, fuzzy.getPrefixLength());
+ assertFalse(fuzzy.isPrefixMatch());
+ }
+
+ @Test
+ void fuzzy_with_annotations() {
+ var where = """
+ {
+ "contains": ["description", {
+ "fuzzy": {
+ "children": ["a b"],
+ "attributes": {
+ "maxEditDistance": 3,
+ "prefixLength": 10,
+ "prefix": true
+ }
+ }
+ }]
+ }
+ """;
+ QueryTree x = parseWhere(where);
+ Item root = x.getRoot();
+ assertSame(FuzzyItem.class, root.getClass());
+ var fuzzy = (FuzzyItem) root;
+ assertEquals("description", fuzzy.getIndexName());
+ assertEquals("a b", fuzzy.stringValue());
+ assertEquals(3, fuzzy.getMaxEditDistance());
+ assertEquals(10, fuzzy.getPrefixLength());
+ assertTrue(fuzzy.isPrefixMatch());
}
//------------------------------------------------------------------- grouping tests