aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tor Brede Vekterli <vekterli@vespa.ai> 2024-01-22 14:54:27 +0000
committer Tor Brede Vekterli <vekterli@vespa.ai> 2024-01-22 14:54:27 +0000
commit 00c89ec2a67dbd32e88204cc6a0cafa099cf0f0c (patch)
tree 36db685df5fea09ad857aaae3bc8037887cf72fc
parent ccda952db487445f3522eecbcbfee4a6f6a90c32 (diff)
Treat regex and fuzzy whole-field matching as 1 logical word
We have concluded that this is the most semantically correct way of reporting the count, and as a bonus it avoids having to do a separate pass over the string buffer.
-rw-r--r-- streamingvisitors/src/tests/searcher/searcher_test.cpp 16
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp 4
2 files changed, 18 insertions, 2 deletions
diff --git a/streamingvisitors/src/tests/searcher/searcher_test.cpp b/streamingvisitors/src/tests/searcher/searcher_test.cpp
index 705e14c11a5..eb233db9632 100644
--- a/streamingvisitors/src/tests/searcher/searcher_test.cpp
+++ b/streamingvisitors/src/tests/searcher/searcher_test.cpp
@@ -729,6 +729,14 @@ TEST("utf8 flexible searcher handles regexes with explicit anchoring") {
TEST_DO(assertString(fs, "#^foo$", "oo", Hits()));
}
+TEST("utf8 flexible searcher regex matching treats field as 1 word") {
+ UTF8FlexibleStringFieldSearcher fs(0);
+ // Match case
+ TEST_DO(assertFieldInfo(fs, "#.*", "foo bar baz", QTFieldInfo(0, 1, 1)));
+ // Mismatch case
+ TEST_DO(assertFieldInfo(fs, "#^zoid$", "foo bar baz", QTFieldInfo(0, 0, 1)));
+}
+
TEST("utf8 flexible searcher handles fuzzy search in uncased mode") {
UTF8FlexibleStringFieldSearcher fs(0);
// Term syntax (only applies to these tests):
@@ -819,6 +827,14 @@ TEST("utf8 flexible searcher caps oversized fuzzy prefix length to term length")
TEST_DO(assertString(fs, "%{5,9001}zoid", "boid", Hits()));
}
+TEST("utf8 flexible searcher fuzzy matching treats field as 1 word") {
+ UTF8FlexibleStringFieldSearcher fs(0);
+ // Match case
+ TEST_DO(assertFieldInfo(fs, "%{1}foo bar baz", "foo jar baz", QTFieldInfo(0, 1, 1)));
+ // Mismatch case
+ TEST_DO(assertFieldInfo(fs, "%{1}foo", "foo bar baz", QTFieldInfo(0, 0, 1)));
+}
+
TEST("bool search") {
BoolFieldSearcher fs(0);
TEST_DO(assertBool(fs, "true", true, true));
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp
index d648d2e252e..5f626ccb962 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp
@@ -37,7 +37,7 @@ UTF8FlexibleStringFieldSearcher::match_regexp(const FieldRef & f, search::stream
if (regexp_term->regexp().partial_match({f.data(), f.size()})) {
addHit(qt, 0);
}
- return countWords(f);
+ return 1;
}
size_t
@@ -50,7 +50,7 @@ UTF8FlexibleStringFieldSearcher::match_fuzzy(const FieldRef & f, search::streami
if (fuzzy_term->is_match({f.data(), f.size()})) {
addHit(qt, 0);
}
- return countWords(f);
+ return 1;
}
size_t