aboutsummaryrefslogtreecommitdiffstats
path: root/container-search
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@vespa.ai> 2023-05-22 23:08:48 +0200
committer: Jon Bratseth <bratseth@vespa.ai> 2023-05-22 23:08:48 +0200
commit: cc60531ac22a7e9601055174a02a6e67c428f800 (patch)
tree: 8a1f336745c8ae2da36ca55501e1192ad111ac32 /container-search
parent: 179a1d90ca76fa61bcbeb3967a58fd3e9b5e9654 (diff)
Always treat each symbol as a separate token
Diffstat (limited to 'container-search')
-rw-r--r-- container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java | 4
-rw-r--r-- container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java | 2
2 files changed, 4 insertions, 2 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java
index c1d415b8e27..01bb606e9ee 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java
@@ -107,7 +107,9 @@ public final class Tokenizer {
if (i >= source.length()) break;
int c = source.codePointAt(i);
- if (characterClasses.isLetterOrDigit(c) || (c == '\'' && acceptApostropheAsWordCharacter(currentIndex))) {
+ if (characterClasses.isSymbol(c)) { // treat each symbol is a separate word
+ addToken(WORD, Character.toString(c), i, i + 1);
+ } else if (characterClasses.isLetterOrDigit(c) || (c == '\'' && acceptApostropheAsWordCharacter(currentIndex))) {
i = consumeWordOrNumber(i, currentIndex);
} else if (Character.isWhitespace(c)) {
addToken(SPACE, " ", i, i + 1);
diff --git a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java
index 475b7beb879..c2f533d4cfd 100644
--- a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java
+++ b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java
@@ -2589,7 +2589,7 @@ public class ParseTestCase {
String emoji2 = "\uD83D\uDE00"; // 😀
tester.assertParsed(emoji2, emoji2, Query.Type.ANY);
- tester.assertParsed(emoji1 + emoji2, emoji1 + emoji2, Query.Type.ANY);
+ tester.assertParsed("AND " + emoji1 + " " + emoji2, emoji1 + emoji2, Query.Type.ANY);
}
}