diff options
author | Jon Bratseth <bratseth@vespa.ai> | 2023-05-22 23:08:48 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@vespa.ai> | 2023-05-22 23:08:48 +0200 |
commit | cc60531ac22a7e9601055174a02a6e67c428f800 (patch) | |
tree | 8a1f336745c8ae2da36ca55501e1192ad111ac32 /container-search/src | |
parent | 179a1d90ca76fa61bcbeb3967a58fd3e9b5e9654 (diff) |
Always treat each symbol as a separate token
Diffstat (limited to 'container-search/src')
-rw-r--r-- | container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java | 4 | ||||
-rw-r--r-- | container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java | 2 |
2 files changed, 4 insertions, 2 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java
index c1d415b8e27..01bb606e9ee 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java
@@ -107,7 +107,9 @@ public final class Tokenizer {
  if (i >= source.length()) break;
  int c = source.codePointAt(i);
- if (characterClasses.isLetterOrDigit(c) || (c == '\'' && acceptApostropheAsWordCharacter(currentIndex))) {
+ if (characterClasses.isSymbol(c)) { // treat each symbol is a separate word
+ addToken(WORD, Character.toString(c), i, i + 1);
+ } else if (characterClasses.isLetterOrDigit(c) || (c == '\'' && acceptApostropheAsWordCharacter(currentIndex))) {
  i = consumeWordOrNumber(i, currentIndex);
  } else if (Character.isWhitespace(c)) {
  addToken(SPACE, " ", i, i + 1);
diff --git a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java
index 475b7beb879..c2f533d4cfd 100644
--- a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java
+++ b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java
@@ -2589,7 +2589,7 @@ public class ParseTestCase {
  String emoji2 = "\uD83D\uDE00"; // 😀
  tester.assertParsed(emoji2, emoji2, Query.Type.ANY);
- tester.assertParsed(emoji1 + emoji2, emoji1 + emoji2, Query.Type.ANY);
+ tester.assertParsed("AND " + emoji1 + " " + emoji2, emoji1 + emoji2, Query.Type.ANY);
  }
  }