From ac4013dce87e29ffa3e2c4864c54fcffb18354c0 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Mon, 9 Mar 2020 09:47:07 +0100 Subject: Non-functional changes only --- .../com/yahoo/prelude/query/parser/Tokenizer.java | 33 ++++++++++------------ .../query/parser/test/TokenizerTestCase.java | 7 ++++- 2 files changed, 21 insertions(+), 19 deletions(-) (limited to 'container-search') diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java index 61f09e2f7b7..5e243e52057 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java @@ -108,8 +108,7 @@ public final class Tokenizer { if (i >= source.length()) break; int c = source.codePointAt(i); - if (characterClasses.isLetterOrDigit(c) - || (c == '\'' && acceptApostropheAsWordCharacter(currentIndex))) { + if (characterClasses.isLetterOrDigit(c) || (c == '\'' && acceptApostropheAsWordCharacter(currentIndex))) { i = consumeWordOrNumber(i, currentIndex); } else if (Character.isWhitespace(c)) { addToken(SPACE, " ", i, i + 1); @@ -187,7 +186,6 @@ public final class Tokenizer { return true; } - @SuppressWarnings({"deprecation"}) private Index determineCurrentIndex(Index defaultIndex, IndexFacts.Session indexFacts) { int backtrack = tokens.size(); int tokencnt = 0; @@ -328,7 +326,6 @@ public final class Tokenizer { wantEndQuote = true; actualStart = curPos+1; } else if (wantEndQuote && looksLikeExactEnd(curPos+1)) { - // System.err.println("seen quoted token from "+actualStart+" to "+curPos); seenSome = true; wantEndQuote = false; isQuoted = true; @@ -435,7 +432,7 @@ public final class Tokenizer { if (suffStar) { addToken(STAR, "*", starPos, starPos + 1); } - tokens.add(new Token(WORD, source.substring(actualStart, end), true, new Substring(actualStart, end, source))); // XXX: Unsafe? + tokens.add(new Token(WORD, source.substring(actualStart, end), true, new Substring(actualStart, end, source))); // skip terminating quote if (isQuoted) { @@ -451,17 +448,17 @@ public final class Tokenizer { break; end++; } - tokens.add(new Token(WORD, source.substring(start, end), true, new Substring(start, end, source))); // XXX: Unsafe start? - if (end>=source.length()) + tokens.add(new Token(WORD, source.substring(start, end), true, new Substring(start, end, source))); + if (end >= source.length()) return end; else - return end+terminator.length(); // Don't create a token for the terminator + return end + terminator.length(); // Don't create a token for the terminator } private boolean terminatorStartsAt(int start,String terminator) { - int terminatorPosition=0; - while ((terminatorPosition+start)= terminator.length()) @@ -481,8 +478,8 @@ public final class Tokenizer { while (tokenEnd < source.length()) { if (substringSpecialTokens) { - substringSpecialToken=getSpecialToken(tokenEnd); - if (substringSpecialToken!=null) break; + substringSpecialToken = getSpecialToken(tokenEnd); + if (substringSpecialToken != null) break; } int c = source.codePointAt(tokenEnd); @@ -506,7 +503,7 @@ public final class Tokenizer { // underscoresOnly = false; quotesOnly = false; } else if (c == '\'') { - if (!acceptApostropheAsWordCharacter(currentIndex)) { + if ( ! acceptApostropheAsWordCharacter(currentIndex)) { break; } // Otherwise consume apostrophes... @@ -530,15 +527,15 @@ public final class Tokenizer { } } - if (substringSpecialToken==null) + if (substringSpecialToken == null) return --tokenEnd; // TODO: test the logic around tokenEnd with friends - addToken(substringSpecialToken.toToken(tokenEnd,source)); - return --tokenEnd+substringSpecialToken.token().length(); + addToken(substringSpecialToken.toToken(tokenEnd, source)); + return --tokenEnd + substringSpecialToken.token().length(); } private void addToken(Token.Kind kind, String word, int start, int end) { - addToken(new Token(kind, word, false, new Substring(start, end, source))); // XXX: Unsafe? + addToken(new Token(kind, word, false, new Substring(start, end, source))); } private void addToken(Token token) { diff --git a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java index 12d993e8d41..aa2e9dbcf75 100644 --- a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java @@ -46,7 +46,7 @@ public class TokenizerTestCase { Tokenizer tokenizer = new Tokenizer(new SimpleLinguistics()); tokenizer.setSpecialTokens(createSpecialTokens()); - List tokens = tokenizer.tokenize("drive (to hwy88, 88) +or language:en ugcapi_1"); + List tokens = tokenizer.tokenize("drive (to hwy88, 88) +or language:en ugcapi_1 & &a"); assertEquals(new Token(WORD, "drive"), tokens.get(0)); assertEquals(new Token(SPACE, " "), tokens.get(1)); @@ -69,6 +69,11 @@ public class TokenizerTestCase { assertEquals(new Token(WORD, "ugcapi"), tokens.get(18)); assertEquals(new Token(UNDERSCORE, "_"), tokens.get(19)); assertEquals(new Token(NUMBER, "1"), tokens.get(20)); + assertEquals(new Token(SPACE, " "), tokens.get(21)); + assertEquals(new Token(NOISE, ""), tokens.get(22)); + assertEquals(new Token(SPACE, " "), tokens.get(23)); + assertEquals(new Token(NOISE, ""), tokens.get(24)); + assertEquals(new Token(WORD, "a"), tokens.get(25)); } @Test -- cgit v1.2.3