diff options
Diffstat (limited to 'container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java')
-rw-r--r-- | container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java | 34 |
1 files changed, 13 insertions, 21 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java index b71bd57539f..2dc2254df68 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java @@ -3,7 +3,6 @@ package com.yahoo.prelude.query.parser; import com.yahoo.language.Linguistics; import com.yahoo.language.process.CharacterClasses; -import com.yahoo.language.process.SpecialTokens; import com.yahoo.prelude.Index; import com.yahoo.prelude.IndexFacts; import com.yahoo.prelude.query.Substring; @@ -201,7 +200,7 @@ public final class Tokenizer { } StringBuilder tmp = new StringBuilder(); for (int i = 0; i < tokencnt; i++) { - Token useToken = tokens.get(backtrack + i); + Token useToken = tokens.get(backtrack+i); tmp.append(useToken.image); } String indexName = tmp.toString(); @@ -217,20 +216,20 @@ public final class Tokenizer { } private int consumeSpecialToken(int start) { - SpecialTokens.Token token = getSpecialToken(start); - if (token == null) return start; - tokens.add(toToken(token, start, source)); - return start + token.token().length(); + SpecialTokens.SpecialToken specialToken=getSpecialToken(start); + if (specialToken==null) return start; + tokens.add(specialToken.toToken(start,source)); + return start + specialToken.token().length(); } - private SpecialTokens.Token getSpecialToken(int start) { + private SpecialTokens.SpecialToken getSpecialToken(int start) { if (specialTokens == null) return null; return specialTokens.tokenize(source.substring(start), substringSpecialTokens); } private int consumeExact(int start,Index index) { if (index.getExactTerminator() == null) return consumeHeuristicExact(start); - return consumeToTerminator(start, index.getExactTerminator()); + return consumeToTerminator(start,index.getExactTerminator()); } private boolean looksLikeExactEnd(int end) { @@ -468,7 +467,7 @@ public final class Tokenizer { /** Consumes a word or number <i>and/or possibly</i> a special token starting within this word or number */ private int consumeWordOrNumber(int start, Index currentIndex) { int tokenEnd = start; - SpecialTokens.Token substringToken = null; + SpecialTokens.SpecialToken substringSpecialToken = null; boolean digitsOnly = true; // int underscores = 0; // boolean underscoresOnly = true; @@ -476,8 +475,8 @@ public final class Tokenizer { while (tokenEnd < source.length()) { if (substringSpecialTokens) { - substringToken = getSpecialToken(tokenEnd); - if (substringToken != null) break; + substringSpecialToken = getSpecialToken(tokenEnd); + if (substringSpecialToken != null) break; } int c = source.codePointAt(tokenEnd); @@ -525,11 +524,11 @@ public final class Tokenizer { } } - if (substringToken == null) + if (substringSpecialToken == null) return --tokenEnd; // TODO: test the logic around tokenEnd with friends - addToken(toToken(substringToken, tokenEnd, source)); - return --tokenEnd + substringToken.token().length(); + addToken(substringSpecialToken.toToken(tokenEnd, source)); + return --tokenEnd + substringSpecialToken.token().length(); } private void addToken(Token.Kind kind, String word, int start, int end) { @@ -540,11 +539,4 @@ public final class Tokenizer { tokens.add(token); } - public Token toToken(SpecialTokens.Token specialToken, int start, String rawSource) { - return new Token(Token.Kind.WORD, - specialToken.replacement(), - true, - new Substring(start, start + specialToken.token().length(), rawSource)); // XXX: Unsafe? - } - } |