diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2017-01-20 15:12:15 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2017-01-20 15:12:15 +0100 |
commit | 09caf52b327f6a48af8acf02872a49e08d75c9c9 (patch) | |
tree | 7e3b4422ebe5a6ec71e2b2fbaeabb3cb4306f226 /container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java | |
parent | 262d072c1ac996b34f6c70efc95853be699ca935 (diff) |
Detect language after tokenization
This is a prerequisite for being smarter about which subset of the input text is used for language detection.
However, it breaks functionality in one subtle way: if an application does not pass the language explicitly (so that
it must be detected), the input is CJK, and special tokens are configured, those special tokens will
not be detected if they are surrounded by word characters (instead of, e.g., spaces).
Diffstat (limited to 'container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java')
-rw-r--r-- | container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java index e0089fb89ea..95cce001469 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/AnyParser.java @@ -35,7 +35,7 @@ public class AnyParser extends SimpleParser { Item filterRoot; setState(queryLanguage, indexFacts); - tokenize(filter, null, indexFacts); + tokenize(filter, null, indexFacts, queryLanguage); filterRoot = anyItems(true); @@ -134,7 +134,7 @@ public class AnyParser extends SimpleParser { Item applyFilter(Item root, String filter, Language queryLanguage, IndexFacts.Session indexFacts) { setState(queryLanguage, indexFacts); - tokenize(filter, null, indexFacts); + tokenize(filter, null, indexFacts, queryLanguage); return filterItems(root); } |