diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2017-05-19 12:30:14 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2017-05-19 12:30:14 +0200 |
commit | 7bd62f04b17f15aec6fa5dee504a835a8d7be047 (patch) | |
tree | c5d1f2f7bdc30fda4cc71911625dfa9adad6a1fc /container-search | |
parent | deb8fc1e9f2aeff3528f9a85e028a1a8b8529a20 (diff) |
Change NFKC normalization default to false
Currently, NFKC normalization:
- always happens for tokenized fields regardless of this setting (on both the query and the indexing side)
- never happens for non-tokenized fields on the indexing side
Because of this, OFF (false) is a better default: it does not change anything for tokenized fields,
but makes the query side consistent with the indexing side by default for non-tokenized fields.
Diffstat (limited to 'container-search')
-rw-r--r-- | container-search/src/main/java/com/yahoo/search/yql/YqlParser.java | 4 | ||||
-rw-r--r-- | container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java | 5 |
2 files changed, 6 insertions, 3 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java index 176ef051968..dc01b0dd3fb 100644 --- a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java +++ b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java @@ -1267,8 +1267,10 @@ public class YqlParser implements Parser { boolean exactMatch, Language language) { String wordData = rawWord; - if (getAnnotation(ast, NFKC, Boolean.class, Boolean.TRUE, + if (getAnnotation(ast, NFKC, Boolean.class, Boolean.FALSE, "setting for whether to NFKC normalize input data")) { + // NOTE: If this is set to FALSE (default), we will still NFKC normalize text data + // during tokenization/segmentation, as that is always turned on also on the indexing side. wordData = normalizer.normalize(wordData); } boolean fromQuery = getAnnotation(ast, IMPLICIT_TRANSFORMS, diff --git a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java index 32facd7a754..42f285cf096 100644 --- a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java @@ -354,9 +354,10 @@ public class YqlParserTestCase { assertEquals("\u00e5", getRootWord("select foo from bar where baz contains " + "([ {\"nfkc\": true} ]\"a\\u030a\");").getWord()); - assertEquals("\u00e5", + assertEquals("No NKFC by default", + "a\u030a", getRootWord("select foo from bar where baz contains " + - "\"a\\u030a\";").getWord()); + "(\"a\\u030a\");").getWord()); } @Test |