about | summary | refs | log | tree | commit | diff | stats
path: root/container-search
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@yahoo-inc.com> 2017-05-19 12:30:14 +0200
committer: Jon Bratseth <bratseth@yahoo-inc.com> 2017-05-19 12:30:14 +0200
commit: 7bd62f04b17f15aec6fa5dee504a835a8d7be047 (patch)
tree: c5d1f2f7bdc30fda4cc71911625dfa9adad6a1fc /container-search
parent: deb8fc1e9f2aeff3528f9a85e028a1a8b8529a20 (diff)
Change NFKC normalization default to false
Currently, NFKC normalization:
- always happens for tokenized fields regardless of this setting (both query and index side)
- never happens for non-tokenized fields on the indexing side

Because of this, OFF is a better default: it does not change anything for tokenized fields, but makes query-side behavior consistent with the indexing side by default for non-tokenized fields.
Diffstat (limited to 'container-search')
-rw-r--r--  container-search/src/main/java/com/yahoo/search/yql/YqlParser.java  4
-rw-r--r--  container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java  5
2 files changed, 6 insertions, 3 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
index 176ef051968..dc01b0dd3fb 100644
--- a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
+++ b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
@@ -1267,8 +1267,10 @@ public class YqlParser implements Parser {
boolean exactMatch,
Language language) {
String wordData = rawWord;
- if (getAnnotation(ast, NFKC, Boolean.class, Boolean.TRUE,
+ if (getAnnotation(ast, NFKC, Boolean.class, Boolean.FALSE,
"setting for whether to NFKC normalize input data")) {
+ // NOTE: If this is set to FALSE (default), we will still NFKC normalize text data
+ // during tokenization/segmentation, as that is always turned on also on the indexing side.
wordData = normalizer.normalize(wordData);
}
boolean fromQuery = getAnnotation(ast, IMPLICIT_TRANSFORMS,
diff --git a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
index 32facd7a754..42f285cf096 100644
--- a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
@@ -354,9 +354,10 @@ public class YqlParserTestCase {
assertEquals("\u00e5",
getRootWord("select foo from bar where baz contains " +
"([ {\"nfkc\": true} ]\"a\\u030a\");").getWord());
- assertEquals("\u00e5",
+ assertEquals("No NKFC by default",
+ "a\u030a",
getRootWord("select foo from bar where baz contains " +
- "\"a\\u030a\";").getWord());
+ "(\"a\\u030a\");").getWord());
}
@Test