summary refs log tree commit diff stats
path: root/config-model
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2017-08-07 18:14:53 +0200
committer Henning Baldersheim <balder@yahoo-inc.com> 2017-08-07 18:15:39 +0200
commit 92c0fcfe10c06163968dcfa2ff07993c2f2f74f5 (patch)
tree 03b8a6e54809c2b2d0fe603f426d8ddaf3aaf63e /config-model
parent a69f61901d6a243eec05d7a8d60eecbf28d70931 (diff)
Add capping of fields before tokenizing
Diffstat (limited to 'config-model')
-rw-r--r-- config-model/src/main/java/com/yahoo/searchdefinition/processing/TextMatch.java 3
-rw-r--r-- config-model/src/test/derived/prefixexactattribute/ilscripts.cfg 2
2 files changed, 4 insertions, 1 deletion
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/processing/TextMatch.java b/config-model/src/main/java/com/yahoo/searchdefinition/processing/TextMatch.java
index 9edc3dd00b1..a4d7b1b4054 100644
--- a/config-model/src/main/java/com/yahoo/searchdefinition/processing/TextMatch.java
+++ b/config-model/src/main/java/com/yahoo/searchdefinition/processing/TextMatch.java
@@ -71,6 +71,9 @@ public class TextMatch extends Processor {
}
ret.setStemMode(activeStemming.toStemMode());
ret.setRemoveAccents(field.getNormalizing().doRemoveAccents());
+ if ((field.getMatching() != null) && (field.getMatching().maxLength() != null)) {
+ ret.setMaxTokenLength(field.getMatching().maxLength());
+ }
return ret;
}
diff --git a/config-model/src/test/derived/prefixexactattribute/ilscripts.cfg b/config-model/src/test/derived/prefixexactattribute/ilscripts.cfg
index 76c9bf9abf5..4d68178000e 100644
--- a/config-model/src/test/derived/prefixexactattribute/ilscripts.cfg
+++ b/config-model/src/test/derived/prefixexactattribute/ilscripts.cfg
@@ -5,7 +5,7 @@ ilscript[0].docfield[1] "attributefield1"
ilscript[0].docfield[2] "attributefield2"
ilscript[0].docfield[3] "indexfield1"
ilscript[0].docfield[4] "indexfield2"
-ilscript[0].content[0] "clear_state | guard { input indexfield0 | tokenize normalize stem:\"SHORTEST\" | index indexfield0; }"
+ilscript[0].content[0] "clear_state | guard { input indexfield0 | tokenize normalize stem:\"SHORTEST\" max-length:79 | index indexfield0; }"
ilscript[0].content[1] "clear_state | guard { input attributefield1 | attribute attributefield1; }"
ilscript[0].content[2] "clear_state | guard { input attributefield2 | attribute attributefield2; }"
ilscript[0].content[3] "clear_state | guard { input indexfield1 | exact | index indexfield1; }"