summaryrefslogtreecommitdiffstats
path: root/config-model
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@oath.com>2021-07-02 12:56:45 +0200
committerGitHub <noreply@github.com>2021-07-02 12:56:45 +0200
commit7537c42fa661952916cf91eebbb75e6984505100 (patch)
tree572a070aea8f67460a9304e69789bc32597bfb82 /config-model
parent12aa238ae9caa794a864013ec8db84097f0df668 (diff)
parent91ac31746b47eecf998a3d07173f0158bfeb4392 (diff)
Merge pull request #18514 from vespa-engine/bratseth/tokenization-test
Add a test
Diffstat (limited to 'config-model')
-rw-r--r--config-model/src/test/derived/tokenization/ilscripts.cfg9
-rw-r--r--config-model/src/test/derived/tokenization/tokenization.sd23
-rwxr-xr-xconfig-model/src/test/java/com/yahoo/searchdefinition/derived/TokenizationTestCase.java19
3 files changed, 51 insertions, 0 deletions
diff --git a/config-model/src/test/derived/tokenization/ilscripts.cfg b/config-model/src/test/derived/tokenization/ilscripts.cfg
new file mode 100644
index 00000000000..ead74110db3
--- /dev/null
+++ b/config-model/src/test/derived/tokenization/ilscripts.cfg
@@ -0,0 +1,9 @@
+maxtermoccurrences 100
+fieldmatchmaxlength 1000000
+ilscript[].doctype "tokenization"
+ilscript[].docfield[] "text"
+ilscript[].docfield[] "text_array"
+ilscript[].content[] "clear_state | guard { input text_array | for_each { lowercase } | for_each { normalize } | for_each { tokenize normalize stem:\"BEST\" } | index text_array_derived | summary text_array_derived; }"
+ilscript[].content[] "clear_state | guard { input text | normalize | tokenize normalize stem:\"BEST\" | index text_derived | summary text_derived; }"
+ilscript[].content[] "clear_state | guard { input text | tokenize normalize stem:\"BEST\" | index text | summary text; }"
+ilscript[].content[] "clear_state | guard { input text_array | for_each { tokenize normalize stem:\"BEST\" } | index text_array | summary text_array; }" \ No newline at end of file
diff --git a/config-model/src/test/derived/tokenization/tokenization.sd b/config-model/src/test/derived/tokenization/tokenization.sd
new file mode 100644
index 00000000000..4510a574d60
--- /dev/null
+++ b/config-model/src/test/derived/tokenization/tokenization.sd
@@ -0,0 +1,23 @@
+schema tokenization {
+
+ document tokenization {
+
+ field text type string {
+ indexing: index | summary
+ }
+
+ field text_array type array<string> {
+ indexing: index | summary
+ }
+
+ }
+
+ field text_derived type string {
+ indexing: input text | normalize | index | summary
+ }
+
+ field text_array_derived type array<string> {
+ indexing: input text_array | for_each { lowercase } | for_each { normalize } | index | summary
+ }
+
+} \ No newline at end of file
diff --git a/config-model/src/test/java/com/yahoo/searchdefinition/derived/TokenizationTestCase.java b/config-model/src/test/java/com/yahoo/searchdefinition/derived/TokenizationTestCase.java
new file mode 100755
index 00000000000..6fe367ef6d1
--- /dev/null
+++ b/config-model/src/test/java/com/yahoo/searchdefinition/derived/TokenizationTestCase.java
@@ -0,0 +1,19 @@
+// Copyright Verizon media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchdefinition.derived;
+
+import com.yahoo.searchdefinition.parser.ParseException;
+import org.junit.Test;
+
+import java.io.IOException;
+
+/**
+ * @author bratseth
+ */
+public class TokenizationTestCase extends AbstractExportingTestCase {
+
+ @Test
+ public void testTokenizationScripts() throws IOException, ParseException {
+ assertCorrectDeriving("tokenization"); // NOTE(review): helper inherited from AbstractExportingTestCase; presumably derives configs from the "tokenization" schema and compares them with the expected files in src/test/derived/tokenization (e.g. ilscripts.cfg) - confirm against the base class
+ }
+
+}