Merge pull request #18514 from vespa-engine/bratseth/tokenization-test

Add a test
author: Jon Bratseth <bratseth@oath.com> 2021-07-02 12:56:45 +0200
committer: GitHub <noreply@github.com> 2021-07-02 12:56:45 +0200
commit: 7537c42fa661952916cf91eebbb75e6984505100 (patch)
tree: 572a070aea8f67460a9304e69789bc32597bfb82 /config-model
parent: 12aa238ae9caa794a864013ec8db84097f0df668 (diff)
parent: 91ac31746b47eecf998a3d07173f0158bfeb4392 (diff)
3 files changed, 51 insertions, 0 deletions
diff --git a/config-model/src/test/derived/tokenization/ilscripts.cfg b/config-model/src/test/derived/tokenization/ilscripts.cfg
new file mode 100644
index 00000000000..ead74110db3
--- /dev/null
+++ b/config-model/src/test/derived/tokenization/ilscripts.cfg
@@ -0,0 +1,9 @@
+maxtermoccurrences 100
+fieldmatchmaxlength 1000000
+ilscript[].doctype "tokenization"
+ilscript[].docfield[] "text"
+ilscript[].docfield[] "text_array"
+ilscript[].content[] "clear_state | guard { input text_array | for_each { lowercase } | for_each { normalize } | for_each { tokenize normalize stem:\"BEST\" } | index text_array_derived | summary text_array_derived; }"
+ilscript[].content[] "clear_state | guard { input text | normalize | tokenize normalize stem:\"BEST\" | index text_derived | summary text_derived; }"
+ilscript[].content[] "clear_state | guard { input text | tokenize normalize stem:\"BEST\" | index text | summary text; }"
+ilscript[].content[] "clear_state | guard { input text_array | for_each { tokenize normalize stem:\"BEST\" } | index text_array | summary text_array; }"
+\ No newline at end of file
diff --git a/config-model/src/test/derived/tokenization/tokenization.sd b/config-model/src/test/derived/tokenization/tokenization.sd
new file mode 100644
index 00000000000..4510a574d60
--- /dev/null
+++ b/config-model/src/test/derived/tokenization/tokenization.sd
@@ -0,0 +1,23 @@
+schema tokenization {
+
+  document tokenization {
+
+    field text type string {
+      indexing: index | summary
+    }
+
+    field text_array type array<string> {
+      indexing: index | summary
+    }
+
+  }
+
+  field text_derived type string {
+    indexing: input text | normalize | index | summary
+  }
+
+  field text_array_derived type array<string> {
+    indexing: input text_array | for_each { lowercase } | for_each { normalize } | index | summary
+  }
+
+}
+\ No newline at end of file
diff --git a/config-model/src/test/java/com/yahoo/searchdefinition/derived/TokenizationTestCase.java b/config-model/src/test/java/com/yahoo/searchdefinition/derived/TokenizationTestCase.java
new file mode 100755
index 00000000000..6fe367ef6d1
--- /dev/null
+++ b/config-model/src/test/java/com/yahoo/searchdefinition/derived/TokenizationTestCase.java
@@ -0,0 +1,19 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchdefinition.derived;
+
+import com.yahoo.searchdefinition.parser.ParseException;
+import org.junit.Test;
+
+import java.io.IOException;
+
+/**
+ * @author bratseth
+ */
+public class TokenizationTestCase extends AbstractExportingTestCase { // Derivation test for the "tokenization" test schema.
+
+    @Test
+    public void testTokenizationScripts() throws IOException, ParseException {
+        assertCorrectDeriving("tokenization"); // NOTE(review): inherited helper; presumably derives config from src/test/derived/tokenization/tokenization.sd and compares it with the checked-in ilscripts.cfg - confirm in AbstractExportingTestCase.
+    }
+
+}
author	Jon Bratseth <bratseth@oath.com>	2021-07-02 12:56:45 +0200
committer	GitHub <noreply@github.com>	2021-07-02 12:56:45 +0200
commit	7537c42fa661952916cf91eebbb75e6984505100 (patch)
tree	572a070aea8f67460a9304e69789bc32597bfb82 /config-model
parent	12aa238ae9caa794a864013ec8db84097f0df668 (diff)
parent	91ac31746b47eecf998a3d07173f0158bfeb4392 (diff)