aboutsummaryrefslogtreecommitdiffstats
path: root/config-model/src/main/java/com/yahoo/schema/processing/WordMatch.java
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2022-05-19 12:03:06 +0200
committerJon Bratseth <bratseth@gmail.com>2022-05-19 12:03:06 +0200
commit5c24dc5c9642a8d9ed70aee4c950fd0678a1ebec (patch)
treebd9b74bf00c832456f0b83c1b2cd7010be387d68 /config-model/src/main/java/com/yahoo/schema/processing/WordMatch.java
parentf17c4fe7de4c55f5c4ee61897eab8c2f588d8405 (diff)
Rename the 'searchdefinition' package to 'schema'
Diffstat (limited to 'config-model/src/main/java/com/yahoo/schema/processing/WordMatch.java')
-rw-r--r--config-model/src/main/java/com/yahoo/schema/processing/WordMatch.java51
1 files changed, 51 insertions, 0 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/WordMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/WordMatch.java
new file mode 100644
index 00000000000..1e312b71afd
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/processing/WordMatch.java
@@ -0,0 +1,51 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.processing;
+
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.schema.RankProfileRegistry;
+import com.yahoo.schema.document.MatchType;
+import com.yahoo.schema.document.SDField;
+import com.yahoo.schema.document.Stemming;
+import com.yahoo.schema.Schema;
+import com.yahoo.vespa.model.container.search.QueryProfiles;
+
+/**
+ * The implementation of word matching - with word matching the field is assumed to contain a single "word" - some
+ * contiguous sequence of word and number characters - but without changing the data at the indexing side (as with text
+ * matching) to enforce this. Word matching is thus almost like exact matching on the indexing side (no action taken),
+ * and like text matching on the query side. This may be suitable for attributes, where people both expect the data to
+ * be left as in the input document, and trivially written queries to work by default. However, this may easily lead to
+ * data which cannot be matched at all as the indexing and query side does not agree.
+ *
+ * @author bratseth
+ */
+public class WordMatch extends Processor {
+
+ public WordMatch(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) {
+ super(schema, deployLogger, rankProfileRegistry, queryProfiles);
+ }
+
+ public void process(boolean validate, boolean documentsOnly) {
+ for (SDField field : schema.allConcreteFields()) {
+ processFieldRecursive(field);
+ }
+ }
+
+ private void processFieldRecursive(SDField field) {
+ processField(field);
+ for (SDField structField : field.getStructFields()) {
+ processField(structField);
+ }
+ }
+
+ private void processField(SDField field) {
+ if (!field.getMatching().getType().equals(MatchType.WORD)) {
+ return;
+ }
+ field.setStemming(Stemming.NONE);
+ field.getNormalizing().inferLowercase();
+ field.addQueryCommand("word");
+ }
+
+
+}