summaryrefslogtreecommitdiffstats
path: root/config-model/src/main/java/com/yahoo/schema/processing/UriHack.java
diff options
context:
space:
mode:
Diffstat (limited to 'config-model/src/main/java/com/yahoo/schema/processing/UriHack.java')
-rw-r--r--config-model/src/main/java/com/yahoo/schema/processing/UriHack.java77
1 files changed, 77 insertions, 0 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/UriHack.java b/config-model/src/main/java/com/yahoo/schema/processing/UriHack.java
new file mode 100644
index 00000000000..a4773a42ed6
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/processing/UriHack.java
@@ -0,0 +1,77 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.processing;
+
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.document.ArrayDataType;
+import com.yahoo.document.CollectionDataType;
+import com.yahoo.document.DataType;
+import com.yahoo.document.WeightedSetDataType;
+import com.yahoo.schema.RankProfileRegistry;
+import com.yahoo.schema.Schema;
+import com.yahoo.schema.document.SDField;
+import com.yahoo.schema.document.Stemming;
+import com.yahoo.vespa.model.container.search.QueryProfiles;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * @author baldersheim
+ */
+public class UriHack extends Processor {
+
+ private static final List<String> URL_SUFFIX =
+ Arrays.asList("scheme", "host", "port", "path", "query", "fragment", "hostname");
+
+ UriHack(Schema schema,
+ DeployLogger deployLogger,
+ RankProfileRegistry rankProfileRegistry,
+ QueryProfiles queryProfiles) {
+ super(schema, deployLogger, rankProfileRegistry, queryProfiles);
+ }
+
+ @Override
+ public void process(boolean validate, boolean documentsOnly) {
+ for (SDField field : schema.allConcreteFields()) {
+ if (field.doesIndexing()) {
+ DataType fieldType = field.getDataType();
+ if (fieldType instanceof CollectionDataType) {
+ fieldType = ((CollectionDataType)fieldType).getNestedType();
+ }
+ if (fieldType == DataType.URI) {
+ processField(schema, field);
+ }
+ }
+ }
+ }
+
+ private void processField(Schema schema, SDField uriField) {
+ String uriName = uriField.getName();
+ uriField.setStemming(Stemming.NONE);
+ DataType generatedType = DataType.STRING;
+ if (uriField.getDataType() instanceof ArrayDataType) {
+ generatedType = new ArrayDataType(DataType.STRING);
+ }
+ else if (uriField.getDataType() instanceof WeightedSetDataType) {
+ WeightedSetDataType wdt = (WeightedSetDataType) uriField.getDataType();
+ generatedType = new WeightedSetDataType(DataType.STRING, wdt.createIfNonExistent(), wdt.removeIfZero());
+ }
+
+ for (String suffix : URL_SUFFIX) {
+ String partName = uriName + "." + suffix;
+ // I wonder if this is explicit in qrs or implicit in backend?
+ // search.addFieldSetItem(uriName, partName);
+ SDField partField = new SDField(schema.getDocument(), partName, generatedType);
+ partField.setIndexStructureField(uriField.doesIndexing());
+ partField.setRankType(uriField.getRankType());
+ partField.setStemming(Stemming.NONE);
+ partField.getNormalizing().inferLowercase();
+ if (uriField.getIndex(suffix) != null) {
+ partField.addIndex(uriField.getIndex(suffix));
+ }
+ schema.addExtraField(partField);
+ schema.fieldSets().addBuiltInFieldSetItem(BuiltInFieldSets.INTERNAL_FIELDSET_NAME, partField.getName());
+ }
+ }
+
+}