about | summary | refs | log | tree | commit | diff | stats
path: root/indexinglanguage
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@gmail.com> 2021-04-26 10:24:44 +0200
committer Jon Bratseth <bratseth@gmail.com> 2022-11-02 13:39:24 +0100
commit 055e776ed2d47ea3bc95b493f075a0c3d26b9729 (patch)
tree 5afdf905516125d94450af72522a16399cee50a0 /indexinglanguage
parent 522935f83ecfa8ed6f0e55859ecb9330d012c73b (diff)
Linguistics context WIP
Diffstat (limited to 'indexinglanguage')
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/FieldPathUpdateAdapter.java 3
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/FieldUpdateAdapter.java 4
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/IdentityFieldPathUpdateAdapter.java 4
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java 8
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/FieldValueAdapter.java 4
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java 2
-rw-r--r-- indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java 11
7 files changed, 32 insertions, 4 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/FieldPathUpdateAdapter.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/FieldPathUpdateAdapter.java
index 3c3f75a6693..0b4308d68a9 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/FieldPathUpdateAdapter.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/FieldPathUpdateAdapter.java
@@ -66,6 +66,9 @@ public class FieldPathUpdateAdapter implements UpdateAdapter {
return adapter.setOutputValue(exp, fieldName, fieldValue);
}
+ @Override
+ public DocumentType getDocumentType() { return adapter.getDocumentType(); }
+
@SuppressWarnings({ "unchecked", "rawtypes" })
private void createUpdatesAt(List<FieldPathEntry> path, FieldValue value, int idx, DocumentUpdate out) {
FieldPath updatePath = update.getFieldPath();
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/FieldUpdateAdapter.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/FieldUpdateAdapter.java
index 4182c133000..0ab962cd908 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/FieldUpdateAdapter.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/FieldUpdateAdapter.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.indexinglanguage;
import com.yahoo.document.DataType;
import com.yahoo.document.Document;
+import com.yahoo.document.DocumentType;
import com.yahoo.document.DocumentUpdate;
import com.yahoo.document.Field;
import com.yahoo.document.FieldPath;
@@ -94,6 +95,9 @@ public class FieldUpdateAdapter implements UpdateAdapter {
return adapter.setOutputValue(exp, fieldName, fieldValue);
}
+ @Override
+ public DocumentType getDocumentType() { return adapter.getDocumentType(); }
+
public static FieldUpdateAdapter fromPartialUpdate(DocumentAdapter documentAdapter, ValueUpdate valueUpdate) {
return new FieldUpdateAdapter(null, documentAdapter, new PartialBuilder(valueUpdate));
}
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/IdentityFieldPathUpdateAdapter.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/IdentityFieldPathUpdateAdapter.java
index 5406ca67c63..6bca95e3f47 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/IdentityFieldPathUpdateAdapter.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/IdentityFieldPathUpdateAdapter.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.indexinglanguage;
import com.yahoo.document.DataType;
import com.yahoo.document.Document;
+import com.yahoo.document.DocumentType;
import com.yahoo.document.DocumentUpdate;
import com.yahoo.document.FieldPath;
import com.yahoo.document.datatypes.FieldValue;
@@ -66,4 +67,7 @@ public class IdentityFieldPathUpdateAdapter implements UpdateAdapter {
fwdAdapter.tryOutputType(exp, fieldName, valueType);
}
+ @Override
+ public DocumentType getDocumentType() { return fwdAdapter.getDocumentType(); }
+
}
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java
index 389259cc811..650e7ee06ff 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java
@@ -2,6 +2,7 @@
package com.yahoo.vespa.indexinglanguage.expressions;
import com.yahoo.document.DataType;
+import com.yahoo.document.DocumentType;
import com.yahoo.document.FieldPath;
import com.yahoo.document.datatypes.FieldValue;
import com.yahoo.language.Language;
@@ -21,6 +22,7 @@ public class ExecutionContext implements FieldTypeAdapter, FieldValueAdapter, Cl
private FieldValue value;
private Language language;
+ /** For testing only. */
public ExecutionContext() {
this(null);
}
@@ -72,6 +74,12 @@ public class ExecutionContext implements FieldTypeAdapter, FieldValueAdapter, Cl
return this;
}
+ @Override
+ public DocumentType getDocumentType() {
+ if (adapter == null) return null; // Only happens in tests
+ return adapter.getDocumentType();
+ }
+
public FieldValueAdapter getAdapter() {
return adapter;
}
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/FieldValueAdapter.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/FieldValueAdapter.java
index 6f6074db2fd..d5b595490cb 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/FieldValueAdapter.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/FieldValueAdapter.java
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.indexinglanguage.expressions;
+import com.yahoo.document.DocumentType;
import com.yahoo.document.FieldPath;
import com.yahoo.document.datatypes.FieldValue;
@@ -9,7 +10,10 @@ import com.yahoo.document.datatypes.FieldValue;
*/
public interface FieldValueAdapter extends FieldTypeAdapter {
+ DocumentType getDocumentType();
+
FieldValue getInputValue(String fieldName);
+
FieldValue getInputValue(FieldPath fieldPath);
FieldValueAdapter setOutputValue(Expression exp, String fieldName, FieldValue fieldValue);
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java
index 577bc1e8d28..91bd85420e0 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java
@@ -43,7 +43,7 @@ public final class TokenizeExpression extends Expression {
cfg.setLanguage(lang);
}
LinguisticsAnnotator annotator = new LinguisticsAnnotator(linguistics, cfg);
- annotator.annotate(output);
+ annotator.annotate(output, context);
}
@Override
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
index 173df65a47e..879a6b2ce8e 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java
@@ -9,9 +9,11 @@ import com.yahoo.document.annotation.SpanTree;
import com.yahoo.document.annotation.SpanTrees;
import com.yahoo.document.datatypes.StringFieldValue;
import com.yahoo.language.Linguistics;
+import com.yahoo.language.process.LinguisticsContext;
import com.yahoo.language.process.StemMode;
import com.yahoo.language.process.Token;
import com.yahoo.language.process.Tokenizer;
+import com.yahoo.vespa.indexinglanguage.expressions.ExecutionContext;
import java.util.HashMap;
import java.util.Map;
@@ -65,15 +67,18 @@ public class LinguisticsAnnotator {
* @param text the text to annotate
* @return whether anything was annotated
*/
- public boolean annotate(StringFieldValue text) {
+ public boolean annotate(StringFieldValue text, ExecutionContext context) {
if (text.getSpanTree(SpanTrees.LINGUISTICS) != null) return true; // Already annotated with LINGUISTICS.
Tokenizer tokenizer = factory.getTokenizer();
String input = (text.getString().length() <= config.getMaxTokenizeLength())
? text.getString()
: text.getString().substring(0, config.getMaxTokenizeLength());
- Iterable<Token> tokens = tokenizer.tokenize(input, config.getLanguage(), config.getStemMode(),
- config.getRemoveAccents());
+ Iterable<Token> tokens = tokenizer.tokenize(input,
+ config.getLanguage(),
+ config.getStemMode(),
+ config.getRemoveAccents(),
+ new LinguisticsContext(context.getDocumentType().getName()));
TermOccurrences termOccurrences = new TermOccurrences(config.getMaxTermOccurrences());
SpanTree tree = new SpanTree(SpanTrees.LINGUISTICS);
for (Token token : tokens)