diff options
Diffstat (limited to 'indexinglanguage/src/main/java/com/yahoo/vespa')
9 files changed, 35 insertions, 43 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/FieldPathUpdateAdapter.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/FieldPathUpdateAdapter.java index 0b4308d68a9..3c3f75a6693 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/FieldPathUpdateAdapter.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/FieldPathUpdateAdapter.java @@ -66,9 +66,6 @@ public class FieldPathUpdateAdapter implements UpdateAdapter { return adapter.setOutputValue(exp, fieldName, fieldValue); } - @Override - public DocumentType getDocumentType() { return adapter.getDocumentType(); } - @SuppressWarnings({ "unchecked", "rawtypes" }) private void createUpdatesAt(List<FieldPathEntry> path, FieldValue value, int idx, DocumentUpdate out) { FieldPath updatePath = update.getFieldPath(); diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/FieldUpdateAdapter.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/FieldUpdateAdapter.java index 0ab962cd908..dac710d560b 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/FieldUpdateAdapter.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/FieldUpdateAdapter.java @@ -95,9 +95,6 @@ public class FieldUpdateAdapter implements UpdateAdapter { return adapter.setOutputValue(exp, fieldName, fieldValue); } - @Override - public DocumentType getDocumentType() { return adapter.getDocumentType(); } - public static FieldUpdateAdapter fromPartialUpdate(DocumentAdapter documentAdapter, ValueUpdate valueUpdate) { return new FieldUpdateAdapter(null, documentAdapter, new PartialBuilder(valueUpdate)); } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/IdentityFieldPathUpdateAdapter.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/IdentityFieldPathUpdateAdapter.java index 6bca95e3f47..783346d2aa7 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/IdentityFieldPathUpdateAdapter.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/IdentityFieldPathUpdateAdapter.java @@ -67,7 +67,4 @@ public class IdentityFieldPathUpdateAdapter implements UpdateAdapter { fwdAdapter.tryOutputType(exp, fieldName, valueType); } - @Override - public DocumentType getDocumentType() { return fwdAdapter.getDocumentType(); } - } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/SimpleDocumentAdapter.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/SimpleDocumentAdapter.java index f36c44539c7..36d0c9212dc 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/SimpleDocumentAdapter.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/SimpleDocumentAdapter.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.indexinglanguage; import com.yahoo.document.DataType; import com.yahoo.document.Document; +import com.yahoo.document.DocumentType; import com.yahoo.document.Field; import com.yahoo.document.FieldPath; import com.yahoo.document.datatypes.FieldValue; diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java index 650e7ee06ff..4f4541ba5ee 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java @@ -11,6 +11,7 @@ import com.yahoo.language.detect.Detection; import java.util.HashMap; import java.util.Map; +import java.util.Objects; /** * @author Simon Thoresen Hult @@ -74,12 +75,6 @@ public class ExecutionContext implements FieldTypeAdapter, FieldValueAdapter, Cl return this; } - @Override - public DocumentType getDocumentType() { - if (adapter == null) return null; // Only happens in tests - return adapter.getDocumentType(); - } - public FieldValueAdapter getAdapter() { return adapter; } @@ -98,8 +93,7 @@ public class ExecutionContext implements FieldTypeAdapter, FieldValueAdapter, Cl } public ExecutionContext setLanguage(Language language) { - language.getClass(); - this.language = language; + this.language = Objects.requireNonNull(language); return this; } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/FieldValueAdapter.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/FieldValueAdapter.java index d5b595490cb..1d07318c32d 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/FieldValueAdapter.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/FieldValueAdapter.java @@ -10,8 +10,6 @@ import com.yahoo.document.datatypes.FieldValue; */ public interface FieldValueAdapter extends FieldTypeAdapter { - DocumentType getDocumentType(); - FieldValue getInputValue(String fieldName); FieldValue getInputValue(FieldPath fieldPath); diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java index 91bd85420e0..3f2b6a5825a 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java @@ -2,9 +2,12 @@ package com.yahoo.vespa.indexinglanguage.expressions; import com.yahoo.document.DataType; +import com.yahoo.document.DocumentType; +import com.yahoo.document.Field; import com.yahoo.document.datatypes.StringFieldValue; import com.yahoo.language.Language; import com.yahoo.language.Linguistics; +import com.yahoo.language.process.LinguisticsContext; import com.yahoo.language.process.StemMode; import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig; import com.yahoo.vespa.indexinglanguage.linguistics.LinguisticsAnnotator; @@ -15,6 +18,7 @@ import com.yahoo.vespa.indexinglanguage.linguistics.LinguisticsAnnotator; public final class TokenizeExpression extends Expression { private final Linguistics linguistics; + private LinguisticsContext linguisticsContext = LinguisticsContext.empty(); private final AnnotatorConfig config; public TokenizeExpression(Linguistics linguistics, AnnotatorConfig config) { @@ -32,17 +36,24 @@ public final class TokenizeExpression extends Expression { } @Override + public void setStatementOutput(DocumentType documentType, Field field) { + linguisticsContext = new LinguisticsContext.Builder().schema(documentType.getName()) + .field( field.getName()) + .build(); + } + + @Override protected void doExecute(ExecutionContext context) { StringFieldValue input = (StringFieldValue)context.getValue(); StringFieldValue output = input.clone(); context.setValue(output); - AnnotatorConfig cfg = new AnnotatorConfig(config); + AnnotatorConfig config = new AnnotatorConfig(this.config); Language lang = context.resolveLanguage(linguistics); if (lang != null) { - cfg.setLanguage(lang); + config.setLanguage(lang); } - LinguisticsAnnotator annotator = new LinguisticsAnnotator(linguistics, cfg); + LinguisticsAnnotator annotator = new LinguisticsAnnotator(linguistics, linguisticsContext, config); annotator.annotate(output, context); } @@ -74,13 +85,8 @@ public final class TokenizeExpression extends Expression { @Override public boolean equals(Object obj) { - if (!(obj instanceof TokenizeExpression)) { - return false; - } - TokenizeExpression rhs = (TokenizeExpression)obj; - if (!config.equals(rhs.config)) { - return false; - } + if ( ! (obj instanceof TokenizeExpression rhs)) return false; + if ( ! config.equals(rhs.config)) return false; return true; } @@ -88,4 +94,5 @@ public final class TokenizeExpression extends Expression { public int hashCode() { return getClass().hashCode() + config.hashCode(); } + } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java index 441ac711cc3..03efee5f271 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java @@ -13,7 +13,7 @@ public class AnnotatorConfig implements Cloneable { private Language language; private StemMode stemMode; private boolean removeAccents; - private int maxTermOccurences; + private int maxTermOccurrences; private int maxTokenizeLength; public static final int DEFAULT_MAX_TERM_OCCURRENCES; @@ -29,7 +29,7 @@ public class AnnotatorConfig implements Cloneable { language = Language.ENGLISH; stemMode = StemMode.NONE; removeAccents = false; - maxTermOccurences = DEFAULT_MAX_TERM_OCCURRENCES; + maxTermOccurrences = DEFAULT_MAX_TERM_OCCURRENCES; maxTokenizeLength = DEFAULT_MAX_TOKENIZE_LENGTH; } @@ -37,7 +37,7 @@ public class AnnotatorConfig implements Cloneable { language = rhs.language; stemMode = rhs.stemMode; removeAccents = rhs.removeAccents; - maxTermOccurences = rhs.maxTermOccurences; + maxTermOccurrences = rhs.maxTermOccurrences; maxTokenizeLength = rhs.maxTokenizeLength; } @@ -74,11 +74,11 @@ public class AnnotatorConfig implements Cloneable { } public int getMaxTermOccurrences() { - return maxTermOccurences; + return maxTermOccurrences; } public AnnotatorConfig setMaxTermOccurrences(int maxTermCount) { - this.maxTermOccurences = maxTermCount; + this.maxTermOccurrences = maxTermCount; return this; } @@ -110,7 +110,7 @@ public class AnnotatorConfig implements Cloneable { if (removeAccents != rhs.removeAccents) { return false; } - if (maxTermOccurences != rhs.maxTermOccurences) { + if (maxTermOccurrences != rhs.maxTermOccurrences) { return false; } if (maxTokenizeLength != rhs.maxTokenizeLength) { @@ -122,6 +122,6 @@ public class AnnotatorConfig implements Cloneable { @Override public int hashCode() { return getClass().hashCode() + language.hashCode() + stemMode.hashCode() + - Boolean.valueOf(removeAccents).hashCode() + maxTermOccurences + maxTokenizeLength; + Boolean.valueOf(removeAccents).hashCode() + maxTermOccurrences + maxTokenizeLength; } } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java index 879a6b2ce8e..18f09a72fc9 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotator.java @@ -28,6 +28,7 @@ import static com.yahoo.language.LinguisticsCase.toLowerCase; public class LinguisticsAnnotator { private final Linguistics factory; + private final LinguisticsContext linguisticsContext; private final AnnotatorConfig config; private static class TermOccurrences { @@ -56,8 +57,9 @@ public class LinguisticsAnnotator { * @param factory the linguistics factory to use when annotating * @param config the linguistics config to use */ - public LinguisticsAnnotator(Linguistics factory, AnnotatorConfig config) { + public LinguisticsAnnotator(Linguistics factory, LinguisticsContext context, AnnotatorConfig config) { this.factory = factory; + this.linguisticsContext = context; this.config = config; } @@ -70,15 +72,14 @@ public class LinguisticsAnnotator { public boolean annotate(StringFieldValue text, ExecutionContext context) { if (text.getSpanTree(SpanTrees.LINGUISTICS) != null) return true; // Already annotated with LINGUISTICS. - Tokenizer tokenizer = factory.getTokenizer(); + Tokenizer tokenizer = factory.getTokenizer(linguisticsContext); String input = (text.getString().length() <= config.getMaxTokenizeLength()) ? text.getString() : text.getString().substring(0, config.getMaxTokenizeLength()); Iterable<Token> tokens = tokenizer.tokenize(input, config.getLanguage(), config.getStemMode(), - config.getRemoveAccents(), - new LinguisticsContext(context.getDocumentType().getName())); + config.getRemoveAccents()); TermOccurrences termOccurrences = new TermOccurrences(config.getMaxTermOccurrences()); SpanTree tree = new SpanTree(SpanTrees.LINGUISTICS); for (Token token : tokens) @@ -93,9 +94,9 @@ public class LinguisticsAnnotator { * Creates a TERM annotation which has the lowercase value as annotation (only) if it is different from the * original. * - * @param termToLowerCase The term to lower case. - * @param origTerm The original term. - * @return the created TERM annotation. + * @param termToLowerCase the term to lower case + * @param origTerm the original term + * @return the created TERM annotation */ public static Annotation lowerCaseTermAnnotation(String termToLowerCase, String origTerm) { String annotationValue = toLowerCase(termToLowerCase); |