diff options
author | Jon Bratseth <bratseth@gmail.com> | 2021-09-19 22:03:39 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2021-09-19 22:03:39 +0200 |
commit | 03cca9c42b32d8d4ba24b348f8466de42bb05cd7 (patch) | |
tree | a6348759bf87ddb7cece3e74477067dc3981188a /indexinglanguage/src/main | |
parent | 7ccaece3a2d065de9eb5a4cb18bc2f02d908d595 (diff) |
Add 'encode' expression
Diffstat (limited to 'indexinglanguage/src/main')
14 files changed, 133 insertions, 23 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParser.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParser.java index 662d221d3a6..34da5b47655 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParser.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParser.java @@ -55,13 +55,14 @@ public final class ScriptParser { T call(IndexingParser parser) throws ParseException; } - private static <T extends Expression> T parse(ScriptParserContext config, ParserMethod<T> method) + private static <T extends Expression> T parse(ScriptParserContext context, ParserMethod<T> method) throws ParseException { - CharStream input = config.getInputStream(); + CharStream input = context.getInputStream(); IndexingParser parser = new IndexingParser(input); - parser.setAnnotatorConfig(config.getAnnotatorConfig()); - parser.setDefaultFieldName(config.getDefaultFieldName()); - parser.setLinguistics(config.getLinguistcs()); + parser.setAnnotatorConfig(context.getAnnotatorConfig()); + parser.setDefaultFieldName(context.getDefaultFieldName()); + parser.setLinguistics(context.getLinguistcs()); + parser.setEncoder(context.getEncoder()); try { return method.call(parser); } catch (ParseException e) { diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParserContext.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParserContext.java index e1071e25042..06be91703fa 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParserContext.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParserContext.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.indexinglanguage; import com.yahoo.language.Linguistics; +import com.yahoo.language.process.Encoder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig; 
import com.yahoo.vespa.indexinglanguage.parser.CharStream; @@ -13,11 +14,13 @@ public class ScriptParserContext { private AnnotatorConfig annotatorConfig = new AnnotatorConfig(); private Linguistics linguistics; + private final Encoder encoder; private String defaultFieldName = null; private CharStream inputStream = null; - public ScriptParserContext(Linguistics linguistics) { + public ScriptParserContext(Linguistics linguistics, Encoder encoder) { this.linguistics = linguistics; + this.encoder = encoder; } public AnnotatorConfig getAnnotatorConfig() { @@ -38,6 +41,10 @@ public class ScriptParserContext { return this; } + public Encoder getEncoder() { + return encoder; + } + public String getDefaultFieldName() { return defaultFieldName; } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ClearStateExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ClearStateExpression.java index 02abd13f5db..dbf50dbf9a7 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ClearStateExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ClearStateExpression.java @@ -2,7 +2,6 @@ package com.yahoo.vespa.indexinglanguage.expressions; import com.yahoo.document.DataType; -import com.yahoo.document.DocumentType; /** * @author Simon Thoresen Hult diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EncodeExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EncodeExpression.java new file mode 100644 index 00000000000..09034659ad0 --- /dev/null +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EncodeExpression.java @@ -0,0 +1,64 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.indexinglanguage.expressions; + +import com.yahoo.document.DataType; +import com.yahoo.document.TensorDataType; +import com.yahoo.document.datatypes.StringFieldValue; +import com.yahoo.document.datatypes.TensorFieldValue; +import com.yahoo.language.process.Encoder; +import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.TensorType; + +/** + * Encodes a string as a tensor using the configured Encoder component + * + * @author bratseth + */ +public class EncodeExpression extends Expression { + + private final Encoder encoder; + + /** The target type we are encoding to. Set during verification. */ + private TensorType targetType; + + public EncodeExpression(Encoder encoder) { + super(DataType.STRING); + this.encoder = encoder; + } + + @Override + protected void doExecute(ExecutionContext context) { + StringFieldValue input = (StringFieldValue) context.getValue(); + Tensor tensor = encoder.encode(input.getString(), context.getLanguage(), targetType); + context.setValue(new TensorFieldValue(tensor)); + } + + @Override + protected void doVerify(VerificationContext context) { + String outputField = context.getOutputField(); + if (outputField == null) + throw new VerificationException(this, "No output field in this statement: " + + "Don't know what tensor type to encode to."); + DataType outputFieldType = context.getInputType(this, outputField); + if ( ! 
(outputFieldType instanceof TensorDataType) ) + throw new VerificationException(this, "The type of the output field " + outputField + + " is not a tensor but " + outputField); + targetType = ((TensorDataType) outputFieldType).getTensorType(); + context.setValueType(createdOutputType()); + } + + @Override + public DataType createdOutputType() { + return new TensorDataType(targetType); + } + + @Override + public String toString() { return "encode"; } + + @Override + public int hashCode() { return 1; } + + @Override + public boolean equals(Object o) { return o instanceof EncodeExpression; } + +} diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java index 6be1f78d376..1aec13bff50 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java @@ -136,4 +136,5 @@ public class ExecutionContext implements FieldTypeAdapter, FieldValueAdapter, Cl value = null; return this; } + } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java index da43ff2f2a3..a121df8e5a8 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java @@ -6,6 +6,7 @@ import com.yahoo.document.Document; import com.yahoo.document.DocumentUpdate; import com.yahoo.document.datatypes.FieldValue; import com.yahoo.language.Linguistics; +import com.yahoo.language.process.Encoder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.vespa.indexinglanguage.*; import 
com.yahoo.vespa.indexinglanguage.parser.IndexingInput; @@ -185,11 +186,11 @@ public abstract class Expression extends Selectable { /** Creates an expression with simple linguistics for testing */ public static Expression fromString(String expression) throws ParseException { - return fromString(expression, new SimpleLinguistics()); + return fromString(expression, new SimpleLinguistics(), Encoder.throwsOnUse); } - public static Expression fromString(String expression, Linguistics linguistics) throws ParseException { - return newInstance(new ScriptParserContext(linguistics).setInputStream(new IndexingInput(expression))); + public static Expression fromString(String expression, Linguistics linguistics, Encoder encoder) throws ParseException { + return newInstance(new ScriptParserContext(linguistics, encoder).setInputStream(new IndexingInput(expression))); } public static Expression newInstance(ScriptParserContext context) throws ParseException { diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExpressionList.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExpressionList.java index 2abed23b0d0..0ac195efb5d 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExpressionList.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExpressionList.java @@ -2,7 +2,6 @@ package com.yahoo.vespa.indexinglanguage.expressions; import com.yahoo.document.DataType; -import com.yahoo.document.DocumentType; import com.yahoo.vespa.objects.ObjectOperation; import com.yahoo.vespa.objects.ObjectPredicate; diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/OutputExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/OutputExpression.java index 731aa5bf7c3..398c2751bd8 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/OutputExpression.java 
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/OutputExpression.java @@ -57,4 +57,5 @@ public abstract class OutputExpression extends Expression { public int hashCode() { return getClass().hashCode() + (fieldName != null ? fieldName.hashCode() : 0); } + } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java index 035de999962..7317cb2216f 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.indexinglanguage.expressions; import com.yahoo.document.DataType; import com.yahoo.document.datatypes.FieldValue; import com.yahoo.language.Linguistics; +import com.yahoo.language.process.Encoder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.vespa.indexinglanguage.ScriptParser; import com.yahoo.vespa.indexinglanguage.ScriptParserContext; @@ -91,11 +92,11 @@ public final class ScriptExpression extends ExpressionList<StatementExpression> /** Creates an expression with simple linguistics for testing */ @SuppressWarnings("deprecation") public static ScriptExpression fromString(String expression) throws ParseException { - return fromString(expression, new SimpleLinguistics()); + return fromString(expression, new SimpleLinguistics(), Encoder.throwsOnUse); } - public static ScriptExpression fromString(String expression, Linguistics linguistics) throws ParseException { - return newInstance(new ScriptParserContext(linguistics).setInputStream(new IndexingInput(expression))); + public static ScriptExpression fromString(String expression, Linguistics linguistics, Encoder encoder) throws ParseException { + return newInstance(new ScriptParserContext(linguistics, 
encoder).setInputStream(new IndexingInput(expression))); } public static ScriptExpression newInstance(ScriptParserContext config) throws ParseException { diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java index b640c43f3b2..f5354938c67 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java @@ -2,7 +2,9 @@ package com.yahoo.vespa.indexinglanguage.expressions; import com.yahoo.document.DataType; +import com.yahoo.document.TensorDataType; import com.yahoo.language.Linguistics; +import com.yahoo.language.process.Encoder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.vespa.indexinglanguage.ScriptParser; import com.yahoo.vespa.indexinglanguage.ScriptParserContext; @@ -20,8 +22,11 @@ import java.util.List; */ public final class StatementExpression extends ExpressionList<Expression> { + /** The name of the (last) output field this statement will write to, or null if none */ + private String outputField; + /** The type of the output created by this statement, or null if no output */ - private final DataType outputType; + private DataType outputType; public StatementExpression(Expression... 
lst) { this(Arrays.asList(lst)); @@ -38,7 +43,7 @@ public final class StatementExpression extends ExpressionList<Expression> { @Override protected void doExecute(ExecutionContext context) { - context.setOutputType(createdOutputType()); + context.setOutputType(outputType); for (Expression exp : this) { context.execute(exp); } @@ -46,9 +51,14 @@ public final class StatementExpression extends ExpressionList<Expression> { @Override protected void doVerify(VerificationContext context) { - for (Expression exp : this) { - context.execute(exp); + for (Expression expression : this) { + if (expression instanceof OutputExpression) + outputField = ((OutputExpression)expression).getFieldName(); } + context.setOutputField(outputField); + for (Expression expression : this) + context.execute(expression); + outputType = context.getValueType(); } private static DataType resolveInputType(Iterable<Expression> lst) { @@ -98,11 +108,11 @@ public final class StatementExpression extends ExpressionList<Expression> { /** Creates an expression with simple linguistics for testing */ public static StatementExpression fromString(String expression) throws ParseException { - return fromString(expression, new SimpleLinguistics()); + return fromString(expression, new SimpleLinguistics(), Encoder.throwsOnUse); } - public static StatementExpression fromString(String expression, Linguistics linguistics) throws ParseException { - return newInstance(new ScriptParserContext(linguistics).setInputStream(new IndexingInput(expression))); + public static StatementExpression fromString(String expression, Linguistics linguistics, Encoder encoder) throws ParseException { + return newInstance(new ScriptParserContext(linguistics, encoder).setInputStream(new IndexingInput(expression))); } public static StatementExpression newInstance(ScriptParserContext config) throws ParseException { diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ThisExpression.java 
b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ThisExpression.java index 8fc582a01af..7c467b1e08d 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ThisExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ThisExpression.java @@ -2,7 +2,6 @@ package com.yahoo.vespa.indexinglanguage.expressions; import com.yahoo.document.DataType; -import com.yahoo.document.DocumentType; /** * @author Simon Thoresen Hult diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/UnresolvedDataType.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/UnresolvedDataType.java index ef08c01633a..1847db0f29e 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/UnresolvedDataType.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/UnresolvedDataType.java @@ -19,4 +19,5 @@ final class UnresolvedDataType extends PrimitiveDataType { public boolean isValueCompatible(FieldValue value) { return value != null; } + } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/VerificationContext.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/VerificationContext.java index 601d751739d..61d796e5356 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/VerificationContext.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/VerificationContext.java @@ -14,6 +14,7 @@ public class VerificationContext implements FieldTypeAdapter, Cloneable { private final Map<String, DataType> variables = new HashMap<String, DataType>(); private final FieldTypeAdapter adapter; private DataType value; + private String outputField; public VerificationContext() { this.adapter = null; @@ -49,16 +50,25 @@ public class VerificationContext implements 
FieldTypeAdapter, Cloneable { return this; } - /** Returns the output type that will result from executing the statement verified by this */ public DataType getValueType() { return value; } + /** Sets the output value type */ public VerificationContext setValueType(DataType value) { this.value = value; return this; } + /** Sets the name of the (last) output field of the statement this is executed as a part of */ + public void setOutputField(String outputField) { this.outputField = outputField; } + + /** + * Returns the name of the (last) output field of the statement this is executed as a part of, + * or null if none or not yet verified + */ + public String getOutputField() { return outputField; } + public VerificationContext clear() { variables.clear(); value = null; diff --git a/indexinglanguage/src/main/javacc/IndexingParser.jj b/indexinglanguage/src/main/javacc/IndexingParser.jj index 632a19a2c6c..4533a17954c 100644 --- a/indexinglanguage/src/main/javacc/IndexingParser.jj +++ b/indexinglanguage/src/main/javacc/IndexingParser.jj @@ -34,6 +34,7 @@ import com.yahoo.text.StringUtilities; import com.yahoo.vespa.indexinglanguage.expressions.*; import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig; import com.yahoo.language.process.StemMode; +import com.yahoo.language.process.Encoder; import com.yahoo.language.Linguistics; /** @@ -44,6 +45,7 @@ public class IndexingParser { private String defaultFieldName; private Linguistics linguistics; + private Encoder encoder; private AnnotatorConfig annotatorCfg; public IndexingParser(String str) { @@ -60,6 +62,11 @@ public class IndexingParser { return this; } + public IndexingParser setEncoder(Encoder encoder) { + this.encoder = encoder; + return this; + } + public IndexingParser setAnnotatorConfig(AnnotatorConfig cfg) { annotatorCfg = cfg; return this; @@ -150,6 +157,7 @@ TOKEN : <CREATE_IF_NON_EXISTENT: "create_if_non_existent"> | <ECHO: "echo"> | <ELSE: "else"> | + <ENCODE: "encode"> | <EXACT: "exact"> | 
<FLATTEN: "flatten"> | <FOR_EACH: "for_each"> | @@ -275,6 +283,7 @@ Expression value() : val = base64EncodeExp() | val = clearStateExp() | val = echoExp() | + val = encodeExp() | val = exactExp() | val = flattenExp() | val = forEachExp() | @@ -356,6 +365,12 @@ Expression echoExp() : { } { return new EchoExpression(); } } +Expression encodeExp() : { } +{ + ( <ENCODE> ) + { return new EncodeExpression(encoder); } +} + Expression exactExp() : { } { ( <EXACT> ) @@ -729,6 +744,7 @@ String identifier() : <ECHO> | <EXACT> | <ELSE> | + <ENCODE> | <FLATTEN> | <FOR_EACH> | <GET_FIELD> | |