diff options
author | Jon Bratseth <bratseth@gmail.com> | 2021-09-28 21:19:41 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2021-09-28 21:19:41 +0200 |
commit | e7e659e9d26401c8c36300d4760d4e34acd26d0a (patch) | |
tree | 4c8b869a9ef991a6edda1c3a80e433b3b1690bbd /indexinglanguage | |
parent | 35223653327b86a059d23c543bbac3611d43775f (diff) |
encode -> embed
Diffstat (limited to 'indexinglanguage')
11 files changed, 53 insertions, 62 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParser.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParser.java index 34da5b47655..649095d1db8 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParser.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParser.java @@ -62,7 +62,7 @@ public final class ScriptParser { parser.setAnnotatorConfig(context.getAnnotatorConfig()); parser.setDefaultFieldName(context.getDefaultFieldName()); parser.setLinguistics(context.getLinguistcs()); - parser.setEncoder(context.getEncoder()); + parser.setEmbedder(context.getEmbedder()); try { return method.call(parser); } catch (ParseException e) { diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParserContext.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParserContext.java index 06be91703fa..77c2af8dd42 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParserContext.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParserContext.java @@ -2,8 +2,7 @@ package com.yahoo.vespa.indexinglanguage; import com.yahoo.language.Linguistics; -import com.yahoo.language.process.Encoder; -import com.yahoo.language.simple.SimpleLinguistics; +import com.yahoo.language.process.Embedder; import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig; import com.yahoo.vespa.indexinglanguage.parser.CharStream; @@ -14,13 +13,13 @@ public class ScriptParserContext { private AnnotatorConfig annotatorConfig = new AnnotatorConfig(); private Linguistics linguistics; - private final Encoder encoder; + private final Embedder embedder; private String defaultFieldName = null; private CharStream inputStream = null; - public ScriptParserContext(Linguistics linguistics, Encoder encoder) { + public ScriptParserContext(Linguistics linguistics, Embedder embedder) { this.linguistics = linguistics; - this.encoder = encoder; + this.embedder = embedder; } public AnnotatorConfig getAnnotatorConfig() { @@ -41,8 +40,8 @@ public class ScriptParserContext { return this; } - public Encoder getEncoder() { - return encoder; + public Embedder getEmbedder() { + return embedder; } public String getDefaultFieldName() { diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EncodeExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EmbedExpression.java index f84da9ddef8..aa579ed729e 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EncodeExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EmbedExpression.java @@ -5,25 +5,25 @@ import com.yahoo.document.DataType; import com.yahoo.document.TensorDataType; import com.yahoo.document.datatypes.StringFieldValue; import com.yahoo.document.datatypes.TensorFieldValue; -import com.yahoo.language.process.Encoder; +import com.yahoo.language.process.Embedder; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorType; /** - * Encodes a string as a tensor using the configured Encoder component + * Embeds a string in a tensor space using the configured Embedder component * * @author bratseth */ -public class EncodeExpression extends Expression { +public class EmbedExpression extends Expression { - private final Encoder encoder; + private final Embedder embedder; - /** The target type we are encoding to. Set during verification. */ + /** The target type we are embedding into. */ private TensorType targetType; - public EncodeExpression(Encoder encoder) { + public EmbedExpression(Embedder embedder) { super(DataType.STRING); - this.encoder = encoder; + this.embedder = embedder; } @Override @@ -34,7 +34,7 @@ public class EncodeExpression extends Expression { @Override protected void doExecute(ExecutionContext context) { StringFieldValue input = (StringFieldValue) context.getValue(); - Tensor tensor = encoder.encode(input.getString(), context.getLanguage(), targetType); + Tensor tensor = embedder.embed(input.getString(), context.getLanguage(), targetType); context.setValue(new TensorFieldValue(tensor)); } @@ -43,7 +43,7 @@ public class EncodeExpression extends Expression { String outputField = context.getOutputField(); if (outputField == null) throw new VerificationException(this, "No output field in this statement: " + - "Don't know what tensor type to encode to."); + "Don't know what tensor type to embed into."); DataType outputFieldType = context.getInputType(this, outputField); if ( ! (outputFieldType instanceof TensorDataType) ) throw new VerificationException(this, "The type of the output field " + outputField + @@ -58,12 +58,12 @@ public class EncodeExpression extends Expression { } @Override - public String toString() { return "encode"; } + public String toString() { return "embed"; } @Override public int hashCode() { return 1; } @Override - public boolean equals(Object o) { return o instanceof EncodeExpression; } + public boolean equals(Object o) { return o instanceof EmbedExpression; } } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java index 67459c2b035..20a0c9804a9 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java @@ -6,7 +6,7 @@ import com.yahoo.document.Document; import com.yahoo.document.DocumentUpdate; import com.yahoo.document.datatypes.FieldValue; import com.yahoo.language.Linguistics; -import com.yahoo.language.process.Encoder; +import com.yahoo.language.process.Embedder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.vespa.indexinglanguage.*; import com.yahoo.vespa.indexinglanguage.parser.IndexingInput; @@ -188,11 +188,11 @@ public abstract class Expression extends Selectable { /** Creates an expression with simple lingustics for testing */ public static Expression fromString(String expression) throws ParseException { - return fromString(expression, new SimpleLinguistics(), Encoder.throwsOnUse); + return fromString(expression, new SimpleLinguistics(), Embedder.throwsOnUse); } - public static Expression fromString(String expression, Linguistics linguistics, Encoder encoder) throws ParseException { - return newInstance(new ScriptParserContext(linguistics, encoder).setInputStream(new IndexingInput(expression))); + public static Expression fromString(String expression, Linguistics linguistics, Embedder embedder) throws ParseException { + return newInstance(new ScriptParserContext(linguistics, embedder).setInputStream(new IndexingInput(expression))); } public static Expression newInstance(ScriptParserContext context) throws ParseException { diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java index 7317cb2216f..b5f71813de3 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java @@ -4,7 +4,7 @@ package com.yahoo.vespa.indexinglanguage.expressions; import com.yahoo.document.DataType; import com.yahoo.document.datatypes.FieldValue; import com.yahoo.language.Linguistics; -import com.yahoo.language.process.Encoder; +import com.yahoo.language.process.Embedder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.vespa.indexinglanguage.ScriptParser; import com.yahoo.vespa.indexinglanguage.ScriptParserContext; @@ -92,11 +92,11 @@ public final class ScriptExpression extends ExpressionList<StatementExpression> /** Creates an expression with simple lingustics for testing */ @SuppressWarnings("deprecation") public static ScriptExpression fromString(String expression) throws ParseException { - return fromString(expression, new SimpleLinguistics(), Encoder.throwsOnUse); + return fromString(expression, new SimpleLinguistics(), Embedder.throwsOnUse); } - public static ScriptExpression fromString(String expression, Linguistics linguistics, Encoder encoder) throws ParseException { - return newInstance(new ScriptParserContext(linguistics, encoder).setInputStream(new IndexingInput(expression))); + public static ScriptExpression fromString(String expression, Linguistics linguistics, Embedder embedder) throws ParseException { + return newInstance(new ScriptParserContext(linguistics, embedder).setInputStream(new IndexingInput(expression))); } public static ScriptExpression newInstance(ScriptParserContext config) throws ParseException { diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java index 145133e210d..7d157af1a19 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java @@ -2,9 +2,8 @@ package com.yahoo.vespa.indexinglanguage.expressions; import com.yahoo.document.DataType; -import com.yahoo.document.TensorDataType; import com.yahoo.language.Linguistics; -import com.yahoo.language.process.Encoder; +import com.yahoo.language.process.Embedder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.vespa.indexinglanguage.ScriptParser; import com.yahoo.vespa.indexinglanguage.ScriptParserContext; @@ -12,7 +11,6 @@ import com.yahoo.vespa.indexinglanguage.parser.IndexingInput; import com.yahoo.vespa.indexinglanguage.parser.ParseException; import java.util.Arrays; -import java.util.Collection; import java.util.Iterator; import java.util.LinkedList; import java.util.List; @@ -100,11 +98,11 @@ public final class StatementExpression extends ExpressionList<Expression> { /** Creates an expression with simple lingustics for testing */ public static StatementExpression fromString(String expression) throws ParseException { - return fromString(expression, new SimpleLinguistics(), Encoder.throwsOnUse); + return fromString(expression, new SimpleLinguistics(), Embedder.throwsOnUse); } - public static StatementExpression fromString(String expression, Linguistics linguistics, Encoder encoder) throws ParseException { - return newInstance(new ScriptParserContext(linguistics, encoder).setInputStream(new IndexingInput(expression))); + public static StatementExpression fromString(String expression, Linguistics linguistics, Embedder embedder) throws ParseException { + return newInstance(new ScriptParserContext(linguistics, embedder).setInputStream(new IndexingInput(expression))); } public static StatementExpression newInstance(ScriptParserContext config) throws ParseException { diff --git a/indexinglanguage/src/main/javacc/IndexingParser.jj b/indexinglanguage/src/main/javacc/IndexingParser.jj index 4533a17954c..3eee4ea6f08 100644 --- a/indexinglanguage/src/main/javacc/IndexingParser.jj +++ b/indexinglanguage/src/main/javacc/IndexingParser.jj @@ -34,7 +34,7 @@ import com.yahoo.text.StringUtilities; import com.yahoo.vespa.indexinglanguage.expressions.*; import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig; import com.yahoo.language.process.StemMode; -import com.yahoo.language.process.Encoder; +import com.yahoo.language.process.Embedder; import com.yahoo.language.Linguistics; /** @@ -45,7 +45,7 @@ public class IndexingParser { private String defaultFieldName; private Linguistics linguistics; - private Encoder encoder; + private Embedder embedder; private AnnotatorConfig annotatorCfg; public IndexingParser(String str) { @@ -62,8 +62,8 @@ public class IndexingParser { return this; } - public IndexingParser setEncoder(Encoder encoder) { - this.encoder = encoder; + public IndexingParser setEmbedder(Embedder embedder) { + this.embedder = embedder; return this; } @@ -157,7 +157,7 @@ TOKEN : <CREATE_IF_NON_EXISTENT: "create_if_non_existent"> | <ECHO: "echo"> | <ELSE: "else"> | - <ENCODE: "encode"> | + <EMBED: "embed"> | <EXACT: "exact"> | <FLATTEN: "flatten"> | <FOR_EACH: "for_each"> | @@ -283,7 +283,7 @@ Expression value() : val = base64EncodeExp() | val = clearStateExp() | val = echoExp() | - val = encodeExp() | + val = embedExp() | val = exactExp() | val = flattenExp() | val = forEachExp() | @@ -365,10 +365,10 @@ Expression echoExp() : { } { return new EchoExpression(); } } -Expression encodeExp() : { } +Expression embedExp() : { } { - ( <ENCODE> ) - { return new EncodeExpression(encoder); } + ( <EMBED> ) + { return new EmbedExpression(embedder); } } Expression exactExp() : { } @@ -744,7 +744,7 @@ String identifier() : <ECHO> | <EXACT> | <ELSE> | - <ENCODE> | + <EMBED> | <FLATTEN> | <FOR_EACH> | <GET_FIELD> | diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptParserTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptParserTestCase.java index 32e38dbee6f..06d185339a6 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptParserTestCase.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptParserTestCase.java @@ -1,7 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.indexinglanguage; -import com.yahoo.language.process.Encoder; +import com.yahoo.language.process.Embedder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.vespa.indexinglanguage.expressions.EchoExpression; import com.yahoo.vespa.indexinglanguage.expressions.InputExpression; @@ -96,7 +96,7 @@ public class ScriptParserTestCase { } private static ScriptParserContext newContext(String input) { - return new ScriptParserContext(new SimpleLinguistics(), Encoder.throwsOnUse).setInputStream(new IndexingInput(input)); + return new ScriptParserContext(new SimpleLinguistics(), Embedder.throwsOnUse).setInputStream(new IndexingInput(input)); } } diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java index 9d3d0abb256..188426b1a06 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java @@ -5,26 +5,20 @@ import com.yahoo.document.DataType; import com.yahoo.document.Document; import com.yahoo.document.DocumentType; import com.yahoo.document.Field; -import com.yahoo.document.FieldPath; import com.yahoo.document.TensorDataType; import com.yahoo.document.datatypes.BoolFieldValue; -import com.yahoo.document.datatypes.FieldValue; import com.yahoo.document.datatypes.StringFieldValue; import com.yahoo.document.datatypes.TensorFieldValue; import com.yahoo.language.Language; -import com.yahoo.language.process.Encoder; +import com.yahoo.language.process.Embedder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.tensor.Tensor; -import com.yahoo.tensor.TensorAddress; import com.yahoo.tensor.TensorType; import com.yahoo.vespa.indexinglanguage.expressions.*; import com.yahoo.vespa.indexinglanguage.parser.ParseException; import org.junit.Test; -import java.util.Iterator; import java.util.List; -import java.util.Map; -import java.util.Set; import static org.junit.Assert.*; @@ -106,9 +100,9 @@ public class ScriptTestCase { } @Test - public void testEncode() throws ParseException { + public void testEmbed() throws ParseException { TensorType tensorType = TensorType.fromSpec("tensor(d[4])"); - var expression = Expression.fromString("input myText | encode | attribute 'myTensor'", + var expression = Expression.fromString("input myText | embed | attribute 'myTensor'", new SimpleLinguistics(), new MockEncoder()); @@ -131,15 +125,15 @@ public class ScriptTestCase { ((TensorFieldValue)adapter.values.get("myTensor")).getTensor().get()); } - private static class MockEncoder implements Encoder { + private static class MockEncoder implements Embedder { @Override - public List<Integer> encode(String text, Language language) { + public List<Integer> embed(String text, Language language) { return null; } @Override - public Tensor encode(String text, Language language, TensorType tensorType) { + public Tensor embed(String text, Language language, TensorType tensorType) { return Tensor.from(tensorType, "[7,3,0,0]"); } diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/DefaultFieldNameTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/DefaultFieldNameTestCase.java index 2a71aeb564c..ea0d9f9cf69 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/DefaultFieldNameTestCase.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/DefaultFieldNameTestCase.java @@ -1,7 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.indexinglanguage.parser; -import com.yahoo.language.process.Encoder; +import com.yahoo.language.process.Embedder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.vespa.indexinglanguage.ScriptParserContext; import com.yahoo.vespa.indexinglanguage.expressions.Expression; @@ -19,7 +19,7 @@ public class DefaultFieldNameTestCase { public void requireThatDefaultFieldNameIsAppliedWhenArgumentIsMissing() throws ParseException { IndexingInput input = new IndexingInput("input"); InputExpression exp = (InputExpression)Expression.newInstance(new ScriptParserContext(new SimpleLinguistics(), - Encoder.throwsOnUse) + Embedder.throwsOnUse) .setInputStream(input) .setDefaultFieldName("foo")); assertEquals("foo", exp.getFieldName()); diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java index d7c5ae5c15a..44aa562028c 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java @@ -2,7 +2,7 @@ package com.yahoo.vespa.indexinglanguage.parser; import com.yahoo.language.Linguistics; -import com.yahoo.language.process.Encoder; +import com.yahoo.language.process.Embedder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.vespa.indexinglanguage.expressions.*; import org.junit.Test; @@ -85,9 +85,9 @@ public class ExpressionTestCase { private static void assertExpression(Class expectedClass, String str) throws ParseException { Linguistics linguistics = new SimpleLinguistics(); - Expression foo = Expression.fromString(str, linguistics, Encoder.throwsOnUse); + Expression foo = Expression.fromString(str, linguistics, Embedder.throwsOnUse); assertEquals(expectedClass, foo.getClass()); - Expression bar = Expression.fromString(foo.toString(), linguistics, Encoder.throwsOnUse); + Expression bar = Expression.fromString(foo.toString(), linguistics, Embedder.throwsOnUse); assertEquals(foo.hashCode(), bar.hashCode()); assertEquals(foo, bar); } |