diff options
31 files changed, 276 insertions, 72 deletions
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java b/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java index ac92207820c..9c89517f72d 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/document/SDField.java @@ -9,6 +9,7 @@ import com.yahoo.document.MapDataType; import com.yahoo.document.StructDataType; import com.yahoo.document.TensorDataType; import com.yahoo.language.Linguistics; +import com.yahoo.language.process.Encoder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.searchdefinition.Index; import com.yahoo.searchdefinition.Search; @@ -425,12 +426,12 @@ public class SDField extends Field implements TypedKey, FieldOperationContainer, /** Parse an indexing expression which will use the simple linguistics implementatino suitable for testing */ public void parseIndexingScript(String script) { - parseIndexingScript(script, new SimpleLinguistics()); + parseIndexingScript(script, new SimpleLinguistics(), Encoder.throwsOnUse); } - public void parseIndexingScript(String script, Linguistics linguistics) { + public void parseIndexingScript(String script, Linguistics linguistics, Encoder encoder) { try { - ScriptParserContext config = new ScriptParserContext(linguistics); + ScriptParserContext config = new ScriptParserContext(linguistics, encoder); config.setInputStream(new IndexingInput(script)); setIndexingScript(ScriptExpression.newInstance(config)); } catch (ParseException e) { diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexingOperation.java b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexingOperation.java index dece0064fcc..4332d8baea8 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexingOperation.java +++ 
b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexingOperation.java @@ -2,6 +2,7 @@ package com.yahoo.searchdefinition.fieldoperation; import com.yahoo.language.Linguistics; +import com.yahoo.language.process.Encoder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.searchdefinition.document.SDField; import com.yahoo.searchdefinition.parser.ParseException; @@ -27,14 +28,14 @@ public class IndexingOperation implements FieldOperation { } /** Creates an indexing operation which will use the simple linguistics implementation suitable for testing */ - @SuppressWarnings("deprecation") public static IndexingOperation fromStream(SimpleCharStream input, boolean multiLine) throws ParseException { - return fromStream(input, multiLine, new SimpleLinguistics()); + return fromStream(input, multiLine, new SimpleLinguistics(), Encoder.throwsOnUse); } - public static IndexingOperation fromStream(SimpleCharStream input, boolean multiLine, Linguistics linguistics) + public static IndexingOperation fromStream(SimpleCharStream input, boolean multiLine, + Linguistics linguistics, Encoder encoder) throws ParseException { - ScriptParserContext config = new ScriptParserContext(linguistics); + ScriptParserContext config = new ScriptParserContext(linguistics, encoder); config.setAnnotatorConfig(new AnnotatorConfig()); config.setInputStream(input); ScriptExpression exp; diff --git a/config-model/src/main/javacc/SDParser.jj b/config-model/src/main/javacc/SDParser.jj index 4d7b31e6e50..7df77588fe8 100644 --- a/config-model/src/main/javacc/SDParser.jj +++ b/config-model/src/main/javacc/SDParser.jj @@ -58,6 +58,7 @@ import com.yahoo.config.application.api.DeployLogger; import com.yahoo.config.application.api.FileRegistry; import com.yahoo.config.model.api.ModelContext; import com.yahoo.language.Linguistics; +import com.yahoo.language.process.Encoder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.search.query.ranking.Diversity; 
import java.util.Map; @@ -111,7 +112,7 @@ public class SDParser { */ @SuppressWarnings("deprecation") private IndexingOperation newIndexingOperation(boolean multiline) throws ParseException { - return newIndexingOperation(multiline, new SimpleLinguistics()); + return newIndexingOperation(multiline, new SimpleLinguistics(), Encoder.throwsOnUse); } /** @@ -120,13 +121,13 @@ public class SDParser { * @param multiline Whether or not to allow multi-line expressions. * @param linguistics What to use for tokenizing. */ - private IndexingOperation newIndexingOperation(boolean multiline, Linguistics linguistics) throws ParseException { + private IndexingOperation newIndexingOperation(boolean multiline, Linguistics linguistics, Encoder encoder) throws ParseException { SimpleCharStream input = (SimpleCharStream)token_source.input_stream; if (token.next != null) { input.backup(token.next.image.length()); } try { - return IndexingOperation.fromStream(input, multiline, linguistics); + return IndexingOperation.fromStream(input, multiline, linguistics, encoder); } finally { token.next = null; jj_ntk = -1; diff --git a/container-core/src/main/java/com/yahoo/language/provider/DefaultEncoderProvider.java b/container-core/src/main/java/com/yahoo/language/provider/DefaultEncoderProvider.java index 9b07ee55bd8..f8550d04d1c 100644 --- a/container-core/src/main/java/com/yahoo/language/provider/DefaultEncoderProvider.java +++ b/container-core/src/main/java/com/yahoo/language/provider/DefaultEncoderProvider.java @@ -19,30 +19,13 @@ import java.util.List; @SuppressWarnings("unused") // Injected public class DefaultEncoderProvider implements Provider<Encoder> { - // Use lazy initialization to avoid expensive (memory-wise) instantiation - private static final Encoder failingEncoder = new FailingEncoder(); - @Inject public DefaultEncoderProvider() { } @Override - public Encoder get() { return failingEncoder; } + public Encoder get() { return Encoder.throwsOnUse; } @Override public void 
deconstruct() {} - public static class FailingEncoder implements Encoder { - - @Override - public List<Integer> encode(String text, Language language) { - throw new IllegalStateException("No encoder has been configured"); - } - - @Override - public Tensor encode(String text, Language language, TensorType tensorType) { - throw new IllegalStateException("No encoder has been configured"); - } - - } - } diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java index 285f3e83602..53709c4ff87 100644 --- a/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java +++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java @@ -20,6 +20,8 @@ import com.yahoo.document.DocumentUpdate; import com.yahoo.document.config.DocumentmanagerConfig; import com.yahoo.language.Linguistics; import java.util.logging.Level; + +import com.yahoo.language.process.Encoder; import com.yahoo.vespa.configdefinition.IlscriptsConfig; import com.yahoo.vespa.indexinglanguage.AdapterFactory; import com.yahoo.vespa.indexinglanguage.SimpleAdapterFactory; @@ -52,9 +54,10 @@ public class IndexingProcessor extends DocumentProcessor { @Inject public IndexingProcessor(DocumentmanagerConfig documentmanagerConfig, IlscriptsConfig ilscriptsConfig, - Linguistics linguistics) { + Linguistics linguistics, + Encoder encoder) { docTypeMgr = DocumentTypeManagerConfigurer.configureNewManager(documentmanagerConfig); - scriptMgr = new ScriptManager(docTypeMgr, ilscriptsConfig, linguistics); + scriptMgr = new ScriptManager(docTypeMgr, ilscriptsConfig, linguistics, encoder); adapterFactory = new SimpleAdapterFactory(new ExpressionSelector()); } diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java index a148966c250..8f3f75af795 100644 --- 
a/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java +++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java @@ -5,6 +5,8 @@ import com.yahoo.document.DocumentType; import com.yahoo.document.DocumentTypeManager; import com.yahoo.language.Linguistics; import java.util.logging.Level; + +import com.yahoo.language.process.Encoder; import com.yahoo.vespa.configdefinition.IlscriptsConfig; import com.yahoo.vespa.indexinglanguage.ScriptParserContext; import com.yahoo.vespa.indexinglanguage.expressions.InputExpression; @@ -26,9 +28,9 @@ public class ScriptManager { private final Map<String, Map<String, DocumentScript>> documentFieldScripts; private final DocumentTypeManager docTypeMgr; - public ScriptManager(DocumentTypeManager docTypeMgr, IlscriptsConfig config, Linguistics linguistics) { + public ScriptManager(DocumentTypeManager docTypeMgr, IlscriptsConfig config, Linguistics linguistics, Encoder encoder) { this.docTypeMgr = docTypeMgr; - documentFieldScripts = createScriptsMap(docTypeMgr, config, linguistics); + documentFieldScripts = createScriptsMap(docTypeMgr, config, linguistics, encoder); } @@ -72,9 +74,10 @@ public class ScriptManager { private static Map<String, Map<String, DocumentScript>> createScriptsMap(DocumentTypeManager docTypeMgr, IlscriptsConfig config, - Linguistics linguistics) { + Linguistics linguistics, + Encoder encoder) { Map<String, Map<String, DocumentScript>> documentFieldScripts = new HashMap<>(config.ilscript().size()); - ScriptParserContext parserContext = new ScriptParserContext(linguistics); + ScriptParserContext parserContext = new ScriptParserContext(linguistics, encoder); parserContext.getAnnotatorConfig().setMaxTermOccurrences(config.maxtermoccurrences()); parserContext.getAnnotatorConfig().setMaxTokenLength(config.fieldmatchmaxlength()); diff --git a/docprocs/src/test/java/com/yahoo/docprocs/indexing/ScriptManagerTestCase.java 
b/docprocs/src/test/java/com/yahoo/docprocs/indexing/ScriptManagerTestCase.java index 23df8c0eb25..607fee4f10d 100644 --- a/docprocs/src/test/java/com/yahoo/docprocs/indexing/ScriptManagerTestCase.java +++ b/docprocs/src/test/java/com/yahoo/docprocs/indexing/ScriptManagerTestCase.java @@ -3,6 +3,7 @@ package com.yahoo.docprocs.indexing; import com.yahoo.document.DocumentType; import com.yahoo.document.DocumentTypeManager; +import com.yahoo.language.process.Encoder; import com.yahoo.vespa.configdefinition.IlscriptsConfig; import com.yahoo.vespa.indexinglanguage.parser.ParseException; import org.junit.Test; @@ -28,7 +29,7 @@ public class ScriptManagerTestCase { IlscriptsConfig.Builder config = new IlscriptsConfig.Builder(); config.ilscript(new IlscriptsConfig.Ilscript.Builder().doctype("newssummary") .content("index")); - ScriptManager scriptMgr = new ScriptManager(typeMgr, new IlscriptsConfig(config), null); + ScriptManager scriptMgr = new ScriptManager(typeMgr, new IlscriptsConfig(config), null, Encoder.throwsOnUse); assertNotNull(scriptMgr.getScript(typeMgr.getDocumentType("newsarticle"))); assertNull(scriptMgr.getScript(new DocumentType("unknown"))); } @@ -43,7 +44,7 @@ public class ScriptManagerTestCase { IlscriptsConfig.Builder config = new IlscriptsConfig.Builder(); config.ilscript(new IlscriptsConfig.Ilscript.Builder().doctype("newsarticle") .content("index")); - ScriptManager scriptMgr = new ScriptManager(typeMgr, new IlscriptsConfig(config), null); + ScriptManager scriptMgr = new ScriptManager(typeMgr, new IlscriptsConfig(config), null, Encoder.throwsOnUse); assertNotNull(scriptMgr.getScript(typeMgr.getDocumentType("newssummary"))); assertNull(scriptMgr.getScript(new DocumentType("unknown"))); } @@ -52,7 +53,7 @@ public class ScriptManagerTestCase { public void requireThatEmptyConfigurationDoesNotThrow() { DocumentTypeManager typeMgr = new DocumentTypeManager(); typeMgr.configure("file:src/test/cfg/documentmanager_inherit.cfg"); - ScriptManager scriptMgr = 
new ScriptManager(typeMgr, new IlscriptsConfig(new IlscriptsConfig.Builder()), null); + ScriptManager scriptMgr = new ScriptManager(typeMgr, new IlscriptsConfig(new IlscriptsConfig.Builder()), null, Encoder.throwsOnUse); assertNull(scriptMgr.getScript(new DocumentType("unknown"))); } @@ -60,7 +61,7 @@ public class ScriptManagerTestCase { public void requireThatUnknownDocumentTypeReturnsNull() { DocumentTypeManager typeMgr = new DocumentTypeManager(); typeMgr.configure("file:src/test/cfg/documentmanager_inherit.cfg"); - ScriptManager scriptMgr = new ScriptManager(typeMgr, new IlscriptsConfig(new IlscriptsConfig.Builder()), null); + ScriptManager scriptMgr = new ScriptManager(typeMgr, new IlscriptsConfig(new IlscriptsConfig.Builder()), null, Encoder.throwsOnUse); for (Iterator<DocumentType> it = typeMgr.documentTypeIterator(); it.hasNext(); ) { assertNull(scriptMgr.getScript(it.next())); } diff --git a/document/src/main/java/com/yahoo/document/TensorDataType.java b/document/src/main/java/com/yahoo/document/TensorDataType.java index ae1ccc712ee..ad4a856d964 100644 --- a/document/src/main/java/com/yahoo/document/TensorDataType.java +++ b/document/src/main/java/com/yahoo/document/TensorDataType.java @@ -21,7 +21,7 @@ public class TensorDataType extends DataType { public static int classId = registerClass(Ids.document + 59, TensorDataType.class); public TensorDataType(TensorType tensorType) { - super(tensorType.toString(), DataType.tensorDataTypeCode); + super(tensorType == null ? 
"tensor" : tensorType.toString(), DataType.tensorDataTypeCode); this.tensorType = tensorType; } diff --git a/document/src/main/java/com/yahoo/document/fieldpathupdate/AddFieldPathUpdate.java b/document/src/main/java/com/yahoo/document/fieldpathupdate/AddFieldPathUpdate.java index 74b94b8135e..fc48a0a80ee 100644 --- a/document/src/main/java/com/yahoo/document/fieldpathupdate/AddFieldPathUpdate.java +++ b/document/src/main/java/com/yahoo/document/fieldpathupdate/AddFieldPathUpdate.java @@ -11,9 +11,10 @@ import com.yahoo.document.serialization.DocumentUpdateReader; import com.yahoo.document.serialization.VespaDocumentSerializer6; /** - * @author <a href="mailto:thomasg@yahoo-inc.com">Thomas Gundersen</a> + * @author Thomas Gundersen */ public class AddFieldPathUpdate extends FieldPathUpdate { + class IteratorHandler extends FieldPathIteratorHandler { Array newValues; diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParser.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParser.java index 662d221d3a6..34da5b47655 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParser.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParser.java @@ -55,13 +55,14 @@ public final class ScriptParser { T call(IndexingParser parser) throws ParseException; } - private static <T extends Expression> T parse(ScriptParserContext config, ParserMethod<T> method) + private static <T extends Expression> T parse(ScriptParserContext context, ParserMethod<T> method) throws ParseException { - CharStream input = config.getInputStream(); + CharStream input = context.getInputStream(); IndexingParser parser = new IndexingParser(input); - parser.setAnnotatorConfig(config.getAnnotatorConfig()); - parser.setDefaultFieldName(config.getDefaultFieldName()); - parser.setLinguistics(config.getLinguistcs()); + parser.setAnnotatorConfig(context.getAnnotatorConfig()); + 
parser.setDefaultFieldName(context.getDefaultFieldName()); + parser.setLinguistics(context.getLinguistcs()); + parser.setEncoder(context.getEncoder()); try { return method.call(parser); } catch (ParseException e) { diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParserContext.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParserContext.java index e1071e25042..06be91703fa 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParserContext.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/ScriptParserContext.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.indexinglanguage; import com.yahoo.language.Linguistics; +import com.yahoo.language.process.Encoder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig; import com.yahoo.vespa.indexinglanguage.parser.CharStream; @@ -13,11 +14,13 @@ public class ScriptParserContext { private AnnotatorConfig annotatorConfig = new AnnotatorConfig(); private Linguistics linguistics; + private final Encoder encoder; private String defaultFieldName = null; private CharStream inputStream = null; - public ScriptParserContext(Linguistics linguistics) { + public ScriptParserContext(Linguistics linguistics, Encoder encoder) { this.linguistics = linguistics; + this.encoder = encoder; } public AnnotatorConfig getAnnotatorConfig() { @@ -38,6 +41,10 @@ public class ScriptParserContext { return this; } + public Encoder getEncoder() { + return encoder; + } + public String getDefaultFieldName() { return defaultFieldName; } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ClearStateExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ClearStateExpression.java index 02abd13f5db..dbf50dbf9a7 100644 --- 
a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ClearStateExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ClearStateExpression.java @@ -2,7 +2,6 @@ package com.yahoo.vespa.indexinglanguage.expressions; import com.yahoo.document.DataType; -import com.yahoo.document.DocumentType; /** * @author Simon Thoresen Hult diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EncodeExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EncodeExpression.java new file mode 100644 index 00000000000..09034659ad0 --- /dev/null +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EncodeExpression.java @@ -0,0 +1,64 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.indexinglanguage.expressions; + +import com.yahoo.document.DataType; +import com.yahoo.document.TensorDataType; +import com.yahoo.document.datatypes.StringFieldValue; +import com.yahoo.document.datatypes.TensorFieldValue; +import com.yahoo.language.process.Encoder; +import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.TensorType; + +/** + * Encodes a string as a tensor using the configured Encoder component. The tensor type to encode to is taken from the type of the statement's output field during verification. + * + * @author bratseth + */ +public class EncodeExpression extends Expression { + + private final Encoder encoder; + + /** The target type we are encoding to. Set during verification. 
*/ + private TensorType targetType; + + public EncodeExpression(Encoder encoder) { + super(DataType.STRING); + this.encoder = encoder; + } + + @Override + protected void doExecute(ExecutionContext context) { + StringFieldValue input = (StringFieldValue) context.getValue(); + Tensor tensor = encoder.encode(input.getString(), context.getLanguage(), targetType); + context.setValue(new TensorFieldValue(tensor)); + } + + @Override + protected void doVerify(VerificationContext context) { + String outputField = context.getOutputField(); + if (outputField == null) + throw new VerificationException(this, "No output field in this statement: " + + "Don't know what tensor type to encode to."); + DataType outputFieldType = context.getInputType(this, outputField); + if ( ! (outputFieldType instanceof TensorDataType) ) + throw new VerificationException(this, "The type of the output field " + outputField + + " is not a tensor but " + outputFieldType); + targetType = ((TensorDataType) outputFieldType).getTensorType(); + context.setValueType(createdOutputType()); + } + + @Override + public DataType createdOutputType() { + return new TensorDataType(targetType); + } + + @Override + public String toString() { return "encode"; } + + @Override + public int hashCode() { return 1; } + + @Override + public boolean equals(Object o) { return o instanceof EncodeExpression; } + +} diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java index 6be1f78d376..1aec13bff50 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExecutionContext.java @@ -136,4 +136,5 @@ public class ExecutionContext implements FieldTypeAdapter, FieldValueAdapter, Cl value = null; return this; } + } diff --git 
a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java index da43ff2f2a3..a121df8e5a8 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java @@ -6,6 +6,7 @@ import com.yahoo.document.Document; import com.yahoo.document.DocumentUpdate; import com.yahoo.document.datatypes.FieldValue; import com.yahoo.language.Linguistics; +import com.yahoo.language.process.Encoder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.vespa.indexinglanguage.*; import com.yahoo.vespa.indexinglanguage.parser.IndexingInput; @@ -185,11 +186,11 @@ public abstract class Expression extends Selectable { /** Creates an expression with simple lingustics for testing */ public static Expression fromString(String expression) throws ParseException { - return fromString(expression, new SimpleLinguistics()); + return fromString(expression, new SimpleLinguistics(), Encoder.throwsOnUse); } - public static Expression fromString(String expression, Linguistics linguistics) throws ParseException { - return newInstance(new ScriptParserContext(linguistics).setInputStream(new IndexingInput(expression))); + public static Expression fromString(String expression, Linguistics linguistics, Encoder encoder) throws ParseException { + return newInstance(new ScriptParserContext(linguistics, encoder).setInputStream(new IndexingInput(expression))); } public static Expression newInstance(ScriptParserContext context) throws ParseException { diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExpressionList.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExpressionList.java index 2abed23b0d0..0ac195efb5d 100644 --- 
a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExpressionList.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExpressionList.java @@ -2,7 +2,6 @@ package com.yahoo.vespa.indexinglanguage.expressions; import com.yahoo.document.DataType; -import com.yahoo.document.DocumentType; import com.yahoo.vespa.objects.ObjectOperation; import com.yahoo.vespa.objects.ObjectPredicate; diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/OutputExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/OutputExpression.java index 731aa5bf7c3..398c2751bd8 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/OutputExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/OutputExpression.java @@ -57,4 +57,5 @@ public abstract class OutputExpression extends Expression { public int hashCode() { return getClass().hashCode() + (fieldName != null ? 
fieldName.hashCode() : 0); } + } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java index 035de999962..7317cb2216f 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ScriptExpression.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.indexinglanguage.expressions; import com.yahoo.document.DataType; import com.yahoo.document.datatypes.FieldValue; import com.yahoo.language.Linguistics; +import com.yahoo.language.process.Encoder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.vespa.indexinglanguage.ScriptParser; import com.yahoo.vespa.indexinglanguage.ScriptParserContext; @@ -91,11 +92,11 @@ public final class ScriptExpression extends ExpressionList<StatementExpression> /** Creates an expression with simple lingustics for testing */ @SuppressWarnings("deprecation") public static ScriptExpression fromString(String expression) throws ParseException { - return fromString(expression, new SimpleLinguistics()); + return fromString(expression, new SimpleLinguistics(), Encoder.throwsOnUse); } - public static ScriptExpression fromString(String expression, Linguistics linguistics) throws ParseException { - return newInstance(new ScriptParserContext(linguistics).setInputStream(new IndexingInput(expression))); + public static ScriptExpression fromString(String expression, Linguistics linguistics, Encoder encoder) throws ParseException { + return newInstance(new ScriptParserContext(linguistics, encoder).setInputStream(new IndexingInput(expression))); } public static ScriptExpression newInstance(ScriptParserContext config) throws ParseException { diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java 
b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java index b640c43f3b2..f5354938c67 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java @@ -2,7 +2,9 @@ package com.yahoo.vespa.indexinglanguage.expressions; import com.yahoo.document.DataType; +import com.yahoo.document.TensorDataType; import com.yahoo.language.Linguistics; +import com.yahoo.language.process.Encoder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.vespa.indexinglanguage.ScriptParser; import com.yahoo.vespa.indexinglanguage.ScriptParserContext; @@ -20,8 +22,11 @@ import java.util.List; */ public final class StatementExpression extends ExpressionList<Expression> { + /** The name of the (last) output field this statement will write to, or null if none */ + private String outputField; + /** The type of the output created by this statement, or null if no output */ - private final DataType outputType; + private DataType outputType; public StatementExpression(Expression... 
lst) { this(Arrays.asList(lst)); @@ -38,7 +43,7 @@ public final class StatementExpression extends ExpressionList<Expression> { @Override protected void doExecute(ExecutionContext context) { - context.setOutputType(createdOutputType()); + context.setOutputType(outputType); for (Expression exp : this) { context.execute(exp); } @@ -46,9 +51,14 @@ public final class StatementExpression extends ExpressionList<Expression> { @Override protected void doVerify(VerificationContext context) { - for (Expression exp : this) { - context.execute(exp); + for (Expression expression : this) { + if (expression instanceof OutputExpression) + outputField = ((OutputExpression)expression).getFieldName(); } + context.setOutputField(outputField); + for (Expression expression : this) + context.execute(expression); + outputType = context.getValueType(); } private static DataType resolveInputType(Iterable<Expression> lst) { @@ -98,11 +108,11 @@ public final class StatementExpression extends ExpressionList<Expression> { /** Creates an expression with simple lingustics for testing */ public static StatementExpression fromString(String expression) throws ParseException { - return fromString(expression, new SimpleLinguistics()); + return fromString(expression, new SimpleLinguistics(), Encoder.throwsOnUse); } - public static StatementExpression fromString(String expression, Linguistics linguistics) throws ParseException { - return newInstance(new ScriptParserContext(linguistics).setInputStream(new IndexingInput(expression))); + public static StatementExpression fromString(String expression, Linguistics linguistics, Encoder encoder) throws ParseException { + return newInstance(new ScriptParserContext(linguistics, encoder).setInputStream(new IndexingInput(expression))); } public static StatementExpression newInstance(ScriptParserContext config) throws ParseException { diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ThisExpression.java 
b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ThisExpression.java index 8fc582a01af..7c467b1e08d 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ThisExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ThisExpression.java @@ -2,7 +2,6 @@ package com.yahoo.vespa.indexinglanguage.expressions; import com.yahoo.document.DataType; -import com.yahoo.document.DocumentType; /** * @author Simon Thoresen Hult diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/UnresolvedDataType.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/UnresolvedDataType.java index ef08c01633a..1847db0f29e 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/UnresolvedDataType.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/UnresolvedDataType.java @@ -19,4 +19,5 @@ final class UnresolvedDataType extends PrimitiveDataType { public boolean isValueCompatible(FieldValue value) { return value != null; } + } diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/VerificationContext.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/VerificationContext.java index 601d751739d..61d796e5356 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/VerificationContext.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/VerificationContext.java @@ -14,6 +14,7 @@ public class VerificationContext implements FieldTypeAdapter, Cloneable { private final Map<String, DataType> variables = new HashMap<String, DataType>(); private final FieldTypeAdapter adapter; private DataType value; + private String outputField; public VerificationContext() { this.adapter = null; @@ -49,16 +50,25 @@ public class VerificationContext implements 
FieldTypeAdapter, Cloneable { return this; } - /** Returns the output type that will result from executing the statement verified by this */ public DataType getValueType() { return value; } + /** Sets the output value type */ public VerificationContext setValueType(DataType value) { this.value = value; return this; } + /** Sets the name of the (last) output field of the statement this is executed as a part of */ + public void setOutputField(String outputField) { this.outputField = outputField; } + + /** + * Returns the name of the (last) output field of the statement this is executed as a part of, + * or null if none or not yet verified + */ + public String getOutputField() { return outputField; } + public VerificationContext clear() { variables.clear(); value = null; diff --git a/indexinglanguage/src/main/javacc/IndexingParser.jj b/indexinglanguage/src/main/javacc/IndexingParser.jj index 632a19a2c6c..4533a17954c 100644 --- a/indexinglanguage/src/main/javacc/IndexingParser.jj +++ b/indexinglanguage/src/main/javacc/IndexingParser.jj @@ -34,6 +34,7 @@ import com.yahoo.text.StringUtilities; import com.yahoo.vespa.indexinglanguage.expressions.*; import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig; import com.yahoo.language.process.StemMode; +import com.yahoo.language.process.Encoder; import com.yahoo.language.Linguistics; /** @@ -44,6 +45,7 @@ public class IndexingParser { private String defaultFieldName; private Linguistics linguistics; + private Encoder encoder; private AnnotatorConfig annotatorCfg; public IndexingParser(String str) { @@ -60,6 +62,11 @@ public class IndexingParser { return this; } + public IndexingParser setEncoder(Encoder encoder) { + this.encoder = encoder; + return this; + } + public IndexingParser setAnnotatorConfig(AnnotatorConfig cfg) { annotatorCfg = cfg; return this; @@ -150,6 +157,7 @@ TOKEN : <CREATE_IF_NON_EXISTENT: "create_if_non_existent"> | <ECHO: "echo"> | <ELSE: "else"> | + <ENCODE: "encode"> | <EXACT: "exact"> | 
<FLATTEN: "flatten"> | <FOR_EACH: "for_each"> | @@ -275,6 +283,7 @@ Expression value() : val = base64EncodeExp() | val = clearStateExp() | val = echoExp() | + val = encodeExp() | val = exactExp() | val = flattenExp() | val = forEachExp() | @@ -356,6 +365,12 @@ Expression echoExp() : { } { return new EchoExpression(); } } +Expression encodeExp() : { } +{ + ( <ENCODE> ) + { return new EncodeExpression(encoder); } +} + Expression exactExp() : { } { ( <EXACT> ) @@ -729,6 +744,7 @@ String identifier() : <ECHO> | <EXACT> | <ELSE> | + <ENCODE> | <FLATTEN> | <FOR_EACH> | <GET_FIELD> | diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptParserTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptParserTestCase.java index 0df3073cd25..32e38dbee6f 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptParserTestCase.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptParserTestCase.java @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.indexinglanguage; +import com.yahoo.language.process.Encoder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.vespa.indexinglanguage.expressions.EchoExpression; import com.yahoo.vespa.indexinglanguage.expressions.InputExpression; @@ -95,7 +96,7 @@ public class ScriptParserTestCase { } private static ScriptParserContext newContext(String input) { - return new ScriptParserContext(new SimpleLinguistics()).setInputStream(new IndexingInput(input)); + return new ScriptParserContext(new SimpleLinguistics(), Encoder.throwsOnUse).setInputStream(new IndexingInput(input)); } } diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java index 14a8f40c46c..9d3d0abb256 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/ScriptTestCase.java @@ -4,12 +4,28 @@ package com.yahoo.vespa.indexinglanguage; import com.yahoo.document.DataType; import com.yahoo.document.Document; import com.yahoo.document.DocumentType; +import com.yahoo.document.Field; +import com.yahoo.document.FieldPath; +import com.yahoo.document.TensorDataType; import com.yahoo.document.datatypes.BoolFieldValue; +import com.yahoo.document.datatypes.FieldValue; import com.yahoo.document.datatypes.StringFieldValue; +import com.yahoo.document.datatypes.TensorFieldValue; +import com.yahoo.language.Language; +import com.yahoo.language.process.Encoder; +import com.yahoo.language.simple.SimpleLinguistics; +import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.TensorAddress; +import com.yahoo.tensor.TensorType; import com.yahoo.vespa.indexinglanguage.expressions.*; import com.yahoo.vespa.indexinglanguage.parser.ParseException; import org.junit.Test; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; 
+ import static org.junit.Assert.*; /** @@ -84,10 +100,49 @@ public class ScriptTestCase { Document input = new Document(type, "id:scheme:mytype::"); input.setFieldValue("in-1", new StringFieldValue("foo")); var expression = Expression.fromString("if (input 'in-1' == \"foo\") { true | summary 'mybool' | attribute 'mybool' }"); - System.out.println(expression); Document output = Expression.execute(expression, input); assertNotNull(output); assertEquals(new BoolFieldValue(true), output.getFieldValue("mybool")); } + @Test + public void testEncode() throws ParseException { + TensorType tensorType = TensorType.fromSpec("tensor(d[4])"); + var expression = Expression.fromString("input myText | encode | attribute 'myTensor'", + new SimpleLinguistics(), + new MockEncoder()); + + SimpleTestAdapter adapter = new SimpleTestAdapter(); + adapter.createField(new Field("myText", DataType.STRING)); + adapter.createField(new Field("myTensor", new TensorDataType(tensorType))); + adapter.setValue("myText", new StringFieldValue("input text")); + + // Necessary to resolve output type + VerificationContext verificationContext = new VerificationContext(adapter); + assertEquals(TensorDataType.class, expression.verify(verificationContext).getClass()); + + ExecutionContext context = new ExecutionContext(adapter); + context.setValue(new StringFieldValue("input text")); + expression.execute(context); + assertNotNull(context); + //assertTrue(context.getOutputType() instanceof TensorDataType); + assertTrue(adapter.values.containsKey("myTensor")); + assertEquals(Tensor.from(tensorType, "[7,3,0,0]"), + ((TensorFieldValue)adapter.values.get("myTensor")).getTensor().get()); + } + + private static class MockEncoder implements Encoder { + + @Override + public List<Integer> encode(String text, Language language) { + return null; + } + + @Override + public Tensor encode(String text, Language language, TensorType tensorType) { + return Tensor.from(tensorType, "[7,3,0,0]"); + } + + } + } diff --git 
a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/SimpleTestAdapter.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/SimpleTestAdapter.java index 5d1cc514d35..892b4ebecb2 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/SimpleTestAdapter.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/SimpleTestAdapter.java @@ -17,8 +17,8 @@ import java.util.Map; */ public class SimpleTestAdapter implements FieldValueAdapter { - private final Map<String, DataType> types = new HashMap<String, DataType>(); - private final Map<String, FieldValue> values = new HashMap<String, FieldValue>(); + final Map<String, DataType> types = new HashMap<>(); + final Map<String, FieldValue> values = new HashMap<>(); public SimpleTestAdapter(Field... fields) { for (Field field : fields) { @@ -58,9 +58,15 @@ public class SimpleTestAdapter implements FieldValueAdapter { } } + public SimpleTestAdapter setValue(String fieldName, FieldValue fieldValue) { + values.put(fieldName, fieldValue); + return this; + } + @Override public SimpleTestAdapter setOutputValue(Expression exp, String fieldName, FieldValue fieldValue) { values.put(fieldName, fieldValue); return this; } + } diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/DefaultFieldNameTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/DefaultFieldNameTestCase.java index 77998a9ac05..2a71aeb564c 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/DefaultFieldNameTestCase.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/DefaultFieldNameTestCase.java @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.indexinglanguage.parser; +import com.yahoo.language.process.Encoder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.vespa.indexinglanguage.ScriptParserContext; import com.yahoo.vespa.indexinglanguage.expressions.Expression; @@ -17,9 +18,11 @@ public class DefaultFieldNameTestCase { @Test public void requireThatDefaultFieldNameIsAppliedWhenArgumentIsMissing() throws ParseException { IndexingInput input = new IndexingInput("input"); - InputExpression exp = (InputExpression)Expression.newInstance(new ScriptParserContext(new SimpleLinguistics()) + InputExpression exp = (InputExpression)Expression.newInstance(new ScriptParserContext(new SimpleLinguistics(), + Encoder.throwsOnUse) .setInputStream(input) .setDefaultFieldName("foo")); assertEquals("foo", exp.getFieldName()); } + } diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java index 223c8191186..d7c5ae5c15a 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/parser/ExpressionTestCase.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.indexinglanguage.parser; import com.yahoo.language.Linguistics; +import com.yahoo.language.process.Encoder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.vespa.indexinglanguage.expressions.*; import org.junit.Test; @@ -84,9 +85,9 @@ public class ExpressionTestCase { private static void assertExpression(Class expectedClass, String str) throws ParseException { Linguistics linguistics = new SimpleLinguistics(); - Expression foo = Expression.fromString(str, linguistics); + Expression foo = Expression.fromString(str, linguistics, Encoder.throwsOnUse); assertEquals(expectedClass, foo.getClass()); - Expression bar = Expression.fromString(foo.toString(), 
linguistics); + Expression bar = Expression.fromString(foo.toString(), linguistics, Encoder.throwsOnUse); assertEquals(foo.hashCode(), bar.hashCode()); assertEquals(foo, bar); } diff --git a/linguistics/abi-spec.json b/linguistics/abi-spec.json index e8687b5c9f4..dc7450678c5 100644 --- a/linguistics/abi-spec.json +++ b/linguistics/abi-spec.json @@ -328,6 +328,21 @@ ], "fields": [] }, + "com.yahoo.language.process.Encoder$FailingEncoder": { + "superClass": "java.lang.Object", + "interfaces": [ + "com.yahoo.language.process.Encoder" + ], + "attributes": [ + "public" + ], + "methods": [ + "public void <init>()", + "public java.util.List encode(java.lang.String, com.yahoo.language.Language)", + "public com.yahoo.tensor.Tensor encode(java.lang.String, com.yahoo.language.Language, com.yahoo.tensor.TensorType)" + ], + "fields": [] + }, "com.yahoo.language.process.Encoder": { "superClass": "java.lang.Object", "interfaces": [], @@ -340,7 +355,9 @@ "public abstract java.util.List encode(java.lang.String, com.yahoo.language.Language)", "public abstract com.yahoo.tensor.Tensor encode(java.lang.String, com.yahoo.language.Language, com.yahoo.tensor.TensorType)" ], - "fields": [] + "fields": [ + "public static final com.yahoo.language.process.Encoder throwsOnUse" + ] }, "com.yahoo.language.process.GramSplitter$Gram": { "superClass": "java.lang.Object", diff --git a/linguistics/src/main/java/com/yahoo/language/process/Encoder.java b/linguistics/src/main/java/com/yahoo/language/process/Encoder.java index 91de16f669b..27f73d15e54 100644 --- a/linguistics/src/main/java/com/yahoo/language/process/Encoder.java +++ b/linguistics/src/main/java/com/yahoo/language/process/Encoder.java @@ -14,6 +14,9 @@ import java.util.List; */ public interface Encoder { + /** An instance of this which throws IllegalStateException if attempted used */ + Encoder throwsOnUse = new FailingEncoder(); + /** * Encodes text into tokens in a list of ids. 
* @@ -36,4 +39,18 @@ public interface Encoder { */ Tensor encode(String text, Language language, TensorType tensorType); + class FailingEncoder implements Encoder { + + @Override + public List<Integer> encode(String text, Language language) { + throw new IllegalStateException("No encoder has been configured"); + } + + @Override + public Tensor encode(String text, Language language, TensorType tensorType) { + throw new IllegalStateException("No encoder has been configured"); + } + + } + } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/TensorType.java b/vespajlib/src/main/java/com/yahoo/tensor/TensorType.java index 7867a53b8da..4a714925397 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/TensorType.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/TensorType.java @@ -187,7 +187,7 @@ public class TensorType { } /** - * Returns whether or not this type can simply be renamed to + * Returns whether this type can simply be renamed to * the given type. This is the same as being assignable, but disregarding * dimension names. */ |