about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@oath.com> 2021-09-27 23:09:03 +0200
committer GitHub <noreply@github.com> 2021-09-27 23:09:03 +0200
commit 2df97d23d9f25ae60f010a2e9f273cb5b38e049b (patch)
tree d2923a45682e91d80e7011c60cfb301e05acead3
parent 037f756caf4cfb99bcd988174839d7bc385267b9 (diff)
parent 8f3fb1a105ded07144f6de527266a438e48a1766 (diff)
Merge pull request #19294 from vespa-engine/bratseth/linguistics-components v7.473.17
Bratseth/linguistics components
-rw-r--r--CMakeLists.txt1
-rw-r--r--container-disc/pom.xml1
-rw-r--r--dist/vespa.spec1
-rw-r--r--docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java10
-rw-r--r--docprocs/src/test/java/com/yahoo/docprocs/indexing/ScriptManagerTestCase.java4
-rw-r--r--indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EncodeExpression.java5
-rw-r--r--indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java2
-rw-r--r--indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExpressionList.java6
-rw-r--r--indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/GuardExpression.java5
-rw-r--r--indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/OutputExpression.java23
-rw-r--r--linguistics-components/.gitignore5
-rw-r--r--linguistics-components/CMakeLists.txt5
-rw-r--r--linguistics-components/OWNERS2
-rw-r--r--linguistics-components/README4
-rw-r--r--linguistics-components/abi-spec.json189
-rw-r--r--linguistics-components/pom.xml80
-rw-r--r--linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Model.java (renamed from linguistics/src/main/java/com/yahoo/language/sentencepiece/Model.java)0
-rw-r--r--linguistics-components/src/main/java/com/yahoo/language/sentencepiece/ResultBuilder.java (renamed from linguistics/src/main/java/com/yahoo/language/sentencepiece/ResultBuilder.java)0
-rw-r--r--linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Scoring.java (renamed from linguistics/src/main/java/com/yahoo/language/sentencepiece/Scoring.java)0
-rw-r--r--linguistics-components/src/main/java/com/yahoo/language/sentencepiece/SentencePieceAlgorithm.java (renamed from linguistics/src/main/java/com/yahoo/language/sentencepiece/SentencePieceAlgorithm.java)0
-rw-r--r--linguistics-components/src/main/java/com/yahoo/language/sentencepiece/SentencePieceEncoder.java (renamed from linguistics/src/main/java/com/yahoo/language/sentencepiece/SentencePieceEncoder.java)0
-rw-r--r--linguistics-components/src/main/java/com/yahoo/language/sentencepiece/TokenType.java (renamed from linguistics/src/main/java/com/yahoo/language/sentencepiece/TokenType.java)0
-rw-r--r--linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java (renamed from linguistics/src/main/java/com/yahoo/language/sentencepiece/Trie.java)0
-rw-r--r--linguistics-components/src/main/java/com/yahoo/language/sentencepiece/package-info.java (renamed from linguistics/src/main/java/com/yahoo/language/sentencepiece/package-info.java)2
-rw-r--r--linguistics-components/src/main/protobuf/sentencepiece_model.proto (renamed from linguistics/src/main/protobuf/sentencepiece_model.proto)0
-rw-r--r--linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def (renamed from linguistics/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def)2
-rw-r--r--linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceConfigurationTest.java (renamed from linguistics/src/test/java/com/yahoo/language/sentencepiece/SentencePieceConfigurationTest.java)0
-rw-r--r--linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTest.java (renamed from linguistics/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTest.java)0
-rw-r--r--linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java (renamed from linguistics/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java)0
-rw-r--r--linguistics-components/src/test/models/sentencepiece/en.wiki.bpe.vs10000.model (renamed from linguistics/src/test/models/sentencepiece/en.wiki.bpe.vs10000.model)bin400869 -> 400869 bytes
-rw-r--r--linguistics-components/src/test/models/sentencepiece/ja.wiki.bpe.vs5000.model (renamed from linguistics/src/test/models/sentencepiece/ja.wiki.bpe.vs5000.model)bin300865 -> 300865 bytes
-rw-r--r--linguistics/abi-spec.json187
-rw-r--r--linguistics/pom.xml4
-rw-r--r--pom.xml1
34 files changed, 342 insertions, 197 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7d9968b6329..c9980fb1928 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -95,6 +95,7 @@ add_subdirectory(jdisc_jetty)
add_subdirectory(jrt_test)
add_subdirectory(juniper)
add_subdirectory(linguistics)
+add_subdirectory(linguistics-components)
add_subdirectory(logd)
add_subdirectory(logserver)
add_subdirectory(logforwarder)
diff --git a/container-disc/pom.xml b/container-disc/pom.xml
index e3b2830a974..a362fb11be6 100644
--- a/container-disc/pom.xml
+++ b/container-disc/pom.xml
@@ -174,6 +174,7 @@
docprocs-jar-with-dependencies.jar,
hosted-zone-api-jar-with-dependencies.jar,
jdisc-security-filters-jar-with-dependencies.jar,
+ linguistics-components-jar-with-dependencies.jar,
vespaclient-container-plugin-jar-with-dependencies.jar,
vespa-athenz-jar-with-dependencies.jar,
container-apache-http-client-bundle-jar-with-dependencies.jar, <!-- Apache http client repackaged as bundle -->
diff --git a/dist/vespa.spec b/dist/vespa.spec
index e45c45d41f9..1fa9fbc9796 100644
--- a/dist/vespa.spec
+++ b/dist/vespa.spec
@@ -814,6 +814,7 @@ fi
%{_prefix}/lib/jars/jdisc_core-jar-with-dependencies.jar
%{_prefix}/lib/jars/jdisc-security-filters-jar-with-dependencies.jar
%{_prefix}/lib/jars/jersey-*.jar
+%{_prefix}/lib/jars/linguistics-components-jar-with-dependencies.jar
%{_prefix}/lib/jars/alpn-*.jar
%{_prefix}/lib/jars/http2-*.jar
%{_prefix}/lib/jars/jetty-*.jar
diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java
index 8f3f75af795..fa5f794f652 100644
--- a/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java
+++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java
@@ -10,13 +10,13 @@ import com.yahoo.language.process.Encoder;
import com.yahoo.vespa.configdefinition.IlscriptsConfig;
import com.yahoo.vespa.indexinglanguage.ScriptParserContext;
import com.yahoo.vespa.indexinglanguage.expressions.InputExpression;
+import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression;
import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression;
import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression;
import com.yahoo.vespa.indexinglanguage.parser.IndexingInput;
import com.yahoo.vespa.indexinglanguage.parser.ParseException;
import java.util.*;
-import java.util.logging.Level;
/**
* @author Simon Thoresen Hult
@@ -86,11 +86,17 @@ public class ScriptManager {
List<StatementExpression> expressions = new ArrayList<>(ilscript.content().size());
Map<String, DocumentScript> fieldScripts = new HashMap<>(ilscript.content().size());
for (String content : ilscript.content()) {
- expressions.add(parse(ilscript.doctype(), parserContext, content));
StatementExpression statement = parse(ilscript.doctype(), parserContext, content);
+ expressions.add(statement);
InputExpression.InputFieldNameExtractor inputFieldNameExtractor = new InputExpression.InputFieldNameExtractor();
statement.select(inputFieldNameExtractor, inputFieldNameExtractor);
+ OutputExpression.OutputFieldNameExtractor outputFieldNameExtractor = new OutputExpression.OutputFieldNameExtractor();
+ statement.select(outputFieldNameExtractor, outputFieldNameExtractor);
statement.select(fieldPathOptimizer, fieldPathOptimizer);
+ if ( ! outputFieldNameExtractor.getOutputFieldNames().isEmpty()) {
+ String outputFieldName = outputFieldNameExtractor.getOutputFieldNames().get(0);
+ statement.setStatementOutputType(docTypeMgr.getDocumentType(ilscript.doctype()).getField(outputFieldName).getDataType());
+ }
if (inputFieldNameExtractor.getInputFieldNames().size() == 1) {
String fieldName = inputFieldNameExtractor.getInputFieldNames().get(0);
ScriptExpression script;
diff --git a/docprocs/src/test/java/com/yahoo/docprocs/indexing/ScriptManagerTestCase.java b/docprocs/src/test/java/com/yahoo/docprocs/indexing/ScriptManagerTestCase.java
index 607fee4f10d..ec05fcbe422 100644
--- a/docprocs/src/test/java/com/yahoo/docprocs/indexing/ScriptManagerTestCase.java
+++ b/docprocs/src/test/java/com/yahoo/docprocs/indexing/ScriptManagerTestCase.java
@@ -28,7 +28,7 @@ public class ScriptManagerTestCase {
IlscriptsConfig.Builder config = new IlscriptsConfig.Builder();
config.ilscript(new IlscriptsConfig.Ilscript.Builder().doctype("newssummary")
- .content("index"));
+ .content("input title | index title"));
ScriptManager scriptMgr = new ScriptManager(typeMgr, new IlscriptsConfig(config), null, Encoder.throwsOnUse);
assertNotNull(scriptMgr.getScript(typeMgr.getDocumentType("newsarticle")));
assertNull(scriptMgr.getScript(new DocumentType("unknown")));
@@ -43,7 +43,7 @@ public class ScriptManagerTestCase {
IlscriptsConfig.Builder config = new IlscriptsConfig.Builder();
config.ilscript(new IlscriptsConfig.Ilscript.Builder().doctype("newsarticle")
- .content("index"));
+ .content("input title | index title"));
ScriptManager scriptMgr = new ScriptManager(typeMgr, new IlscriptsConfig(config), null, Encoder.throwsOnUse);
assertNotNull(scriptMgr.getScript(typeMgr.getDocumentType("newssummary")));
assertNull(scriptMgr.getScript(new DocumentType("unknown")));
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EncodeExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EncodeExpression.java
index 09034659ad0..f84da9ddef8 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EncodeExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/EncodeExpression.java
@@ -27,6 +27,11 @@ public class EncodeExpression extends Expression {
}
@Override
+ public void setStatementOutputType(DataType type) {
+ targetType = ((TensorDataType)type).getTensorType();
+ }
+
+ @Override
protected void doExecute(ExecutionContext context) {
StringFieldValue input = (StringFieldValue) context.getValue();
Tensor tensor = encoder.encode(input.getString(), context.getLanguage(), targetType);
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java
index a121df8e5a8..67459c2b035 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/Expression.java
@@ -31,6 +31,8 @@ public abstract class Expression extends Selectable {
this.inputType = inputType;
}
+ public void setStatementOutputType(DataType type) {}
+
public final FieldValue execute(FieldValue val) {
return execute(new ExecutionContext().setValue(val));
}
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExpressionList.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExpressionList.java
index 0ac195efb5d..0f7c2a411de 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExpressionList.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/ExpressionList.java
@@ -24,6 +24,12 @@ public abstract class ExpressionList<T extends Expression> extends CompositeExpr
}
}
+ @Override
+ public void setStatementOutputType(DataType type) {
+ for (Expression expression : expressions)
+ expression.setStatementOutputType(type);
+ }
+
public int size() {
return expressions.size();
}
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/GuardExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/GuardExpression.java
index 179f202788c..78c261caccb 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/GuardExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/GuardExpression.java
@@ -27,6 +27,11 @@ public final class GuardExpression extends CompositeExpression {
}
@Override
+ public void setStatementOutputType(DataType type) {
+ exp.setStatementOutputType(type);
+ }
+
+ @Override
protected void doExecute(ExecutionContext context) {
if (!shouldExecute && context.getAdapter() instanceof UpdateAdapter) {
context.setValue(null);
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/OutputExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/OutputExpression.java
index 398c2751bd8..267fb6fc51b 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/OutputExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/OutputExpression.java
@@ -2,6 +2,11 @@
package com.yahoo.vespa.indexinglanguage.expressions;
import com.yahoo.document.DataType;
+import com.yahoo.vespa.objects.ObjectOperation;
+import com.yahoo.vespa.objects.ObjectPredicate;
+
+import java.util.ArrayList;
+import java.util.List;
/**
* @author Simon Thoresen Hult
@@ -58,4 +63,22 @@ public abstract class OutputExpression extends Expression {
return getClass().hashCode() + (fieldName != null ? fieldName.hashCode() : 0);
}
+ public static class OutputFieldNameExtractor implements ObjectOperation, ObjectPredicate {
+
+ private final List<String> outputFieldNames = new ArrayList<>(1);
+
+ public List<String> getOutputFieldNames() { return outputFieldNames; }
+
+ @Override
+ public void execute(Object obj) {
+ outputFieldNames.add(((OutputExpression) obj).getFieldName());
+ }
+
+ @Override
+ public boolean check(Object obj) {
+ return obj instanceof OutputExpression;
+ }
+
+ }
+
}
diff --git a/linguistics-components/.gitignore b/linguistics-components/.gitignore
new file mode 100644
index 00000000000..8b990078588
--- /dev/null
+++ b/linguistics-components/.gitignore
@@ -0,0 +1,5 @@
+target
+*.iml
+*.ipr
+*.iws
+/pom.xml.build
diff --git a/linguistics-components/CMakeLists.txt b/linguistics-components/CMakeLists.txt
new file mode 100644
index 00000000000..b53c8001959
--- /dev/null
+++ b/linguistics-components/CMakeLists.txt
@@ -0,0 +1,5 @@
+# Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+install_fat_java_artifact(linguistics-components)
+
+install_config_definitions()
diff --git a/linguistics-components/OWNERS b/linguistics-components/OWNERS
new file mode 100644
index 00000000000..cd50f7a263a
--- /dev/null
+++ b/linguistics-components/OWNERS
@@ -0,0 +1,2 @@
+bratseth
+arnej27959
diff --git a/linguistics-components/README b/linguistics-components/README
new file mode 100644
index 00000000000..e26a51e2f53
--- /dev/null
+++ b/linguistics-components/README
@@ -0,0 +1,4 @@
+Java library for linguistic operations in Vespa.
+
+This API is pluggable - multiple implementations may be supplied.
+This module contains a default pure Java implementation, "simple". \ No newline at end of file
diff --git a/linguistics-components/abi-spec.json b/linguistics-components/abi-spec.json
new file mode 100644
index 00000000000..5b6729c58ef
--- /dev/null
+++ b/linguistics-components/abi-spec.json
@@ -0,0 +1,189 @@
+{
+ "com.yahoo.language.sentencepiece.Scoring": {
+ "superClass": "java.lang.Enum",
+ "interfaces": [],
+ "attributes": [
+ "public",
+ "final",
+ "enum"
+ ],
+ "methods": [
+ "public static com.yahoo.language.sentencepiece.Scoring[] values()",
+ "public static com.yahoo.language.sentencepiece.Scoring valueOf(java.lang.String)"
+ ],
+ "fields": [
+ "public static final enum com.yahoo.language.sentencepiece.Scoring highestScore",
+ "public static final enum com.yahoo.language.sentencepiece.Scoring fewestSegments"
+ ]
+ },
+ "com.yahoo.language.sentencepiece.SentencePieceConfig$Builder": {
+ "superClass": "java.lang.Object",
+ "interfaces": [
+ "com.yahoo.config.ConfigInstance$Builder"
+ ],
+ "attributes": [
+ "public"
+ ],
+ "methods": [
+ "public void <init>()",
+ "public void <init>(com.yahoo.language.sentencepiece.SentencePieceConfig)",
+ "public com.yahoo.language.sentencepiece.SentencePieceConfig$Builder collapseUnknowns(boolean)",
+ "public com.yahoo.language.sentencepiece.SentencePieceConfig$Builder scoring(com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum)",
+ "public com.yahoo.language.sentencepiece.SentencePieceConfig$Builder model(com.yahoo.language.sentencepiece.SentencePieceConfig$Model$Builder)",
+ "public com.yahoo.language.sentencepiece.SentencePieceConfig$Builder model(java.util.List)",
+ "public final boolean dispatchGetConfig(com.yahoo.config.ConfigInstance$Producer)",
+ "public final java.lang.String getDefMd5()",
+ "public final java.lang.String getDefName()",
+ "public final java.lang.String getDefNamespace()",
+ "public final boolean getApplyOnRestart()",
+ "public final void setApplyOnRestart(boolean)",
+ "public com.yahoo.language.sentencepiece.SentencePieceConfig build()"
+ ],
+ "fields": [
+ "public java.util.List model"
+ ]
+ },
+ "com.yahoo.language.sentencepiece.SentencePieceConfig$Model$Builder": {
+ "superClass": "java.lang.Object",
+ "interfaces": [
+ "com.yahoo.config.ConfigBuilder"
+ ],
+ "attributes": [
+ "public"
+ ],
+ "methods": [
+ "public void <init>()",
+ "public void <init>(com.yahoo.language.sentencepiece.SentencePieceConfig$Model)",
+ "public com.yahoo.language.sentencepiece.SentencePieceConfig$Model$Builder language(java.lang.String)",
+ "public com.yahoo.language.sentencepiece.SentencePieceConfig$Model$Builder path(com.yahoo.config.FileReference)",
+ "public com.yahoo.language.sentencepiece.SentencePieceConfig$Model build()"
+ ],
+ "fields": []
+ },
+ "com.yahoo.language.sentencepiece.SentencePieceConfig$Model": {
+ "superClass": "com.yahoo.config.InnerNode",
+ "interfaces": [],
+ "attributes": [
+ "public",
+ "final"
+ ],
+ "methods": [
+ "public void <init>(com.yahoo.language.sentencepiece.SentencePieceConfig$Model$Builder)",
+ "public java.lang.String language()",
+ "public java.nio.file.Path path()"
+ ],
+ "fields": []
+ },
+ "com.yahoo.language.sentencepiece.SentencePieceConfig$Producer": {
+ "superClass": "java.lang.Object",
+ "interfaces": [
+ "com.yahoo.config.ConfigInstance$Producer"
+ ],
+ "attributes": [
+ "public",
+ "interface",
+ "abstract"
+ ],
+ "methods": [
+ "public abstract void getConfig(com.yahoo.language.sentencepiece.SentencePieceConfig$Builder)"
+ ],
+ "fields": []
+ },
+ "com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum": {
+ "superClass": "java.lang.Enum",
+ "interfaces": [],
+ "attributes": [
+ "public",
+ "final",
+ "enum"
+ ],
+ "methods": [
+ "public static com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum[] values()",
+ "public static com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum valueOf(java.lang.String)"
+ ],
+ "fields": [
+ "public static final enum com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum highestScore",
+ "public static final enum com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum fewestSegments"
+ ]
+ },
+ "com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring": {
+ "superClass": "com.yahoo.config.EnumNode",
+ "interfaces": [],
+ "attributes": [
+ "public",
+ "final"
+ ],
+ "methods": [
+ "public void <init>()",
+ "public void <init>(com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum)"
+ ],
+ "fields": [
+ "public static final com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum highestScore",
+ "public static final com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum fewestSegments"
+ ]
+ },
+ "com.yahoo.language.sentencepiece.SentencePieceConfig": {
+ "superClass": "com.yahoo.config.ConfigInstance",
+ "interfaces": [],
+ "attributes": [
+ "public",
+ "final"
+ ],
+ "methods": [
+ "public static java.lang.String getDefMd5()",
+ "public static java.lang.String getDefName()",
+ "public static java.lang.String getDefNamespace()",
+ "public static java.lang.String getDefVersion()",
+ "public void <init>(com.yahoo.language.sentencepiece.SentencePieceConfig$Builder)",
+ "public boolean collapseUnknowns()",
+ "public com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum scoring()",
+ "public java.util.List model()",
+ "public com.yahoo.language.sentencepiece.SentencePieceConfig$Model model(int)"
+ ],
+ "fields": [
+ "public static final java.lang.String CONFIG_DEF_MD5",
+ "public static final java.lang.String CONFIG_DEF_NAME",
+ "public static final java.lang.String CONFIG_DEF_NAMESPACE",
+ "public static final java.lang.String CONFIG_DEF_VERSION",
+ "public static final java.lang.String[] CONFIG_DEF_SCHEMA"
+ ]
+ },
+ "com.yahoo.language.sentencepiece.SentencePieceEncoder$Builder": {
+ "superClass": "java.lang.Object",
+ "interfaces": [],
+ "attributes": [
+ "public"
+ ],
+ "methods": [
+ "public void <init>()",
+ "public void addModel(com.yahoo.language.Language, java.nio.file.Path)",
+ "public com.yahoo.language.sentencepiece.SentencePieceEncoder$Builder addDefaultModel(java.nio.file.Path)",
+ "public java.util.Map getModels()",
+ "public com.yahoo.language.sentencepiece.SentencePieceEncoder$Builder setCollapseUnknowns(boolean)",
+ "public boolean getCollapseUnknowns()",
+ "public com.yahoo.language.sentencepiece.SentencePieceEncoder$Builder setScoring(com.yahoo.language.sentencepiece.Scoring)",
+ "public com.yahoo.language.sentencepiece.Scoring getScoring()",
+ "public com.yahoo.language.sentencepiece.SentencePieceEncoder build()"
+ ],
+ "fields": []
+ },
+ "com.yahoo.language.sentencepiece.SentencePieceEncoder": {
+ "superClass": "java.lang.Object",
+ "interfaces": [
+ "com.yahoo.language.process.Segmenter",
+ "com.yahoo.language.process.Encoder"
+ ],
+ "attributes": [
+ "public"
+ ],
+ "methods": [
+ "public void <init>(com.yahoo.language.sentencepiece.SentencePieceConfig)",
+ "public void <init>(com.yahoo.language.sentencepiece.SentencePieceEncoder$Builder)",
+ "public java.util.List segment(java.lang.String, com.yahoo.language.Language)",
+ "public java.util.List encode(java.lang.String, com.yahoo.language.Language)",
+ "public com.yahoo.tensor.Tensor encode(java.lang.String, com.yahoo.language.Language, com.yahoo.tensor.TensorType)",
+ "public java.lang.String normalize(java.lang.String)"
+ ],
+ "fields": []
+ }
+} \ No newline at end of file
diff --git a/linguistics-components/pom.xml b/linguistics-components/pom.xml
new file mode 100644
index 00000000000..44e58fb7588
--- /dev/null
+++ b/linguistics-components/pom.xml
@@ -0,0 +1,80 @@
+<?xml version="1.0"?>
+<!-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>parent</artifactId>
+ <version>7-SNAPSHOT</version>
+ <relativePath>../parent/pom.xml</relativePath>
+ </parent>
+ <artifactId>linguistics-components</artifactId>
+ <packaging>container-plugin</packaging>
+ <version>7-SNAPSHOT</version>
+ <dependencies>
+ <dependency>
+ <groupId>com.google.protobuf</groupId>
+ <artifactId>protobuf-java</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>component</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>linguistics</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>config-bundle</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.inject</groupId>
+ <artifactId>guice</artifactId>
+ <scope>provided</scope>
+ <classifier>no_aop</classifier>
+ </dependency>
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>com.github.os72</groupId>
+ <artifactId>protoc-jar-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <configuration>
+ <compilerArgs>
+ <arg>-Xlint:rawtypes</arg>
+ <arg>-Xlint:unchecked</arg>
+ <arg>-Xlint:deprecation</arg>
+ <arg>-Werror</arg>
+ </compilerArgs>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>com.yahoo.vespa</groupId>
+ <artifactId>abi-check-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/linguistics/src/main/java/com/yahoo/language/sentencepiece/Model.java b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Model.java
index 74f300057dc..74f300057dc 100644
--- a/linguistics/src/main/java/com/yahoo/language/sentencepiece/Model.java
+++ b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Model.java
diff --git a/linguistics/src/main/java/com/yahoo/language/sentencepiece/ResultBuilder.java b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/ResultBuilder.java
index 2141505374c..2141505374c 100644
--- a/linguistics/src/main/java/com/yahoo/language/sentencepiece/ResultBuilder.java
+++ b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/ResultBuilder.java
diff --git a/linguistics/src/main/java/com/yahoo/language/sentencepiece/Scoring.java b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Scoring.java
index 6c8560abee7..6c8560abee7 100644
--- a/linguistics/src/main/java/com/yahoo/language/sentencepiece/Scoring.java
+++ b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Scoring.java
diff --git a/linguistics/src/main/java/com/yahoo/language/sentencepiece/SentencePieceAlgorithm.java b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/SentencePieceAlgorithm.java
index 1659e3c0fa7..1659e3c0fa7 100644
--- a/linguistics/src/main/java/com/yahoo/language/sentencepiece/SentencePieceAlgorithm.java
+++ b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/SentencePieceAlgorithm.java
diff --git a/linguistics/src/main/java/com/yahoo/language/sentencepiece/SentencePieceEncoder.java b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/SentencePieceEncoder.java
index b6659ebeaa3..b6659ebeaa3 100644
--- a/linguistics/src/main/java/com/yahoo/language/sentencepiece/SentencePieceEncoder.java
+++ b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/SentencePieceEncoder.java
diff --git a/linguistics/src/main/java/com/yahoo/language/sentencepiece/TokenType.java b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/TokenType.java
index 782030a8e4d..782030a8e4d 100644
--- a/linguistics/src/main/java/com/yahoo/language/sentencepiece/TokenType.java
+++ b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/TokenType.java
diff --git a/linguistics/src/main/java/com/yahoo/language/sentencepiece/Trie.java b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java
index 8e7c2db2ed3..8e7c2db2ed3 100644
--- a/linguistics/src/main/java/com/yahoo/language/sentencepiece/Trie.java
+++ b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/Trie.java
diff --git a/linguistics/src/main/java/com/yahoo/language/sentencepiece/package-info.java b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/package-info.java
index 4a8673705ec..3f97277c489 100644
--- a/linguistics/src/main/java/com/yahoo/language/sentencepiece/package-info.java
+++ b/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/package-info.java
@@ -1,4 +1,4 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright 2021 Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
@ExportPackage
@PublicApi
package com.yahoo.language.sentencepiece;
diff --git a/linguistics/src/main/protobuf/sentencepiece_model.proto b/linguistics-components/src/main/protobuf/sentencepiece_model.proto
index 39626aede53..39626aede53 100644
--- a/linguistics/src/main/protobuf/sentencepiece_model.proto
+++ b/linguistics-components/src/main/protobuf/sentencepiece_model.proto
diff --git a/linguistics/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def b/linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def
index 0b8e5103aa3..b91c0c45dc4 100644
--- a/linguistics/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def
+++ b/linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def
@@ -15,4 +15,4 @@ scoring enum { highestScore, fewestSegments } default=fewestSegments
# Use "unknown" for models to be used with any language.
model[].language string
# The path to the model relative to the application package root
-model[].path path
+model[].path path \ No newline at end of file
diff --git a/linguistics/src/test/java/com/yahoo/language/sentencepiece/SentencePieceConfigurationTest.java b/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceConfigurationTest.java
index edbbe21ec53..edbbe21ec53 100644
--- a/linguistics/src/test/java/com/yahoo/language/sentencepiece/SentencePieceConfigurationTest.java
+++ b/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceConfigurationTest.java
diff --git a/linguistics/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTest.java b/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTest.java
index d60d7386d4b..d60d7386d4b 100644
--- a/linguistics/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTest.java
+++ b/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTest.java
diff --git a/linguistics/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java b/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java
index 1ba7c9b472d..1ba7c9b472d 100644
--- a/linguistics/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java
+++ b/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java
diff --git a/linguistics/src/test/models/sentencepiece/en.wiki.bpe.vs10000.model b/linguistics-components/src/test/models/sentencepiece/en.wiki.bpe.vs10000.model
index 89f93ef3517..89f93ef3517 100644
--- a/linguistics/src/test/models/sentencepiece/en.wiki.bpe.vs10000.model
+++ b/linguistics-components/src/test/models/sentencepiece/en.wiki.bpe.vs10000.model
Binary files differ
diff --git a/linguistics/src/test/models/sentencepiece/ja.wiki.bpe.vs5000.model b/linguistics-components/src/test/models/sentencepiece/ja.wiki.bpe.vs5000.model
index 41c0688d9df..41c0688d9df 100644
--- a/linguistics/src/test/models/sentencepiece/ja.wiki.bpe.vs5000.model
+++ b/linguistics-components/src/test/models/sentencepiece/ja.wiki.bpe.vs5000.model
Binary files differ
diff --git a/linguistics/abi-spec.json b/linguistics/abi-spec.json
index dbf4842ea1a..cfbf2abda1a 100644
--- a/linguistics/abi-spec.json
+++ b/linguistics/abi-spec.json
@@ -731,192 +731,5 @@
"public abstract java.lang.String accentDrop(java.lang.String, com.yahoo.language.Language)"
],
"fields": []
- },
- "com.yahoo.language.sentencepiece.Scoring": {
- "superClass": "java.lang.Enum",
- "interfaces": [],
- "attributes": [
- "public",
- "final",
- "enum"
- ],
- "methods": [
- "public static com.yahoo.language.sentencepiece.Scoring[] values()",
- "public static com.yahoo.language.sentencepiece.Scoring valueOf(java.lang.String)"
- ],
- "fields": [
- "public static final enum com.yahoo.language.sentencepiece.Scoring highestScore",
- "public static final enum com.yahoo.language.sentencepiece.Scoring fewestSegments"
- ]
- },
- "com.yahoo.language.sentencepiece.SentencePieceConfig$Builder": {
- "superClass": "java.lang.Object",
- "interfaces": [
- "com.yahoo.config.ConfigInstance$Builder"
- ],
- "attributes": [
- "public"
- ],
- "methods": [
- "public void <init>()",
- "public void <init>(com.yahoo.language.sentencepiece.SentencePieceConfig)",
- "public com.yahoo.language.sentencepiece.SentencePieceConfig$Builder collapseUnknowns(boolean)",
- "public com.yahoo.language.sentencepiece.SentencePieceConfig$Builder scoring(com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum)",
- "public com.yahoo.language.sentencepiece.SentencePieceConfig$Builder model(com.yahoo.language.sentencepiece.SentencePieceConfig$Model$Builder)",
- "public com.yahoo.language.sentencepiece.SentencePieceConfig$Builder model(java.util.List)",
- "public final boolean dispatchGetConfig(com.yahoo.config.ConfigInstance$Producer)",
- "public final java.lang.String getDefMd5()",
- "public final java.lang.String getDefName()",
- "public final java.lang.String getDefNamespace()",
- "public final boolean getApplyOnRestart()",
- "public final void setApplyOnRestart(boolean)",
- "public com.yahoo.language.sentencepiece.SentencePieceConfig build()"
- ],
- "fields": [
- "public java.util.List model"
- ]
- },
- "com.yahoo.language.sentencepiece.SentencePieceConfig$Model$Builder": {
- "superClass": "java.lang.Object",
- "interfaces": [
- "com.yahoo.config.ConfigBuilder"
- ],
- "attributes": [
- "public"
- ],
- "methods": [
- "public void <init>()",
- "public void <init>(com.yahoo.language.sentencepiece.SentencePieceConfig$Model)",
- "public com.yahoo.language.sentencepiece.SentencePieceConfig$Model$Builder language(java.lang.String)",
- "public com.yahoo.language.sentencepiece.SentencePieceConfig$Model$Builder path(com.yahoo.config.FileReference)",
- "public com.yahoo.language.sentencepiece.SentencePieceConfig$Model build()"
- ],
- "fields": []
- },
- "com.yahoo.language.sentencepiece.SentencePieceConfig$Model": {
- "superClass": "com.yahoo.config.InnerNode",
- "interfaces": [],
- "attributes": [
- "public",
- "final"
- ],
- "methods": [
- "public void <init>(com.yahoo.language.sentencepiece.SentencePieceConfig$Model$Builder)",
- "public java.lang.String language()",
- "public java.nio.file.Path path()"
- ],
- "fields": []
- },
- "com.yahoo.language.sentencepiece.SentencePieceConfig$Producer": {
- "superClass": "java.lang.Object",
- "interfaces": [
- "com.yahoo.config.ConfigInstance$Producer"
- ],
- "attributes": [
- "public",
- "interface",
- "abstract"
- ],
- "methods": [
- "public abstract void getConfig(com.yahoo.language.sentencepiece.SentencePieceConfig$Builder)"
- ],
- "fields": []
- },
- "com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum": {
- "superClass": "java.lang.Enum",
- "interfaces": [],
- "attributes": [
- "public",
- "final",
- "enum"
- ],
- "methods": [
- "public static com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum[] values()",
- "public static com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum valueOf(java.lang.String)"
- ],
- "fields": [
- "public static final enum com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum highestScore",
- "public static final enum com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum fewestSegments"
- ]
- },
- "com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring": {
- "superClass": "com.yahoo.config.EnumNode",
- "interfaces": [],
- "attributes": [
- "public",
- "final"
- ],
- "methods": [
- "public void <init>()",
- "public void <init>(com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum)"
- ],
- "fields": [
- "public static final com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum highestScore",
- "public static final com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum fewestSegments"
- ]
- },
- "com.yahoo.language.sentencepiece.SentencePieceConfig": {
- "superClass": "com.yahoo.config.ConfigInstance",
- "interfaces": [],
- "attributes": [
- "public",
- "final"
- ],
- "methods": [
- "public static java.lang.String getDefMd5()",
- "public static java.lang.String getDefName()",
- "public static java.lang.String getDefNamespace()",
- "public static java.lang.String getDefVersion()",
- "public void <init>(com.yahoo.language.sentencepiece.SentencePieceConfig$Builder)",
- "public boolean collapseUnknowns()",
- "public com.yahoo.language.sentencepiece.SentencePieceConfig$Scoring$Enum scoring()",
- "public java.util.List model()",
- "public com.yahoo.language.sentencepiece.SentencePieceConfig$Model model(int)"
- ],
- "fields": [
- "public static final java.lang.String CONFIG_DEF_MD5",
- "public static final java.lang.String CONFIG_DEF_NAME",
- "public static final java.lang.String CONFIG_DEF_NAMESPACE",
- "public static final java.lang.String CONFIG_DEF_VERSION",
- "public static final java.lang.String[] CONFIG_DEF_SCHEMA"
- ]
- },
- "com.yahoo.language.sentencepiece.SentencePieceEncoder$Builder": {
- "superClass": "java.lang.Object",
- "interfaces": [],
- "attributes": [
- "public"
- ],
- "methods": [
- "public void <init>()",
- "public void addModel(com.yahoo.language.Language, java.nio.file.Path)",
- "public com.yahoo.language.sentencepiece.SentencePieceEncoder$Builder addDefaultModel(java.nio.file.Path)",
- "public java.util.Map getModels()",
- "public com.yahoo.language.sentencepiece.SentencePieceEncoder$Builder setCollapseUnknowns(boolean)",
- "public boolean getCollapseUnknowns()",
- "public com.yahoo.language.sentencepiece.SentencePieceEncoder$Builder setScoring(com.yahoo.language.sentencepiece.Scoring)",
- "public com.yahoo.language.sentencepiece.Scoring getScoring()",
- "public com.yahoo.language.sentencepiece.SentencePieceEncoder build()"
- ],
- "fields": []
- },
- "com.yahoo.language.sentencepiece.SentencePieceEncoder": {
- "superClass": "java.lang.Object",
- "interfaces": [
- "com.yahoo.language.process.Segmenter",
- "com.yahoo.language.process.Encoder"
- ],
- "attributes": [
- "public"
- ],
- "methods": [
- "public void <init>(com.yahoo.language.sentencepiece.SentencePieceConfig)",
- "public void <init>(com.yahoo.language.sentencepiece.SentencePieceEncoder$Builder)",
- "public java.util.List segment(java.lang.String, com.yahoo.language.Language)",
- "public java.util.List encode(java.lang.String, com.yahoo.language.Language)",
- "public com.yahoo.tensor.Tensor encode(java.lang.String, com.yahoo.language.Language, com.yahoo.tensor.TensorType)",
- "public java.lang.String normalize(java.lang.String)"
- ],
- "fields": []
}
} \ No newline at end of file
diff --git a/linguistics/pom.xml b/linguistics/pom.xml
index 221d7181616..0e5f9e15b85 100644
--- a/linguistics/pom.xml
+++ b/linguistics/pom.xml
@@ -15,10 +15,6 @@
<version>7-SNAPSHOT</version>
<dependencies>
<dependency>
- <groupId>com.google.protobuf</groupId>
- <artifactId>protobuf-java</artifactId>
- </dependency>
- <dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
diff --git a/pom.xml b/pom.xml
index 2774b8af00c..aed369720b9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -94,6 +94,7 @@
<module>jdisc_jetty</module>
<module>jrt</module>
<module>linguistics</module>
+ <module>linguistics-components</module>
<module>logd</module>
<module>logserver</module>
<module>messagebus</module>