From c172d09ebb17b47ebb2e75bb04d09a8533350450 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Wed, 14 Sep 2022 17:51:12 +0200 Subject: Move the BERT config def to model-integration --- .../src/vespa/embedding.bert-base-embedder.def | 27 ---------------------- model-integration/CMakeLists.txt | 4 +++- .../java/ai/vespa/embedding/BertBaseEmbedder.java | 4 ++-- .../embedding.bert-base-embedder.def | 27 ++++++++++++++++++++++ 4 files changed, 32 insertions(+), 30 deletions(-) delete mode 100644 configdefinitions/src/vespa/embedding.bert-base-embedder.def create mode 100644 model-integration/src/main/resources/configdefinitions/embedding.bert-base-embedder.def diff --git a/configdefinitions/src/vespa/embedding.bert-base-embedder.def b/configdefinitions/src/vespa/embedding.bert-base-embedder.def deleted file mode 100644 index 14d953eeef9..00000000000 --- a/configdefinitions/src/vespa/embedding.bert-base-embedder.def +++ /dev/null @@ -1,27 +0,0 @@ - -namespace=embedding - -# Wordpiece tokenizer -tokenizerVocab model - -transformerModel model - -# Max length of token sequence model can handle -transformerMaxTokens int default=384 - -# Pooling strategy -poolingStrategy enum { cls, mean } default=mean - -# Input names -transformerInputIds string default=input_ids -transformerAttentionMask string default=attention_mask -transformerTokenTypeIds string default=token_type_ids - -# Output name -transformerOutput string default=output_0 - -# Settings for ONNX model evaluation -onnxExecutionMode enum { parallel, sequential } default=sequential -onnxInterOpThreads int default=1 -onnxIntraOpThreads int default=-4 # n=number of threads -> n<0: CPUs/(-n), n==0: CPUs, n>0: n - diff --git a/model-integration/CMakeLists.txt b/model-integration/CMakeLists.txt index 4423746ccbc..9a19c5c31b0 100644 --- a/model-integration/CMakeLists.txt +++ b/model-integration/CMakeLists.txt @@ -3,4 +3,6 @@ install_jar(model-integration-jar-with-dependencies.jar) vespa_install_script(src/main/python/vespa-convert-tf2onnx.py vespa-convert-tf2onnx bin) -install(FILES src/main/config/model-integration.xml DESTINATION conf/configserver-app) \ No newline at end of file +install(FILES src/main/config/model-integration.xml DESTINATION conf/configserver-app) + +install_config_definitions() \ No newline at end of file diff --git a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java index 149598ee2dd..4a1f59cc5cf 100644 --- a/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java +++ b/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java @@ -17,7 +17,7 @@ import java.util.Map; /** * A BERT Base compatible embedder. This embedder uses a WordPiece embedder to - * produce a token sequence that is input to a transformer model. A BERT base + * produce a token sequence that is then input to a transformer model. A BERT base * compatible transformer model must have three inputs: * * - A token sequence (input_ids) @@ -76,7 +76,7 @@ public class BertBaseEmbedder implements Embedder { private void validateName(Map types, String name, String type) { if ( ! types.containsKey(name)) { throw new IllegalArgumentException("Model does not contain required " + type + ": '" + name + "'. " + - "Model contains: " + String.join(",", types.keySet())); + "Model contains: " + String.join(",", types.keySet())); } } diff --git a/model-integration/src/main/resources/configdefinitions/embedding.bert-base-embedder.def b/model-integration/src/main/resources/configdefinitions/embedding.bert-base-embedder.def new file mode 100644 index 00000000000..14d953eeef9 --- /dev/null +++ b/model-integration/src/main/resources/configdefinitions/embedding.bert-base-embedder.def @@ -0,0 +1,27 @@ + +namespace=embedding + +# Wordpiece tokenizer +tokenizerVocab model + +transformerModel model + +# Max length of token sequence model can handle +transformerMaxTokens int default=384 + +# Pooling strategy +poolingStrategy enum { cls, mean } default=mean + +# Input names +transformerInputIds string default=input_ids +transformerAttentionMask string default=attention_mask +transformerTokenTypeIds string default=token_type_ids + +# Output name +transformerOutput string default=output_0 + +# Settings for ONNX model evaluation +onnxExecutionMode enum { parallel, sequential } default=sequential +onnxInterOpThreads int default=1 +onnxIntraOpThreads int default=-4 # n=number of threads -> n<0: CPUs/(-n), n==0: CPUs, n>0: n + -- cgit v1.2.3