diff options
author | Bjørn Christian Seime <bjorncs@yahooinc.com> | 2023-06-06 09:50:12 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-06 09:50:12 +0200 |
commit | 4ae8a32cd71cc23501f7e1737b27e0bcac7fbd41 (patch) | |
tree | 3e31ecaaab916dd6ce1bd51b3e552e09d578e1eb /config-model/src/main/resources/schema/common.rnc | |
parent | 4878116a848f0ceff01c49b67657d63a4113789d (diff) | |
parent | 6c664b24186756021e6b39801b9694d1815311bf (diff) |
Merge pull request #27297 from vespa-engine/bjorncs/bert-embedder-services-xml
Bjorncs/bert embedder services xml
Diffstat (limited to 'config-model/src/main/resources/schema/common.rnc')
-rw-r--r-- | config-model/src/main/resources/schema/common.rnc | 32 |
1 files changed, 26 insertions, 6 deletions
```diff
diff --git a/config-model/src/main/resources/schema/common.rnc b/config-model/src/main/resources/schema/common.rnc
index 4e7cb526efb..061e54740f1 100644
--- a/config-model/src/main/resources/schema/common.rnc
+++ b/config-model/src/main/resources/schema/common.rnc
@@ -80,7 +80,7 @@ ComponentDefinition =
 
 TypedComponentDefinition =
     attribute id { xsd:Name } &
-    (HuggingFaceEmbedder | HuggingFaceTokenizer) &
+    (HuggingFaceEmbedder | HuggingFaceTokenizer | BertBaseEmbedder) &
     GenericConfig* &
     Component*
 
@@ -94,14 +94,34 @@ HuggingFaceEmbedder =
     element transformer-token-type-ids { xsd:string }? &
     element transformer-output { xsd:string }? &
     element normalize { xsd:boolean }? &
-    element onnx-execution-mode { "parallel" | "sequential" }? &
-    element onnx-interop-threads { xsd:integer }? &
-    element onnx-intraop-threads { xsd:integer }? &
-    element onnx-gpu-device { xsd:integer }?
+    OnnxModelExecutionParams &
+    EmbedderPoolingStrategy
 
 HuggingFaceTokenizer =
     attribute type { "hugging-face-tokenizer" } &
     element model { attribute language { xsd:string }? & ModelReference }+ &
     element special-tokens { xsd:boolean }? &
     element max-length { xsd:integer }? &
-    element truncation { xsd:boolean }?
\ No newline at end of file
+    element truncation { xsd:boolean }?
+
+BertBaseEmbedder =
+    attribute type { "bert-embedder" } &
+    element transformer-model { ModelReference } &
+    element tokenizer-vocab { ModelReference } &
+    element max-tokens { xsd:nonNegativeInteger }? &
+    element transformer-input-ids { xsd:string }? &
+    element transformer-attention-mask { xsd:string }? &
+    element transformer-token-type-ids { xsd:string }? &
+    element transformer-output { xsd:string }? &
+    element transformer-start-sequence-token { xsd:integer }? &
+    element transformer-end-sequence-token { xsd:integer }? &
+    OnnxModelExecutionParams &
+    EmbedderPoolingStrategy
+
+OnnxModelExecutionParams =
+    element onnx-execution-mode { "parallel" | "sequential" }? &
+    element onnx-interop-threads { xsd:integer }? &
+    element onnx-intraop-threads { xsd:integer }? &
+    element onnx-gpu-device { xsd:integer }?
+
+EmbedderPoolingStrategy = element pooling-strategy { "cls" | "mean" }?
\ No newline at end of file
```