diff options
Diffstat (limited to 'config-model/src/main/resources/schema/common.rnc')
-rw-r--r-- | config-model/src/main/resources/schema/common.rnc | 55 |
1 files changed, 54 insertions, 1 deletions
diff --git a/config-model/src/main/resources/schema/common.rnc b/config-model/src/main/resources/schema/common.rnc index 21f3399a027..061e54740f1 100644 --- a/config-model/src/main/resources/schema/common.rnc +++ b/config-model/src/main/resources/schema/common.rnc @@ -53,6 +53,11 @@ GenericConfig = element config { anyElement* } +ModelReference = + attribute model-id { xsd:string }? & + attribute path { xsd:string }? & + attribute url { xsd:string }? + ComponentSpec = ( attribute id { xsd:Name | JavaId } | attribute idref { xsd:Name } | attribute ident { xsd:Name } ) @@ -64,7 +69,7 @@ BundleSpec = attribute bundle { xsd:Name }? Component = element component { - ComponentDefinition + (ComponentDefinition | TypedComponentDefinition) } ComponentDefinition = @@ -72,3 +77,51 @@ ComponentDefinition = BundleSpec & GenericConfig* & Component* + +TypedComponentDefinition = + attribute id { xsd:Name } & + (HuggingFaceEmbedder | HuggingFaceTokenizer | BertBaseEmbedder) & + GenericConfig* & + Component* + +HuggingFaceEmbedder = + attribute type { "hugging-face-embedder" } & + element transformer-model { ModelReference } & + element tokenizer-model { ModelReference }? & + element max-tokens { xsd:nonNegativeInteger }? & + element transformer-input-ids { xsd:string }? & + element transformer-attention-mask { xsd:string }? & + element transformer-token-type-ids { xsd:string }? & + element transformer-output { xsd:string }? & + element normalize { xsd:boolean }? & + OnnxModelExecutionParams & + EmbedderPoolingStrategy + +HuggingFaceTokenizer = + attribute type { "hugging-face-tokenizer" } & + element model { attribute language { xsd:string }? & ModelReference }+ & + element special-tokens { xsd:boolean }? & + element max-length { xsd:integer }? & + element truncation { xsd:boolean }? + +BertBaseEmbedder = + attribute type { "bert-embedder" } & + element transformer-model { ModelReference } & + element tokenizer-vocab { ModelReference } & + element max-tokens { xsd:nonNegativeInteger }? & + element transformer-input-ids { xsd:string }? & + element transformer-attention-mask { xsd:string }? & + element transformer-token-type-ids { xsd:string }? & + element transformer-output { xsd:string }? & + element transformer-start-sequence-token { xsd:integer }? & + element transformer-end-sequence-token { xsd:integer }? & + OnnxModelExecutionParams & + EmbedderPoolingStrategy + +OnnxModelExecutionParams = + element onnx-execution-mode { "parallel" | "sequential" }? & + element onnx-interop-threads { xsd:integer }? & + element onnx-intraop-threads { xsd:integer }? & + element onnx-gpu-device { xsd:integer }? + +EmbedderPoolingStrategy = element pooling-strategy { "cls" | "mean" }?
\ No newline at end of file |