package=ai.vespa.embedding

# Transformer model settings
transformerModelUrl url

# Maximum length of the token sequence the model can handle
transformerMaxTokens int default=384

# Pooling strategy
poolingStrategy enum { cls, mean } default=mean

# Input names
transformerInputIds      string default=input_ids
transformerAttentionMask string default=attention_mask
transformerTokenTypeIds  string default=token_type_ids

# Output name
transformerOutput string default=output_0

# Settings for ONNX model evaluation
onnxExecutionMode enum { parallel, sequential } default=sequential
onnxInterOpThreads int default=1
# n = number of threads -> n<0: CPUs/(-n), n==0: CPUs, n>0: n
onnxIntraOpThreads int default=-4

# Settings for wordpiece tokenizer
tokenizerVocabUrl url