author     Håkon Hallingstad <hakon.hallingstad@gmail.com>  2022-03-31 16:11:48 +0200
committer  GitHub <noreply@github.com>  2022-03-31 16:11:48 +0200
commit     80556883dcdc350e2b5cfebde8ef482baeb36872 (patch)
tree       a1b1f2fcc46f94a3ea04c7a75748ed41b41caeea /model-integration/src/test
parent     47e4ac1d21570b8d8665e2d9a142871d7ea2442b (diff)
Revert "Add bert base embedder"
Diffstat (limited to 'model-integration/src/test')
-rw-r--r--  model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java   29
-rw-r--r--  model-integration/src/test/models/onnx/transformer/dummy_transformer.onnx      bin 27895 -> 0 bytes
-rw-r--r--  model-integration/src/test/models/onnx/transformer/dummy_transformer.py        52
-rw-r--r--  model-integration/src/test/models/onnx/transformer/dummy_vocab.txt             35
4 files changed, 0 insertions, 116 deletions
diff --git a/model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java b/model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java
deleted file mode 100644
index 519f24795ca..00000000000
--- a/model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java
+++ /dev/null
@@ -1,29 +0,0 @@
-package ai.vespa.embedding;
-
-import com.yahoo.config.UrlReference;
-import com.yahoo.tensor.Tensor;
-import com.yahoo.tensor.TensorType;
-import org.junit.Test;
-
-import java.util.List;
-
-import static org.junit.Assert.assertEquals;
-
-public class BertBaseEmbedderTest {
-
- @Test
- public void testEmbedder() {
- BertBaseEmbedderConfig.Builder builder = new BertBaseEmbedderConfig.Builder();
- builder.tokenizerVocabUrl(new UrlReference("src/test/models/onnx/transformer/dummy_vocab.txt"));
- builder.transformerModelUrl(new UrlReference("src/test/models/onnx/transformer/dummy_transformer.onnx"));
- BertBaseEmbedder embedder = new BertBaseEmbedder(builder.build());
-
- TensorType destType = TensorType.fromSpec("tensor<float>(x[7])");
- List<Integer> tokens = List.of(1,2,3,4,5); // use random tokens instead of invoking the tokenizer
- Tensor embedding = embedder.embedTokens(tokens, destType);
-
- Tensor expected = Tensor.from("tensor<float>(x[7]):[-0.6178509, -0.8135831, 0.34416935, 0.3912577, -0.13068882, 2.5897025E-4, -0.18638384]");
- assertEquals(embedding, expected);
- }
-
-}
diff --git a/model-integration/src/test/models/onnx/transformer/dummy_transformer.onnx b/model-integration/src/test/models/onnx/transformer/dummy_transformer.onnx
deleted file mode 100644
index 2101beec786..00000000000
--- a/model-integration/src/test/models/onnx/transformer/dummy_transformer.onnx
+++ /dev/null
Binary files differ
diff --git a/model-integration/src/test/models/onnx/transformer/dummy_transformer.py b/model-integration/src/test/models/onnx/transformer/dummy_transformer.py
deleted file mode 100644
index 1028035d7c0..00000000000
--- a/model-integration/src/test/models/onnx/transformer/dummy_transformer.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-import torch
-import torch.onnx
-import torch.nn as nn
-from torch.nn import TransformerEncoder, TransformerEncoderLayer
-
-
-class TransformerModel(nn.Module):
- def __init__(self, vocab_size, emb_size, num_heads, hidden_dim_size, num_layers, dropout=0.2):
- super(TransformerModel, self).__init__()
- self.encoder = nn.Embedding(vocab_size, emb_size)
- encoder_layers = TransformerEncoderLayer(emb_size, num_heads, hidden_dim_size, dropout)
- self.transformer_encoder = TransformerEncoder(encoder_layers, num_layers)
-
- def forward(self, tokens, attention_mask, token_type_ids):
- src = self.encoder((tokens * attention_mask) + token_type_ids)
- output = self.transformer_encoder(src)
- return output
-
-
-def main():
- vocabulary_size = 20
- embedding_size = 16
- hidden_dim_size = 32
- num_layers = 2
- num_heads = 2
- model = TransformerModel(vocabulary_size, embedding_size, num_heads, hidden_dim_size, num_layers)
-
- # Omit training - just export randomly initialized network
-
- tokens = torch.LongTensor([[1,2,3,4,5]])
- attention_mask = torch.LongTensor([[1,1,1,1,1]])
- token_type_ids = torch.LongTensor([[0,0,0,0,0]])
- torch.onnx.export(model,
- (tokens, attention_mask, token_type_ids),
- "dummy_transformer.onnx",
- input_names = ["input_ids", "attention_mask", "token_type_ids"],
- output_names = ["output_0"],
- dynamic_axes = {
- "input_ids": {0:"batch", 1:"tokens"},
- "attention_mask": {0:"batch", 1:"tokens"},
- "token_type_ids": {0:"batch", 1:"tokens"},
- "output_0": {0:"batch", 1:"tokens"},
- },
- opset_version=12)
-
-
-if __name__ == "__main__":
- main()
-
-
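For reference, a quick way to sanity-check an export like the deleted dummy_transformer.onnx is to load it with onnxruntime and feed it the same dummy inputs the script traced with. A minimal sketch, assuming onnxruntime and numpy are installed; this is illustrative and not part of the commit:

# Sanity check for an export like dummy_transformer.onnx.
# Assumes onnxruntime and numpy are installed; illustrative only.
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("dummy_transformer.onnx")

# Same dummy inputs the export script traced with, as int64 arrays
# to match the LongTensor inputs used at export time.
inputs = {
    "input_ids":      np.array([[1, 2, 3, 4, 5]], dtype=np.int64),
    "attention_mask": np.array([[1, 1, 1, 1, 1]], dtype=np.int64),
    "token_type_ids": np.array([[0, 0, 0, 0, 0]], dtype=np.int64),
}

(output,) = session.run(["output_0"], inputs)
print(output.shape)  # (1, 5, 16): batch x tokens x embedding size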
diff --git a/model-integration/src/test/models/onnx/transformer/dummy_vocab.txt b/model-integration/src/test/models/onnx/transformer/dummy_vocab.txt
deleted file mode 100644
index 7dc0c6ecb6e..00000000000
--- a/model-integration/src/test/models/onnx/transformer/dummy_vocab.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-a
-b
-c
-d
-e
-f
-g
-h
-i
-j
-k
-l
-m
-n
-o
-p
-q
-r
-s
-t
-u
-v
-x
-y
-z
-0
-1
-2
-3
-4
-5
-6
-7
-8
-9
\ No newline at end of file
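As background on the deleted vocabulary: BERT-style vocab files conventionally map each line's token to its zero-based line index, so the 35-line dummy vocab above yields token ids 0 through 34. A minimal sketch of that mapping, illustrative and not part of this change:

# Illustrative: BERT-style vocab files map each line to its
# zero-based line index as the token id.
def load_vocab(path):
    with open(path, encoding="utf-8") as f:
        return {token.rstrip("\n"): idx for idx, token in enumerate(f)}

vocab = load_vocab("dummy_vocab.txt")
print(vocab["a"], vocab["9"])  # 0 and 34 in the 35-token dummy vocab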