author | Håkon Hallingstad <hakon.hallingstad@gmail.com> | 2022-03-31 16:11:48 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-03-31 16:11:48 +0200 |
commit | 80556883dcdc350e2b5cfebde8ef482baeb36872 (patch) | |
tree | a1b1f2fcc46f94a3ea04c7a75748ed41b41caeea /model-integration/src/test | |
parent | 47e4ac1d21570b8d8665e2d9a142871d7ea2442b (diff) | |
Revert "Add bert base embedder"
Diffstat (limited to 'model-integration/src/test')
4 files changed, 0 insertions, 116 deletions
diff --git a/model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java b/model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java
deleted file mode 100644
index 519f24795ca..00000000000
--- a/model-integration/src/test/java/ai/vespa/embedding/BertBaseEmbedderTest.java
+++ /dev/null
@@ -1,29 +0,0 @@
-package ai.vespa.embedding;
-
-import com.yahoo.config.UrlReference;
-import com.yahoo.tensor.Tensor;
-import com.yahoo.tensor.TensorType;
-import org.junit.Test;
-
-import java.util.List;
-
-import static org.junit.Assert.assertEquals;
-
-public class BertBaseEmbedderTest {
-
-    @Test
-    public void testEmbedder() {
-        BertBaseEmbedderConfig.Builder builder = new BertBaseEmbedderConfig.Builder();
-        builder.tokenizerVocabUrl(new UrlReference("src/test/models/onnx/transformer/dummy_vocab.txt"));
-        builder.transformerModelUrl(new UrlReference("src/test/models/onnx/transformer/dummy_transformer.onnx"));
-        BertBaseEmbedder embedder = new BertBaseEmbedder(builder.build());
-
-        TensorType destType = TensorType.fromSpec("tensor<float>(x[7])");
-        List<Integer> tokens = List.of(1,2,3,4,5);  // use random tokens instead of invoking the tokenizer
-        Tensor embedding = embedder.embedTokens(tokens, destType);
-
-        Tensor expected = Tensor.from("tensor<float>(x[7]):[-0.6178509, -0.8135831, 0.34416935, 0.3912577, -0.13068882, 2.5897025E-4, -0.18638384]");
-        assertEquals(embedding, expected);
-    }
-
-}
diff --git a/model-integration/src/test/models/onnx/transformer/dummy_transformer.onnx b/model-integration/src/test/models/onnx/transformer/dummy_transformer.onnx
deleted file mode 100644
index 2101beec786..00000000000
--- a/model-integration/src/test/models/onnx/transformer/dummy_transformer.onnx
+++ /dev/null
Binary files differ
diff --git a/model-integration/src/test/models/onnx/transformer/dummy_transformer.py b/model-integration/src/test/models/onnx/transformer/dummy_transformer.py
deleted file mode 100644
index 1028035d7c0..00000000000
--- a/model-integration/src/test/models/onnx/transformer/dummy_transformer.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-import torch
-import torch.onnx
-import torch.nn as nn
-from torch.nn import TransformerEncoder, TransformerEncoderLayer
-
-
-class TransformerModel(nn.Module):
-    def __init__(self, vocab_size, emb_size, num_heads, hidden_dim_size, num_layers, dropout=0.2):
-        super(TransformerModel, self).__init__()
-        self.encoder = nn.Embedding(vocab_size, emb_size)
-        encoder_layers = TransformerEncoderLayer(emb_size, num_heads, hidden_dim_size, dropout)
-        self.transformer_encoder = TransformerEncoder(encoder_layers, num_layers)
-
-    def forward(self, tokens, attention_mask, token_type_ids):
-        src = self.encoder((tokens * attention_mask) + token_type_ids)
-        output = self.transformer_encoder(src)
-        return output
-
-
-def main():
-    vocabulary_size = 20
-    embedding_size = 16
-    hidden_dim_size = 32
-    num_layers = 2
-    num_heads = 2
-    model = TransformerModel(vocabulary_size, embedding_size, num_heads, hidden_dim_size, num_layers)
-
-    # Omit training - just export randomly initialized network
-
-    tokens = torch.LongTensor([[1,2,3,4,5]])
-    attention_mask = torch.LongTensor([[1,1,1,1,1]])
-    token_type_ids = torch.LongTensor([[0,0,0,0,0]])
-    torch.onnx.export(model,
-                      (tokens, attention_mask, token_type_ids),
-                      "dummy_transformer.onnx",
-                      input_names = ["input_ids", "attention_mask", "token_type_ids"],
-                      output_names = ["output_0"],
-                      dynamic_axes = {
-                          "input_ids": {0:"batch", 1:"tokens"},
-                          "attention_mask": {0:"batch", 1:"tokens"},
-                          "token_type_ids": {0:"batch", 1:"tokens"},
-                          "output_0": {0:"batch", 1:"tokens"},
-                      },
-                      opset_version=12)
-
-
-if __name__ == "__main__":
-    main()
-
-
diff --git a/model-integration/src/test/models/onnx/transformer/dummy_vocab.txt b/model-integration/src/test/models/onnx/transformer/dummy_vocab.txt
deleted file mode 100644
index 7dc0c6ecb6e..00000000000
--- a/model-integration/src/test/models/onnx/transformer/dummy_vocab.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-a
-b
-c
-d
-e
-f
-g
-h
-i
-j
-k
-l
-m
-n
-o
-p
-q
-r
-s
-t
-u
-v
-x
-y
-z
-0
-1
-2
-3
-4
-5
-6
-7
-8
-9
\ No newline at end of file