diff options
author | Jon Bratseth <bratseth@gmail.com> | 2021-12-17 12:41:17 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2021-12-17 12:41:17 +0100 |
commit | 601b117281b74a578126a0f3effead55bc79c680 (patch) | |
tree | 29619184a8459763cc024b23e74960e6c9ec7f81 /linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java | |
parent | 767cb63af0f530605180f5438767406e1db27520 (diff) |
BERT -> WordPiece, make subword prefix configurable
Diffstat (limited to 'linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java')
-rw-r--r-- | linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java | 50 |
1 files changed, 0 insertions, 50 deletions
```diff
diff --git a/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java b/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java
deleted file mode 100644
index 4dae53c60df..00000000000
--- a/linguistics-components/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java
+++ /dev/null
@@ -1,50 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-//
-
-package com.yahoo.language.sentencepiece;
-
-import com.yahoo.language.Language;
-import com.yahoo.language.process.Embedder;
-import com.yahoo.tensor.Tensor;
-import com.yahoo.tensor.TensorType;
-
-import java.nio.file.Path;
-
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-
-class SentencePieceTester {
-
-    private final SentencePieceEmbedder embedder;
-
-    public SentencePieceTester(Path model) {
-        this(new SentencePieceEmbedder.Builder().addDefaultModel(model));
-    }
-
-    public SentencePieceTester(SentencePieceEmbedder.Builder builder) {
-        this(builder.build());
-    }
-
-    public SentencePieceTester(SentencePieceEmbedder embedder) {
-        this.embedder = embedder;
-    }
-
-    public void assertEmbedded(String input, Integer... expectedCodes) {
-        assertArrayEquals(expectedCodes, embedder.embed(input, new Embedder.Context("test")).toArray());
-    }
-
-    public void assertEmbedded(String input, String tensorType, String tensor) {
-        TensorType type = TensorType.fromSpec(tensorType);
-        Tensor expected = Tensor.from(type, tensor);
-        assertEquals(expected, embedder.embed(input, new Embedder.Context("test"), type));
-    }
-
-    public void assertSegmented(String input, String... expectedSegments) {
-        assertSegmented(Language.UNKNOWN, input, expectedSegments);
-    }
-
-    public void assertSegmented(Language language, String input, String... expectedSegments) {
-        assertArrayEquals(expectedSegments, embedder.segment(input, language).toArray());
-    }
-
-}
```