From a2afdafbffcbc09594fd629c65746ec253f180be Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Thu, 16 Sep 2021 11:04:56 +0200 Subject: Make SentencePieceEncoder configurable --- .../sentencepiece/SentencePieceTester.java | 40 ++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 linguistics/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java (limited to 'linguistics/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java') diff --git a/linguistics/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java b/linguistics/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java new file mode 100644 index 00000000000..dee9be5aa7e --- /dev/null +++ b/linguistics/src/test/java/com/yahoo/language/sentencepiece/SentencePieceTester.java @@ -0,0 +1,40 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// + +package com.yahoo.language.sentencepiece; + +import com.yahoo.language.Language; + +import java.nio.file.Path; + +import static org.junit.Assert.assertArrayEquals; + +class SentencePieceTester { + + private final SentencePieceEncoder encoder; + + public SentencePieceTester(Path model) { + this(new SentencePieceEncoder.Builder().addDefaultModel(model)); + } + + public SentencePieceTester(SentencePieceEncoder.Builder builder) { + this(builder.build()); + } + + public SentencePieceTester(SentencePieceEncoder encoder) { + this.encoder = encoder; + } + + public void assertEncoded(String input, Integer... expectedCodes) { + assertArrayEquals(expectedCodes, encoder.encode(input, Language.UNKNOWN).toArray()); + } + + public void assertSegmented(String input, String... expectedSegments) { + assertSegmented(Language.UNKNOWN, input, expectedSegments); + } + + public void assertSegmented(Language language, String input, String... expectedSegments) { + assertArrayEquals(expectedSegments, encoder.segment(input, language).toArray()); + } + +} -- cgit v1.2.3