aboutsummaryrefslogtreecommitdiffstats
path: root/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceTokenizer.java
diff options
context:
space:
mode:
Diffstat (limited to 'model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceTokenizer.java')
-rw-r--r--model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceTokenizer.java47
1 files changed, 0 insertions, 47 deletions
diff --git a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceTokenizer.java b/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceTokenizer.java
deleted file mode 100644
index e6765a4cc8a..00000000000
--- a/model-integration/src/main/java/ai/vespa/embedding/huggingface/HuggingFaceTokenizer.java
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-package ai.vespa.embedding.huggingface;
-
-import java.io.IOException;
-import java.nio.file.Path;
-
-/**
- * Wrapper around Deep Java Library's HuggingFace tokenizer.
- *
- * @author bjorncs
- */
-public class HuggingFaceTokenizer implements AutoCloseable {
-
- private final ai.djl.huggingface.tokenizers.HuggingFaceTokenizer instance;
-
- public HuggingFaceTokenizer(Path path) throws IOException { this(path, HuggingFaceTokenizerOptions.defaults()); }
-
- public HuggingFaceTokenizer(Path path, HuggingFaceTokenizerOptions opts) throws IOException {
- var original = Thread.currentThread().getContextClassLoader();
- Thread.currentThread().setContextClassLoader(HuggingFaceTokenizer.class.getClassLoader());
- try {
- instance = createInstance(path, opts);
- } finally {
- Thread.currentThread().setContextClassLoader(original);
- }
- }
-
- public Encoding encode(String text) { return Encoding.from(instance.encode(text)); }
-
- @Override public void close() { instance.close(); }
-
- private static ai.djl.huggingface.tokenizers.HuggingFaceTokenizer createInstance(
- Path path, HuggingFaceTokenizerOptions opts) throws IOException {
- var builder = ai.djl.huggingface.tokenizers.HuggingFaceTokenizer.builder().optTokenizerPath(path);
- opts.addSpecialToken().ifPresent(builder::optAddSpecialTokens);
- opts.truncation().ifPresent(builder::optTruncation);
- if (opts.truncateFirstOnly()) builder.optTruncateFirstOnly();
- if (opts.truncateSecondOnly()) builder.optTruncateSecondOnly();
- opts.padding().ifPresent(builder::optPadding);
- if (opts.padToMaxLength()) builder.optPadToMaxLength();
- opts.maxLength().ifPresent(builder::optMaxLength);
- opts.padToMultipleOf().ifPresent(builder::optPadToMultipleOf);
- opts.stride().ifPresent(builder::optStride);
- return builder.build();
- }
-}