summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- model-integration/src/main/java/ai/vespa/embedding/huggingface/Encoding.java 4
1 files changed, 4 insertions, 0 deletions
diff --git a/model-integration/src/main/java/ai/vespa/embedding/huggingface/Encoding.java b/model-integration/src/main/java/ai/vespa/embedding/huggingface/Encoding.java
index f1c0244bfb3..274c29a57b2 100644
--- a/model-integration/src/main/java/ai/vespa/embedding/huggingface/Encoding.java
+++ b/model-integration/src/main/java/ai/vespa/embedding/huggingface/Encoding.java
@@ -14,9 +14,12 @@ public record Encoding(
List<Long> specialTokenMask, List<CharSpan> charTokenSpans, List<Encoding> overflowing) {
public record CharSpan(int start, int end) {
+ public static final CharSpan NONE = new CharSpan(-1, -1); // sentinel span with start and end both -1; this is what from() returns for a null input
static CharSpan from(ai.djl.huggingface.tokenizers.jni.CharSpan s) {
+ if (s == null) return NONE; // a null span is mapped to the NONE sentinel rather than dereferenced below
return new CharSpan(s.getStart(), s.getEnd());
}
+ public boolean isNone() { return this.equals(NONE); } // record value equality: true for NONE itself and for any span whose start and end are both -1
}
public Encoding {
@@ -43,6 +46,7 @@ public record Encoding(
}
private static List<Long> toList(long[] array) {
+ if (array == null) return List.of();
var list = new ArrayList<Long>(array.length);
for (long e : array) list.add(e);
return list;