From b720e164d9e88c33450b2bae2186e3214e4804bf Mon Sep 17 00:00:00 2001
From: Arne Juul
Date: Mon, 24 Aug 2020 07:21:14 +0000
Subject: handle plugin tokenizer returning tokens with empty original string

---
 .../src/main/java/com/yahoo/language/process/SegmenterImpl.java | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'linguistics/src/main/java/com/yahoo/language/process')

diff --git a/linguistics/src/main/java/com/yahoo/language/process/SegmenterImpl.java b/linguistics/src/main/java/com/yahoo/language/process/SegmenterImpl.java
index a5e665b9444..9301e73aa5d 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/SegmenterImpl.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/SegmenterImpl.java
@@ -33,7 +33,10 @@ public class SegmenterImpl implements Segmenter {
         int len;
         if (token.isSpecialToken() || (len = token.getNumComponents()) == 0) {
             if (token.isIndexable()) {
-                out.add(token.getOrig());
+                String orig = token.getOrig();
+                if (! orig.isEmpty()) {
+                    out.add(orig);
+                }
             }
         } else {
             for (int i = 0; i < len; ++i) {
-- 
cgit v1.2.3