diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-08-24 07:21:14 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-08-24 07:23:05 +0000 |
commit | b720e164d9e88c33450b2bae2186e3214e4804bf (patch) | |
tree | b7a32c2d71c62b030ae1f8029aa7a0ae331790c9 /linguistics/src/main/java/com/yahoo/language/process | |
parent | ef1f0e04884a31f55011374b4fff0dcbe9fa7e30 (diff) |
handle plugin tokenizer returning tokens with empty original string
Diffstat (limited to 'linguistics/src/main/java/com/yahoo/language/process')
-rw-r--r-- | linguistics/src/main/java/com/yahoo/language/process/SegmenterImpl.java | 5 |
1 file changed, 4 insertions, 1 deletion
diff --git a/linguistics/src/main/java/com/yahoo/language/process/SegmenterImpl.java b/linguistics/src/main/java/com/yahoo/language/process/SegmenterImpl.java
index a5e665b9444..9301e73aa5d 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/SegmenterImpl.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/SegmenterImpl.java
@@ -33,7 +33,10 @@ public class SegmenterImpl implements Segmenter {
         int len;
         if (token.isSpecialToken() || (len = token.getNumComponents()) == 0) {
             if (token.isIndexable()) {
-                out.add(token.getOrig());
+                String orig = token.getOrig();
+                if (! orig.isEmpty()) {
+                    out.add(orig);
+                }
             }
         } else {
             for (int i = 0; i < len; ++i) {