summary | refs | log | tree | commit | diff | stats
path: root/linguistics/src/main/java/com/yahoo/language/process
diff options
context:
space:
mode:
author: Arne Juul <arnej@verizonmedia.com> 2020-08-24 07:21:14 +0000
committer: Arne Juul <arnej@verizonmedia.com> 2020-08-24 07:23:05 +0000
commit: b720e164d9e88c33450b2bae2186e3214e4804bf (patch)
tree: b7a32c2d71c62b030ae1f8029aa7a0ae331790c9 /linguistics/src/main/java/com/yahoo/language/process
parent: ef1f0e04884a31f55011374b4fff0dcbe9fa7e30 (diff)
handle plugin tokenizer returning tokens with empty original string
Diffstat (limited to 'linguistics/src/main/java/com/yahoo/language/process')
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/process/SegmenterImpl.java 5
1 file changed, 4 insertions, 1 deletion
diff --git a/linguistics/src/main/java/com/yahoo/language/process/SegmenterImpl.java b/linguistics/src/main/java/com/yahoo/language/process/SegmenterImpl.java
index a5e665b9444..9301e73aa5d 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/SegmenterImpl.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/SegmenterImpl.java
@@ -33,7 +33,10 @@ public class SegmenterImpl implements Segmenter {
int len;
if (token.isSpecialToken() || (len = token.getNumComponents()) == 0) {
if (token.isIndexable()) {
- out.add(token.getOrig());
+ String orig = token.getOrig();
+ if (! orig.isEmpty()) {
+ out.add(orig);
+ }
}
} else {
for (int i = 0; i < len; ++i) {