From d050d0339f3ad8af9f0e286881d2a2d582317d31 Mon Sep 17 00:00:00 2001 From: Jon Marius Venstad Date: Fri, 17 Dec 2021 13:38:05 +0100 Subject: Replace optimaize with OpenNLP language detector --- .../src/main/java/com/yahoo/language/simple/SimpleDetector.java | 6 +++++- .../src/main/java/com/yahoo/language/simple/SimpleLinguistics.java | 4 +--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'linguistics/src/main/java/com/yahoo/language/simple') diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java index 53b8ad7ad70..61d446cd8d0 100644 --- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java +++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java @@ -130,10 +130,14 @@ public class SimpleDetector implements Detector { } public String guessEncoding(byte[] input) { + return guessEncoding(input, 0, input.length); + } + + public String guessEncoding(byte[] input, int offset, int length) { boolean isUtf8 = true; boolean hasHighs = false; scan: - for (int i = 0; i < input.length; i++) { + for (int i = offset; i < offset + length; i++) { final int l = isLeadingFor(input[i]); if (l < 0 || i + l >= input.length) { hasHighs = true; diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java index 3ca46dcc4f1..b10beb8c9af 100644 --- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java +++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java @@ -2,8 +2,7 @@ package com.yahoo.language.simple; import com.google.inject.Inject; -import com.yahoo.collections.Tuple2; -import com.yahoo.component.Version; +import com.yahoo.component.AbstractComponent; import com.yahoo.language.Linguistics; import com.yahoo.language.detect.Detector; import com.yahoo.language.process.CharacterClasses; @@ -16,7 +15,6 @@ import com.yahoo.language.process.Stemmer; import com.yahoo.language.process.StemmerImpl; import com.yahoo.language.process.Tokenizer; import com.yahoo.language.process.Transformer; -import com.yahoo.vespa.configdefinition.SpecialtokensConfig; import java.util.List; -- cgit v1.2.3