aboutsummaryrefslogtreecommitdiffstats
path: root/linguistics/src/main/java/com/yahoo/language/simple
diff options
context:
space:
mode:
author: Jon Marius Venstad <venstad@gmail.com> 2021-12-17 13:38:05 +0100
committer: Jon Marius Venstad <venstad@gmail.com> 2021-12-17 15:31:40 +0100
commit: d050d0339f3ad8af9f0e286881d2a2d582317d31 (patch)
tree: a8012b11f447eb96661fb6358228d1d7cee54e77 /linguistics/src/main/java/com/yahoo/language/simple
parent: 8908e29b8b40e80edc85455c77955c1dfae99cf0 (diff)
Replace optimaize with OpenNLP language detector
Diffstat (limited to 'linguistics/src/main/java/com/yahoo/language/simple')
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java 6
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java 4
2 files changed, 6 insertions, 4 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java
index 53b8ad7ad70..61d446cd8d0 100644
--- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java
+++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java
@@ -130,10 +130,14 @@ public class SimpleDetector implements Detector {
}
public String guessEncoding(byte[] input) {
+ return guessEncoding(input, 0, input.length);
+ }
+
+ public String guessEncoding(byte[] input, int offset, int length) {
boolean isUtf8 = true;
boolean hasHighs = false;
scan:
- for (int i = 0; i < input.length; i++) {
+ for (int i = offset; i < offset + length; i++) {
final int l = isLeadingFor(input[i]);
if (l < 0 || i + l >= input.length) {
hasHighs = true;
diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java
index 3ca46dcc4f1..b10beb8c9af 100644
--- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java
+++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java
@@ -2,8 +2,7 @@
package com.yahoo.language.simple;
import com.google.inject.Inject;
-import com.yahoo.collections.Tuple2;
-import com.yahoo.component.Version;
+import com.yahoo.component.AbstractComponent;
import com.yahoo.language.Linguistics;
import com.yahoo.language.detect.Detector;
import com.yahoo.language.process.CharacterClasses;
@@ -16,7 +15,6 @@ import com.yahoo.language.process.Stemmer;
import com.yahoo.language.process.StemmerImpl;
import com.yahoo.language.process.Tokenizer;
import com.yahoo.language.process.Transformer;
-import com.yahoo.vespa.configdefinition.SpecialtokensConfig;
import java.util.List;