diff options
author | Jon Marius Venstad <venstad@gmail.com> | 2021-12-17 13:38:05 +0100 |
---|---|---|
committer | Jon Marius Venstad <venstad@gmail.com> | 2021-12-17 15:31:40 +0100 |
commit | d050d0339f3ad8af9f0e286881d2a2d582317d31 (patch) | |
tree | a8012b11f447eb96661fb6358228d1d7cee54e77 /linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java | |
parent | 8908e29b8b40e80edc85455c77955c1dfae99cf0 (diff) |
Replace optimaize with OpenNLP language detector
Diffstat (limited to 'linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java')
-rw-r--r-- | linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java | 6 |
1 files changed, 5 insertions, 1 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java index 53b8ad7ad70..61d446cd8d0 100644 --- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java +++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java @@ -130,10 +130,14 @@ public class SimpleDetector implements Detector { } public String guessEncoding(byte[] input) { + return guessEncoding(input, 0, input.length); + } + + public String guessEncoding(byte[] input, int offset, int length) { boolean isUtf8 = true; boolean hasHighs = false; scan: - for (int i = 0; i < input.length; i++) { + for (int i = offset; i < offset + length; i++) { final int l = isLeadingFor(input[i]); if (l < 0 || i + l >= input.length) { hasHighs = true; |