summary | refs | log | tree | commit | diff | stats
path: root/linguistics
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@gmail.com> 2021-09-21 17:45:50 +0200
committer Jon Bratseth <bratseth@gmail.com> 2021-09-21 17:45:50 +0200
commit 7f9ed6ede10ba75023d640e38108352ee0c36a37 (patch)
tree 83d2823c3ee9f1a3e357086201cc54716b024ff4 /linguistics
parent 83bfc1f7f0469d5d03096a1ca42a697c1e18bf4a (diff)
Linguistics cleanup
Diffstat (limited to 'linguistics')
-rw-r--r-- linguistics/abi-spec.json 11
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/Linguistics.java 1
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/detect/Detection.java 1
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/detect/DetectionException.java 3
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/detect/Hint.java 5
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpLinguistics.java 1
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java 3
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/opennlp/OptimaizeDetector.java 10
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/process/ProcessingException.java 1
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/process/StemList.java 4
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/process/StemMode.java 16
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/process/Stemmer.java 2
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/process/StemmerImpl.java 1
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/process/TokenScript.java 2
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/sentencepiece/Trie.java 2
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java 9
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java 1
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenType.java 1
18 files changed, 29 insertions, 45 deletions
diff --git a/linguistics/abi-spec.json b/linguistics/abi-spec.json
index dc7450678c5..dbf4842ea1a 100644
--- a/linguistics/abi-spec.json
+++ b/linguistics/abi-spec.json
@@ -918,16 +918,5 @@
"public java.lang.String normalize(java.lang.String)"
],
"fields": []
- },
- "com.yahoo.language.sentencepiece.Trie": {
- "superClass": "java.lang.Object",
- "interfaces": [],
- "attributes": [
- "public"
- ],
- "methods": [
- "public void <init>()"
- ],
- "fields": []
}
}
\ No newline at end of file
diff --git a/linguistics/src/main/java/com/yahoo/language/Linguistics.java b/linguistics/src/main/java/com/yahoo/language/Linguistics.java
index 64ef8762be8..8af0fcd42cb 100644
--- a/linguistics/src/main/java/com/yahoo/language/Linguistics.java
+++ b/linguistics/src/main/java/com/yahoo/language/Linguistics.java
@@ -88,4 +88,5 @@ public interface Linguistics {
/** Check if another instance is equivalent to this one */
boolean equals(Linguistics other);
+
}
diff --git a/linguistics/src/main/java/com/yahoo/language/detect/Detection.java b/linguistics/src/main/java/com/yahoo/language/detect/Detection.java
index 4b816335154..127777db4d2 100644
--- a/linguistics/src/main/java/com/yahoo/language/detect/Detection.java
+++ b/linguistics/src/main/java/com/yahoo/language/detect/Detection.java
@@ -44,4 +44,5 @@ public class Detection {
public boolean isLocal() {
return local;
}
+
}
diff --git a/linguistics/src/main/java/com/yahoo/language/detect/DetectionException.java b/linguistics/src/main/java/com/yahoo/language/detect/DetectionException.java
index a43dc0cb537..5fceabefae3 100644
--- a/linguistics/src/main/java/com/yahoo/language/detect/DetectionException.java
+++ b/linguistics/src/main/java/com/yahoo/language/detect/DetectionException.java
@@ -4,11 +4,12 @@ package com.yahoo.language.detect;
/**
* Exception that is thrown when detection fails.
*
- * @author <a href="mailto:einarmr@yahoo-inc.com">Einar M R Rosenvinge</a>
+ * @author Einar M R Rosenvinge
*/
public final class DetectionException extends RuntimeException {
public DetectionException(String str) {
super(str);
}
+
}
diff --git a/linguistics/src/main/java/com/yahoo/language/detect/Hint.java b/linguistics/src/main/java/com/yahoo/language/detect/Hint.java
index 50291c922e8..b6bf4403cf3 100644
--- a/linguistics/src/main/java/com/yahoo/language/detect/Hint.java
+++ b/linguistics/src/main/java/com/yahoo/language/detect/Hint.java
@@ -2,9 +2,9 @@
package com.yahoo.language.detect;
/**
- * <p>A hint that can be given to a {@link Detector}.</p>
+ * A hint that can be given to a {@link Detector}.
*
- * @author <a href="mailto:einarmr@yahoo-inc.com">Einar M R Rosenvinge</a>
+ * @author Einar M R Rosenvinge
*/
public class Hint {
@@ -35,4 +35,5 @@ public class Hint {
public static Hint newInstance(String market, String country) {
return new Hint(market, country);
}
+
}
diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpLinguistics.java b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpLinguistics.java
index 64888dba183..0edd48f5ee3 100644
--- a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpLinguistics.java
+++ b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpLinguistics.java
@@ -49,4 +49,5 @@ public class OpenNlpLinguistics extends SimpleLinguistics {
@Override
public boolean equals(Linguistics other) { return (other instanceof OpenNlpLinguistics); }
+
}
diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java
index 73518876c3f..603905bead8 100644
--- a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java
+++ b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java
@@ -19,7 +19,6 @@ import opennlp.tools.stemmer.Stemmer;
import opennlp.tools.stemmer.snowball.SnowballStemmer;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.List;
/**
@@ -52,7 +51,7 @@ public class OpenNlpTokenizer implements Tokenizer {
@Override
public Iterable<Token> tokenize(String input, Language language, StemMode stemMode, boolean removeAccents) {
- if (input.isEmpty()) return Collections.emptyList();
+ if (input.isEmpty()) return List.of();
Stemmer stemmer = stemmerFor(language, stemMode);
if (stemmer == null) return simpleTokenizer.tokenize(input, language, stemMode, removeAccents);
diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/OptimaizeDetector.java b/linguistics/src/main/java/com/yahoo/language/opennlp/OptimaizeDetector.java
index bf07c91ba44..9bf1281e015 100644
--- a/linguistics/src/main/java/com/yahoo/language/opennlp/OptimaizeDetector.java
+++ b/linguistics/src/main/java/com/yahoo/language/opennlp/OptimaizeDetector.java
@@ -32,10 +32,10 @@ import java.util.logging.Level;
*/
public class OptimaizeDetector implements Detector {
- static private Object initGuard = new Object();
- static private TextObjectFactory textObjectFactory = null;
- static private LanguageDetector languageDetector = null;
- static private final Logger log = Logger.getLogger(OptimaizeDetector.class.getName());
+ private static final Object initGuard = new Object();
+ private static TextObjectFactory textObjectFactory = null;
+ private static LanguageDetector languageDetector = null;
+ private static final Logger log = Logger.getLogger(OptimaizeDetector.class.getName());
static private void initOptimaize() {
synchronized (initGuard) {
@@ -60,7 +60,7 @@ public class OptimaizeDetector implements Detector {
}
}
- private SimpleDetector simpleDetector = new SimpleDetector();
+ private final SimpleDetector simpleDetector = new SimpleDetector();
public OptimaizeDetector() {
initOptimaize();
diff --git a/linguistics/src/main/java/com/yahoo/language/process/ProcessingException.java b/linguistics/src/main/java/com/yahoo/language/process/ProcessingException.java
index 752992f5a26..99576240635 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/ProcessingException.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/ProcessingException.java
@@ -15,4 +15,5 @@ public class ProcessingException extends RuntimeException {
public ProcessingException(String message, Throwable cause) {
super(message, cause);
}
+
}
diff --git a/linguistics/src/main/java/com/yahoo/language/process/StemList.java b/linguistics/src/main/java/com/yahoo/language/process/StemList.java
index a38a2e51cb6..d5451e7660d 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/StemList.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/StemList.java
@@ -3,6 +3,7 @@ package com.yahoo.language.process;
import java.util.AbstractList;
import java.util.ArrayList;
+import java.util.List;
/**
* A list of strings which does not allow for duplicate elements.
@@ -10,7 +11,8 @@ import java.util.ArrayList;
* @author steinar
*/
public class StemList extends AbstractList<String> {
- private final ArrayList<String> stems;
+
+ private final List<String> stems;
public StemList() {
this(new String[0]);
diff --git a/linguistics/src/main/java/com/yahoo/language/process/StemMode.java b/linguistics/src/main/java/com/yahoo/language/process/StemMode.java
index 628f6910c9e..4adb5de62da 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/StemMode.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/StemMode.java
@@ -10,16 +10,10 @@ package com.yahoo.language.process;
*/
public enum StemMode {
- NONE(0),
- DEFAULT(1),
- ALL(2),
- SHORTEST(4),
- BEST(5);
-
- private final int value;
-
- StemMode(int value) {
- this.value = value;
- }
+ NONE,
+ DEFAULT,
+ ALL,
+ SHORTEST,
+ BEST;
}
diff --git a/linguistics/src/main/java/com/yahoo/language/process/Stemmer.java b/linguistics/src/main/java/com/yahoo/language/process/Stemmer.java
index a2d0d0a84c9..1c6180c1f59 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/Stemmer.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/Stemmer.java
@@ -18,7 +18,7 @@ public interface Stemmer {
* @param input the string to stem.
* @param mode the stemming mode
* @param language the language to use for stemming
- * @return list of possible stems. Empty if none.
+ * @return a list of possible stems. Empty if none.
* @throws ProcessingException thrown if there is an exception stemming this input
*/
List<StemList> stem(String input, StemMode mode, Language language);
diff --git a/linguistics/src/main/java/com/yahoo/language/process/StemmerImpl.java b/linguistics/src/main/java/com/yahoo/language/process/StemmerImpl.java
index f401ddaba99..dd830570e88 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/StemmerImpl.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/StemmerImpl.java
@@ -43,4 +43,5 @@ public class StemmerImpl implements Stemmer {
}
}
}
+
}
diff --git a/linguistics/src/main/java/com/yahoo/language/process/TokenScript.java b/linguistics/src/main/java/com/yahoo/language/process/TokenScript.java
index efe4073d97e..ff87b9b128b 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/TokenScript.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/TokenScript.java
@@ -5,7 +5,7 @@ package com.yahoo.language.process;
* List of token scripts (e.g. latin, japanese, chinese, etc.) which may warrant different
* linguistics treatment.
*
- * @author <a href="mailto:mathiasm@yahoo-inc.com">Mathias Mølster Lidal</a>
+ * @author Mathias Mølster Lidal
*/
public enum TokenScript {
diff --git a/linguistics/src/main/java/com/yahoo/language/sentencepiece/Trie.java b/linguistics/src/main/java/com/yahoo/language/sentencepiece/Trie.java
index 9abed89e7a2..8e7c2db2ed3 100644
--- a/linguistics/src/main/java/com/yahoo/language/sentencepiece/Trie.java
+++ b/linguistics/src/main/java/com/yahoo/language/sentencepiece/Trie.java
@@ -9,7 +9,7 @@ import java.util.Map;
*
* @author bratseth
*/
-public class Trie {
+class Trie {
final Node root = new Node();
diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java
index 3de0eb3e997..e15c6257414 100644
--- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java
+++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java
@@ -70,15 +70,6 @@ public class SimpleDetector implements Detector {
block == Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO) {
return Language.KOREAN;
}
- // katakana phonetic extensions.
- if (0x31f0 <= c && c <= 0x31ff) {
- // See http://www.unicode.org/charts/PDF/U31F0.pdf
- // This is a special case because This range of character
- // codes is classified as unasigned in
- // Character.UnicodeBlock. But clearly it is assigned as
- // per above.
- return Language.JAPANESE;
- }
if (0x31f0 <= c && c <= 0x31ff || // these are standard character blocks for japanese characters.
block == Character.UnicodeBlock.HIRAGANA ||
block == Character.UnicodeBlock.KATAKANA ||
diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java
index 026bc8add25..b319c343510 100644
--- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java
+++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java
@@ -72,4 +72,5 @@ public class SimpleLinguistics implements Linguistics {
@Override
public boolean equals(Linguistics other) { return (other instanceof SimpleLinguistics); }
+
}
diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenType.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenType.java
index d7eb8a72ed8..b5c11b13c67 100644
--- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenType.java
+++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenType.java
@@ -65,4 +65,5 @@ public class SimpleTokenType {
}
throw new UnsupportedOperationException(String.valueOf(Character.getType(codePoint)));
}
+
}