summaryrefslogtreecommitdiffstats
path: root/linguistics
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2022-01-10 19:24:03 +0100
committerJon Bratseth <bratseth@gmail.com>2022-01-10 19:24:03 +0100
commit0d095ccb083e66c99701bf0e2186cd0913227b58 (patch)
tree920508b9106035a9a26cb2f1be6badc2fb1c417f /linguistics
parent75852e3ce2a075c73c0845a8000df4db4c1f7260 (diff)
Stem by linguistics in rule bases
Also add a @language directive to stem in languages other than English.
Diffstat (limited to 'linguistics')
-rw-r--r--linguistics/abi-spec.json1
-rw-r--r--linguistics/src/main/java/com/yahoo/language/Language.java23
2 files changed, 21 insertions, 3 deletions
diff --git a/linguistics/abi-spec.json b/linguistics/abi-spec.json
index 31612bea983..910056286ec 100644
--- a/linguistics/abi-spec.json
+++ b/linguistics/abi-spec.json
@@ -13,6 +13,7 @@
"public java.lang.String languageCode()",
"public boolean isCjk()",
"public static com.yahoo.language.Language fromLanguageTag(java.lang.String)",
+ "public static com.yahoo.language.Language from(java.lang.String)",
"public static com.yahoo.language.Language fromLocale(java.util.Locale)",
"public static com.yahoo.language.Language fromEncoding(java.lang.String)"
],
diff --git a/linguistics/src/main/java/com/yahoo/language/Language.java b/linguistics/src/main/java/com/yahoo/language/Language.java
index 9f60985c119..e4ac280af9e 100644
--- a/linguistics/src/main/java/com/yahoo/language/Language.java
+++ b/linguistics/src/main/java/com/yahoo/language/Language.java
@@ -6,6 +6,7 @@ import com.yahoo.text.Lowercase;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
+import java.util.Objects;
/**
* @author Rich Pito
@@ -529,10 +530,11 @@ public enum Language {
}
/**
- * <p>Convenience method for calling <code>fromLocale(LocaleFactory.fromLanguageTag(languageTag))</code>.</p>
+ * Convenience method for calling <code>fromLocale(LocaleFactory.fromLanguageTag(languageTag))</code>.
+ * Returns UNKNOWN when passed null or an unknown language tag.
*
- * @param languageTag The language tag for which the <code>Language</code> to return.
- * @return the corresponding <code>Language</code>, or {@link #UNKNOWN} if not known.
+ * @param languageTag the language tag for which the <code>Language</code> to return
+ * @return the corresponding <code>Language</code>, or {@link #UNKNOWN} if not known
*/
public static Language fromLanguageTag(String languageTag) {
if (languageTag == null) return UNKNOWN;
@@ -540,6 +542,21 @@ public enum Language {
}
/**
+ * Returns the Language from a language tag
+ *
+ * @param languageTag the language tag for which the <code>Language</code> to return, cannot be null
+ * @return the Language instance
+ * @throws IllegalArgumentException if the language tag is unknown
+ */
+ public static Language from(String languageTag) {
+ Objects.requireNonNull(languageTag, "languageTag cannot be null");
+ Language language = fromLocale(LocaleFactory.fromLanguageTag(languageTag));
+ if ( ! languageTag.equalsIgnoreCase("unknown") && language == Language.UNKNOWN)
+ throw new IllegalArgumentException("Unknown language tag '" + languageTag + "'");
+ return language;
+ }
+
+ /**
* <p>Returns the <code>Language</code> whose {@link #languageCode()} is equal to <code>locale.getLanguage()</code>, with
* the following additions:</p>
* <ul>