From 0d095ccb083e66c99701bf0e2186cd0913227b58 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Mon, 10 Jan 2022 19:24:03 +0100 Subject: Stem by linguistics in rule bases Also add a @language directive to stem in languages other than English. --- linguistics/abi-spec.json | 1 + .../src/main/java/com/yahoo/language/Language.java | 23 +++++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'linguistics') diff --git a/linguistics/abi-spec.json b/linguistics/abi-spec.json index 31612bea983..910056286ec 100644 --- a/linguistics/abi-spec.json +++ b/linguistics/abi-spec.json @@ -13,6 +13,7 @@ "public java.lang.String languageCode()", "public boolean isCjk()", "public static com.yahoo.language.Language fromLanguageTag(java.lang.String)", + "public static com.yahoo.language.Language from(java.lang.String)", "public static com.yahoo.language.Language fromLocale(java.util.Locale)", "public static com.yahoo.language.Language fromEncoding(java.lang.String)" ], diff --git a/linguistics/src/main/java/com/yahoo/language/Language.java b/linguistics/src/main/java/com/yahoo/language/Language.java index 9f60985c119..e4ac280af9e 100644 --- a/linguistics/src/main/java/com/yahoo/language/Language.java +++ b/linguistics/src/main/java/com/yahoo/language/Language.java @@ -6,6 +6,7 @@ import com.yahoo.text.Lowercase; import java.util.HashMap; import java.util.Locale; import java.util.Map; +import java.util.Objects; /** * @author Rich Pito @@ -529,16 +530,32 @@ public enum Language { } /** - *

Convenience method for calling fromLocale(LocaleFactory.fromLanguageTag(languageTag)).

+ * Convenience method for calling fromLocale(LocaleFactory.fromLanguageTag(languageTag)). + * Returns UNKNOWN when passed null or an unknown language tag. * - * @param languageTag The language tag for which the Language to return. - * @return the corresponding Language, or {@link #UNKNOWN} if not known. + * @param languageTag the language tag for which the Language to return + * @return the corresponding Language, or {@link #UNKNOWN} if not known */ public static Language fromLanguageTag(String languageTag) { if (languageTag == null) return UNKNOWN; return fromLocale(LocaleFactory.fromLanguageTag(languageTag)); } + /** + * Returns the Language from a language tag + * + * @param languageTag the language tag for which the Language to return, cannot be null + * @return the Language instance + * @throws IllegalArgumentException if the language tag is unknown + */ + public static Language from(String languageTag) { + Objects.requireNonNull(languageTag, "languageTag cannot be null"); + Language language = fromLocale(LocaleFactory.fromLanguageTag(languageTag)); + if ( ! languageTag.equalsIgnoreCase("unknown") && language == Language.UNKNOWN) + throw new IllegalArgumentException("Unknown language tag '" + languageTag + "'"); + return language; + } + /** *

Returns the Language whose {@link #languageCode()} is equal to locale.getLanguage(), with the following additions:

-- cgit v1.2.3