From 6d433b7c567be0ffd4473a32884ec0fbe83a5df3 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Thu, 9 Nov 2023 09:46:37 +0100 Subject: Don't lowercase linguistics annotations Tokens are already lowercased by our bundled linguistics components. Lowercasing again when annotating precludes plugging in a linguistics component which preserves casing. --- linguistics/src/main/java/com/yahoo/language/LinguisticsCase.java | 1 + linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'linguistics/src') diff --git a/linguistics/src/main/java/com/yahoo/language/LinguisticsCase.java b/linguistics/src/main/java/com/yahoo/language/LinguisticsCase.java index 5ad6a382abd..f0439a21fec 100644 --- a/linguistics/src/main/java/com/yahoo/language/LinguisticsCase.java +++ b/linguistics/src/main/java/com/yahoo/language/LinguisticsCase.java @@ -26,6 +26,7 @@ public class LinguisticsCase { public static String toLowerCase(String in) { // def is picked from http://docs.oracle.com/javase/6/docs/api/java/lang/String.html#toLowerCase%28%29 // Also, at the time of writing, English is the default language for queries + if (in == null) return null; return Lowercase.toLowerCase(in); } diff --git a/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java b/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java index 33f5ee7e4bb..9178c2d7e09 100644 --- a/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java +++ b/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java @@ -189,9 +189,8 @@ public class GramSplitter { @Override public boolean equals(Object o) { if (this == o) return true; - if ( ! (o instanceof Gram)) return false; + if ( ! (o instanceof Gram gram)) return false; - Gram gram = (Gram)o; if (codePointCount != gram.codePointCount) return false; if (start != gram.start) return false; return true; -- cgit v1.2.3