From ef5be496bc4857c5923f566251dd527873b248bf Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Mon, 13 Nov 2023 21:34:45 +0100 Subject: Revert "Bratseth/casing take 2" --- .../java/com/yahoo/language/LinguisticsCase.java | 1 - .../com/yahoo/language/process/GramSplitter.java | 3 +- .../com/yahoo/language/simple/SimpleToken.java | 37 ++++++---------------- .../com/yahoo/language/simple/SimpleTokenizer.java | 2 +- 4 files changed, 13 insertions(+), 30 deletions(-) (limited to 'linguistics') diff --git a/linguistics/src/main/java/com/yahoo/language/LinguisticsCase.java b/linguistics/src/main/java/com/yahoo/language/LinguisticsCase.java index f0439a21fec..5ad6a382abd 100644 --- a/linguistics/src/main/java/com/yahoo/language/LinguisticsCase.java +++ b/linguistics/src/main/java/com/yahoo/language/LinguisticsCase.java @@ -26,7 +26,6 @@ public class LinguisticsCase { public static String toLowerCase(String in) { // def is picked from http://docs.oracle.com/javase/6/docs/api/java/lang/String.html#toLowerCase%28%29 // Also, at the time of writing, English is the default language for queries - if (in == null) return null; return Lowercase.toLowerCase(in); } diff --git a/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java b/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java index 9178c2d7e09..33f5ee7e4bb 100644 --- a/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java +++ b/linguistics/src/main/java/com/yahoo/language/process/GramSplitter.java @@ -189,8 +189,9 @@ public class GramSplitter { @Override public boolean equals(Object o) { if (this == o) return true; - if ( ! (o instanceof Gram gram)) return false; + if ( ! (o instanceof Gram)) return false; + Gram gram = (Gram)o; if (codePointCount != gram.codePointCount) return false; if (start != gram.start) return false; return true; diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java index 809e9b8d133..6cc68c7ac14 100644 --- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java +++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java @@ -15,48 +15,35 @@ import java.util.Objects; public class SimpleToken implements Token { private final List components = new ArrayList<>(); - private final String original; + private final String orig; private TokenType type = TokenType.UNKNOWN; private TokenScript script = TokenScript.UNKNOWN; private String tokenString; - private List stems = null; // Any additional stems after tokenString private boolean specialToken = false; private long offset = 0; - public SimpleToken(String original) { - this(original, (String)null); + public SimpleToken(String orig) { + this(orig, null); } - public SimpleToken(String original, String tokenString) { - this.original = original; + public SimpleToken(String orig, String tokenString) { + this.orig = orig; this.tokenString = tokenString; } - /** Exposed as fromStems */ - private SimpleToken(String original, List stems) { - this.type = TokenType.ALPHABETIC; // Only type which may have stems - this.original = original; - this.tokenString = stems.get(0); - this.stems = List.copyOf(stems.subList(1, stems.size())); - } - @Override public String getOrig() { - return original; + return orig; } @Override public int getNumStems() { - return (tokenString != null ? 1 : 0) + (stems != null ? stems.size() : 0); + return tokenString != null ? 1 : 0; } @Override public String getStem(int i) { - if (i == 0) - return tokenString; - if (stems != null && i-1 < stems.size()) - return stems.get(i-1); - return tokenString; // TODO Vespa 9: throw new IllegalArgumentException() instead + return tokenString; } @Override @@ -144,12 +131,12 @@ public class SimpleToken implements Token { @Override public int hashCode() { - return original.hashCode(); + return orig.hashCode(); } @Override public String toString() { - return "token '" + original + "'"; + return "token '" + orig + "'"; } public String toDetailString() { @@ -184,8 +171,4 @@ public class SimpleToken implements Token { return getType().isIndexable() && (getOrig().length() > 0); } - public static SimpleToken fromStems(String original, List stems) { - return new SimpleToken(original, stems); - } - } diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java index b72d2bd6d37..98a84a48095 100644 --- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java +++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java @@ -106,7 +106,7 @@ public class SimpleTokenizer implements Tokenizer { String oldToken = token; token = stemmer.stem(token); String newToken = token; - log.log(Level.FINEST, () -> "stem '" + oldToken + "' to '" + newToken + "'"); + log.log(Level.FINEST, () -> "stem '" + oldToken+"' to '" + newToken+"'"); } String result = token; log.log(Level.FINEST, () -> "processed token is: " + result); -- cgit v1.2.3