diff options
Diffstat (limited to 'linguistics')
3 files changed, 11 insertions, 12 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/process/Token.java b/linguistics/src/main/java/com/yahoo/language/process/Token.java index 73c0ac857ab..70b78ef1a92 100644 --- a/linguistics/src/main/java/com/yahoo/language/process/Token.java +++ b/linguistics/src/main/java/com/yahoo/language/process/Token.java @@ -38,12 +38,12 @@ public interface Token { TokenScript getScript(); /** - * Returns token string in a form suitable for indexing: The - * most lowercased variant of the most processed token form available. + * Returns the token string in a form suitable for indexing: The + * most lowercased variant of the most processed token form available, * If called on a compound token this returns a lowercased form of the * entire word. - * - * @return token string value + * If this is a special token with a configured replacement, + * this will return the replacement token. */ String getTokenString(); diff --git a/linguistics/src/main/java/com/yahoo/language/process/Tokenizer.java b/linguistics/src/main/java/com/yahoo/language/process/Tokenizer.java index 7e61cd885a8..5be0a6fa635 100644 --- a/linguistics/src/main/java/com/yahoo/language/process/Tokenizer.java +++ b/linguistics/src/main/java/com/yahoo/language/process/Tokenizer.java @@ -23,16 +23,11 @@ public interface Tokenizer { Iterable<Token> tokenize(String input, Language language, StemMode stemMode, boolean removeAccents); /** - * Return a replacement for an input token string. - * This accepts strings returned by Token.getTokenString - * and returns a replacement which will be used as the index token. - * The input token string is returned if there is no replacement. - * <p> - * This default implementation always returns the input token string. + * Not used. * - * @param tokenString the token string of the term to lookup a replacement for - * @return the replacement, if any, or the argument token string if not + * @deprecated replacements are already applied in tokens returned by tokenize */ + @Deprecated // Remove on Vespa 8 default String getReplacementTerm(String tokenString) { return tokenString; } } diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java index 122b9b6dff6..7b63650fa94 100644 --- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java +++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleToken.java @@ -25,6 +25,10 @@ public class SimpleToken implements Token { this.orig = orig; } + public SimpleToken(String orig, String tokenString) { + this.orig = orig; + } + @Override public String getOrig() { return orig; |