From 69e5745894fbcfeb21e45f7d5664084b41ccbe11 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Tue, 4 May 2021 20:40:00 +0200 Subject: Expose tokens as map --- .../yahoo/language/process/SpecialTokenRegistry.java | 2 +- .../java/com/yahoo/language/process/SpecialTokens.java | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'linguistics/src/main') diff --git a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java index 60071c3147c..b6335d67967 100644 --- a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java +++ b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java @@ -36,7 +36,7 @@ public class SpecialTokenRegistry { } public SpecialTokenRegistry(List specialTokensList) { - specialTokenMap = specialTokensList.stream().collect(Collectors.toMap(t -> t.name(), t -> t)); + specialTokenMap = specialTokensList.stream().collect(Collectors.toUnmodifiableMap(t -> t.name(), t -> t)); } private static List specialTokensFrom(SpecialtokensConfig config) { diff --git a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java index 1170ce9ad1e..465d9b754b3 100644 --- a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java +++ b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java @@ -6,7 +6,9 @@ import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Locale; +import java.util.Map; import java.util.Objects; +import java.util.stream.Collectors; import static com.yahoo.language.LinguisticsCase.toLowerCase; @@ -21,16 +23,18 @@ public class SpecialTokens { private static final SpecialTokens empty = new SpecialTokens("(empty)", List.of()); private final String name; - private final List tokens; private final int maximumLength; + private final List tokens; + private final Map tokenMap; public SpecialTokens(String name, List tokens) { tokens.stream().peek(token -> token.validate()); List mutableTokens = new ArrayList<>(tokens); Collections.sort(mutableTokens); - this.tokens = List.copyOf(mutableTokens); this.name = name; this.maximumLength = tokens.stream().mapToInt(token -> token.token().length()).max().orElse(0); + this.tokens = List.copyOf(mutableTokens); + this.tokenMap = tokens.stream().collect(Collectors.toUnmodifiableMap(t -> t.token(), t -> t.replacement())); } /** Returns the name of this special tokens list */ @@ -38,8 +42,11 @@ public class SpecialTokens { return name; } - /** Returns a sorted immutable list of the special tokens in this */ - public List tokens() { return tokens; } + /** + * Returns the tokens of this as an immutable map from token to replacement. + * Tokens which do not have a replacement token maps to themselves. + */ + public Map asMap() { return tokenMap; } /** * Returns the special token starting at the start of the given string, or null if no @@ -64,7 +71,7 @@ public class SpecialTokens { return null; } - private boolean tokenEndsAt(int position,String string) { + private boolean tokenEndsAt(int position, String string) { return !Character.isLetterOrDigit(string.charAt(position)); } -- cgit v1.2.3