diff options
author | Jon Bratseth <bratseth@gmail.com> | 2021-05-04 20:40:00 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2021-05-04 20:40:00 +0200 |
commit | 69e5745894fbcfeb21e45f7d5664084b41ccbe11 (patch) | |
tree | 04502511ede97cf8abb1d3da3628aa0a5a95023b /linguistics/src/main/java/com/yahoo | |
parent | af59be1ed263f1476dd5df0a696f328a7de72ccd (diff) |
Expose tokens as map
Diffstat (limited to 'linguistics/src/main/java/com/yahoo')
-rw-r--r-- | linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java | 2 | ||||
-rw-r--r-- | linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java | 17 |
2 files changed, 13 insertions, 6 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java index 60071c3147c..b6335d67967 100644 --- a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java +++ b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java @@ -36,7 +36,7 @@ public class SpecialTokenRegistry { } public SpecialTokenRegistry(List<SpecialTokens> specialTokensList) { - specialTokenMap = specialTokensList.stream().collect(Collectors.toMap(t -> t.name(), t -> t)); + specialTokenMap = specialTokensList.stream().collect(Collectors.toUnmodifiableMap(t -> t.name(), t -> t)); } private static List<SpecialTokens> specialTokensFrom(SpecialtokensConfig config) { diff --git a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java index 1170ce9ad1e..465d9b754b3 100644 --- a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java +++ b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java @@ -6,7 +6,9 @@ import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Locale; +import java.util.Map; import java.util.Objects; +import java.util.stream.Collectors; import static com.yahoo.language.LinguisticsCase.toLowerCase; @@ -21,16 +23,18 @@ public class SpecialTokens { private static final SpecialTokens empty = new SpecialTokens("(empty)", List.of()); private final String name; - private final List<Token> tokens; private final int maximumLength; + private final List<Token> tokens; + private final Map<String, String> tokenMap; public SpecialTokens(String name, List<Token> tokens) { tokens.stream().peek(token -> token.validate()); List<Token> mutableTokens = new ArrayList<>(tokens); Collections.sort(mutableTokens); - this.tokens = List.copyOf(mutableTokens); this.name = name; this.maximumLength = tokens.stream().mapToInt(token -> token.token().length()).max().orElse(0); + this.tokens = List.copyOf(mutableTokens); + this.tokenMap = tokens.stream().collect(Collectors.toUnmodifiableMap(t -> t.token(), t -> t.replacement())); } /** Returns the name of this special tokens list */ @@ -38,8 +42,11 @@ public class SpecialTokens { return name; } - /** Returns a sorted immutable list of the special tokens in this */ - public List<Token> tokens() { return tokens; } + /** + * Returns the tokens of this as an immutable map from token to replacement. + * Tokens which do not have a replacement token maps to themselves. + */ + public Map<String, String> asMap() { return tokenMap; } /** * Returns the special token starting at the start of the given string, or null if no @@ -64,7 +71,7 @@ public class SpecialTokens { return null; } - private boolean tokenEndsAt(int position,String string) { + private boolean tokenEndsAt(int position, String string) { return !Character.isLetterOrDigit(string.charAt(position)); } |