diff options
author | Jon Bratseth <bratseth@gmail.com> | 2021-05-04 20:40:00 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2021-05-04 20:40:00 +0200 |
commit | 69e5745894fbcfeb21e45f7d5664084b41ccbe11 (patch) | |
tree | 04502511ede97cf8abb1d3da3628aa0a5a95023b /linguistics | |
parent | af59be1ed263f1476dd5df0a696f328a7de72ccd (diff) |
Expose tokens as map
Diffstat (limited to 'linguistics')
4 files changed, 17 insertions, 12 deletions
```diff
diff --git a/linguistics/abi-spec.json b/linguistics/abi-spec.json
index 34c388b8a2e..b77b03664d4 100644
--- a/linguistics/abi-spec.json
+++ b/linguistics/abi-spec.json
@@ -472,7 +472,7 @@
     "methods": [
       "public void <init>(java.lang.String, java.util.List)",
       "public java.lang.String name()",
-      "public java.util.List tokens()",
+      "public java.util.Map asMap()",
       "public com.yahoo.language.process.SpecialTokens$Token tokenize(java.lang.String, boolean)",
       "public static com.yahoo.language.process.SpecialTokens empty()"
     ],
diff --git a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java
index 60071c3147c..b6335d67967 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java
@@ -36,7 +36,7 @@ public class SpecialTokenRegistry {
     }
 
     public SpecialTokenRegistry(List<SpecialTokens> specialTokensList) {
-        specialTokenMap = specialTokensList.stream().collect(Collectors.toMap(t -> t.name(), t -> t));
+        specialTokenMap = specialTokensList.stream().collect(Collectors.toUnmodifiableMap(t -> t.name(), t -> t));
     }
 
     private static List<SpecialTokens> specialTokensFrom(SpecialtokensConfig config) {
diff --git a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java
index 1170ce9ad1e..465d9b754b3 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java
@@ -6,7 +6,9 @@ import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
+import java.util.Map;
 import java.util.Objects;
+import java.util.stream.Collectors;
 
 import static com.yahoo.language.LinguisticsCase.toLowerCase;
 
@@ -21,16 +23,18 @@ public class SpecialTokens {
     private static final SpecialTokens empty = new SpecialTokens("(empty)", List.of());
 
     private final String name;
-    private final List<Token> tokens;
     private final int maximumLength;
+    private final List<Token> tokens;
+    private final Map<String, String> tokenMap;
 
     public SpecialTokens(String name, List<Token> tokens) {
         tokens.stream().peek(token -> token.validate());
         List<Token> mutableTokens = new ArrayList<>(tokens);
         Collections.sort(mutableTokens);
-        this.tokens = List.copyOf(mutableTokens);
         this.name = name;
         this.maximumLength = tokens.stream().mapToInt(token -> token.token().length()).max().orElse(0);
+        this.tokens = List.copyOf(mutableTokens);
+        this.tokenMap = tokens.stream().collect(Collectors.toUnmodifiableMap(t -> t.token(), t -> t.replacement()));
     }
 
     /** Returns the name of this special tokens list */
@@ -38,8 +42,11 @@ public class SpecialTokens {
         return name;
     }
 
-    /** Returns a sorted immutable list of the special tokens in this */
-    public List<Token> tokens() { return tokens; }
+    /**
+     * Returns the tokens of this as an immutable map from token to replacement.
+     * Tokens which do not have a replacement token maps to themselves.
+     */
+    public Map<String, String> asMap() { return tokenMap; }
 
     /**
      * Returns the special token starting at the start of the given string, or null if no
@@ -64,7 +71,7 @@ public class SpecialTokens {
         return null;
     }
 
-    private boolean tokenEndsAt(int position,String string) {
+    private boolean tokenEndsAt(int position, String string) {
         return !Character.isLetterOrDigit(string.charAt(position));
     }
 
diff --git a/linguistics/src/test/java/com/yahoo/language/process/SpecialTokensTestCase.java b/linguistics/src/test/java/com/yahoo/language/process/SpecialTokensTestCase.java
index fee70e3a407..47c3ba7933c 100644
--- a/linguistics/src/test/java/com/yahoo/language/process/SpecialTokensTestCase.java
+++ b/linguistics/src/test/java/com/yahoo/language/process/SpecialTokensTestCase.java
@@ -32,11 +32,9 @@ public class SpecialTokensTestCase {
 
         var defaultTokens = registry.getSpecialTokens("default");
         assertEquals("default", defaultTokens.name());
-        assertEquals(2, defaultTokens.tokens().size());
-        assertEquals("c++", defaultTokens.tokens().get(0).token());
-        assertEquals("cpp", defaultTokens.tokens().get(0).replacement());
-        assertEquals("...", defaultTokens.tokens().get(1).token());
-        assertEquals("...", defaultTokens.tokens().get(1).replacement());
+        assertEquals(2, defaultTokens.asMap().size());
+        assertEquals("cpp", defaultTokens.asMap().get("c++"));
+        assertEquals("...", defaultTokens.asMap().get("..."));
     }
 
 }
```