summary | refs | log | tree | commit | diff | stats
path: root/linguistics/src/main/java/com/yahoo/language
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@gmail.com> 2021-05-04 20:40:00 +0200
committer: Jon Bratseth <bratseth@gmail.com> 2021-05-04 20:40:00 +0200
commit: 69e5745894fbcfeb21e45f7d5664084b41ccbe11 (patch)
tree: 04502511ede97cf8abb1d3da3628aa0a5a95023b /linguistics/src/main/java/com/yahoo/language
parent: af59be1ed263f1476dd5df0a696f328a7de72ccd (diff)
Expose tokens as map
Diffstat (limited to 'linguistics/src/main/java/com/yahoo/language')
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java 2
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java 17
2 files changed, 13 insertions, 6 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java
index 60071c3147c..b6335d67967 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java
@@ -36,7 +36,7 @@ public class SpecialTokenRegistry {
}
public SpecialTokenRegistry(List<SpecialTokens> specialTokensList) {
- specialTokenMap = specialTokensList.stream().collect(Collectors.toMap(t -> t.name(), t -> t));
+ specialTokenMap = specialTokensList.stream().collect(Collectors.toUnmodifiableMap(t -> t.name(), t -> t));
}
private static List<SpecialTokens> specialTokensFrom(SpecialtokensConfig config) {
diff --git a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java
index 1170ce9ad1e..465d9b754b3 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java
@@ -6,7 +6,9 @@ import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
+import java.util.Map;
import java.util.Objects;
+import java.util.stream.Collectors;
import static com.yahoo.language.LinguisticsCase.toLowerCase;
@@ -21,16 +23,18 @@ public class SpecialTokens {
private static final SpecialTokens empty = new SpecialTokens("(empty)", List.of());
private final String name;
- private final List<Token> tokens;
private final int maximumLength;
+ private final List<Token> tokens;
+ private final Map<String, String> tokenMap;
public SpecialTokens(String name, List<Token> tokens) {
tokens.stream().peek(token -> token.validate());
List<Token> mutableTokens = new ArrayList<>(tokens);
Collections.sort(mutableTokens);
- this.tokens = List.copyOf(mutableTokens);
this.name = name;
this.maximumLength = tokens.stream().mapToInt(token -> token.token().length()).max().orElse(0);
+ this.tokens = List.copyOf(mutableTokens);
+ this.tokenMap = tokens.stream().collect(Collectors.toUnmodifiableMap(t -> t.token(), t -> t.replacement()));
}
/** Returns the name of this special tokens list */
@@ -38,8 +42,11 @@ public class SpecialTokens {
return name;
}
- /** Returns a sorted immutable list of the special tokens in this */
- public List<Token> tokens() { return tokens; }
+ /**
+ * Returns the tokens of this as an immutable map from token to replacement.
+ * Tokens which do not have a replacement token maps to themselves.
+ */
+ public Map<String, String> asMap() { return tokenMap; }
/**
* Returns the special token starting at the start of the given string, or null if no
@@ -64,7 +71,7 @@ public class SpecialTokens {
return null;
}
- private boolean tokenEndsAt(int position,String string) {
+ private boolean tokenEndsAt(int position, String string) {
return !Character.isLetterOrDigit(string.charAt(position));
}