summary | refs | log | tree | commit | diff | stats
path: root/linguistics
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@gmail.com> 2021-05-04 20:40:00 +0200
committer Jon Bratseth <bratseth@gmail.com> 2021-05-04 20:40:00 +0200
commit 69e5745894fbcfeb21e45f7d5664084b41ccbe11 (patch)
tree 04502511ede97cf8abb1d3da3628aa0a5a95023b /linguistics
parent af59be1ed263f1476dd5df0a696f328a7de72ccd (diff)
Expose tokens as map
Diffstat (limited to 'linguistics')
-rw-r--r-- linguistics/abi-spec.json 2
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java 2
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java 17
-rw-r--r-- linguistics/src/test/java/com/yahoo/language/process/SpecialTokensTestCase.java 8
4 files changed, 17 insertions, 12 deletions
diff --git a/linguistics/abi-spec.json b/linguistics/abi-spec.json
index 34c388b8a2e..b77b03664d4 100644
--- a/linguistics/abi-spec.json
+++ b/linguistics/abi-spec.json
@@ -472,7 +472,7 @@
"methods": [
"public void <init>(java.lang.String, java.util.List)",
"public java.lang.String name()",
- "public java.util.List tokens()",
+ "public java.util.Map asMap()",
"public com.yahoo.language.process.SpecialTokens$Token tokenize(java.lang.String, boolean)",
"public static com.yahoo.language.process.SpecialTokens empty()"
],
diff --git a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java
index 60071c3147c..b6335d67967 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java
@@ -36,7 +36,7 @@ public class SpecialTokenRegistry {
}
public SpecialTokenRegistry(List<SpecialTokens> specialTokensList) {
- specialTokenMap = specialTokensList.stream().collect(Collectors.toMap(t -> t.name(), t -> t));
+ specialTokenMap = specialTokensList.stream().collect(Collectors.toUnmodifiableMap(t -> t.name(), t -> t));
}
private static List<SpecialTokens> specialTokensFrom(SpecialtokensConfig config) {
diff --git a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java
index 1170ce9ad1e..465d9b754b3 100644
--- a/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java
+++ b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java
@@ -6,7 +6,9 @@ import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
+import java.util.Map;
import java.util.Objects;
+import java.util.stream.Collectors;
import static com.yahoo.language.LinguisticsCase.toLowerCase;
@@ -21,16 +23,18 @@ public class SpecialTokens {
private static final SpecialTokens empty = new SpecialTokens("(empty)", List.of());
private final String name;
- private final List<Token> tokens;
private final int maximumLength;
+ private final List<Token> tokens;
+ private final Map<String, String> tokenMap;
public SpecialTokens(String name, List<Token> tokens) {
tokens.stream().peek(token -> token.validate());
List<Token> mutableTokens = new ArrayList<>(tokens);
Collections.sort(mutableTokens);
- this.tokens = List.copyOf(mutableTokens);
this.name = name;
this.maximumLength = tokens.stream().mapToInt(token -> token.token().length()).max().orElse(0);
+ this.tokens = List.copyOf(mutableTokens);
+ this.tokenMap = tokens.stream().collect(Collectors.toUnmodifiableMap(t -> t.token(), t -> t.replacement()));
}
/** Returns the name of this special tokens list */
@@ -38,8 +42,11 @@ public class SpecialTokens {
return name;
}
- /** Returns a sorted immutable list of the special tokens in this */
- public List<Token> tokens() { return tokens; }
+ /**
+ * Returns the tokens of this as an immutable map from token to replacement.
+ * Tokens which do not have a replacement token map to themselves.
+ */
+ public Map<String, String> asMap() { return tokenMap; }
/**
* Returns the special token starting at the start of the given string, or null if no
@@ -64,7 +71,7 @@ public class SpecialTokens {
return null;
}
- private boolean tokenEndsAt(int position,String string) {
+ private boolean tokenEndsAt(int position, String string) {
return !Character.isLetterOrDigit(string.charAt(position));
}
diff --git a/linguistics/src/test/java/com/yahoo/language/process/SpecialTokensTestCase.java b/linguistics/src/test/java/com/yahoo/language/process/SpecialTokensTestCase.java
index fee70e3a407..47c3ba7933c 100644
--- a/linguistics/src/test/java/com/yahoo/language/process/SpecialTokensTestCase.java
+++ b/linguistics/src/test/java/com/yahoo/language/process/SpecialTokensTestCase.java
@@ -32,11 +32,9 @@ public class SpecialTokensTestCase {
var defaultTokens = registry.getSpecialTokens("default");
assertEquals("default", defaultTokens.name());
- assertEquals(2, defaultTokens.tokens().size());
- assertEquals("c++", defaultTokens.tokens().get(0).token());
- assertEquals("cpp", defaultTokens.tokens().get(0).replacement());
- assertEquals("...", defaultTokens.tokens().get(1).token());
- assertEquals("...", defaultTokens.tokens().get(1).replacement());
+ assertEquals(2, defaultTokens.asMap().size());
+ assertEquals("cpp", defaultTokens.asMap().get("c++"));
+ assertEquals("...", defaultTokens.asMap().get("..."));
}
}