about | summary | refs | log | tree | commit | diff | stats
path: root/linguistics
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@gmail.com> 2021-09-14 21:36:39 +0200
committer: Jon Bratseth <bratseth@gmail.com> 2021-09-14 21:36:39 +0200
commit: fa0dbe36983238c8e17ee6fb45037a09ed49bdad (patch)
tree: 91560703f49921d190a3502673aa80cc6bbabee3 /linguistics
parent: 6c3b241781761d743bfe712836cef49c00d08c9e (diff)
Slight algorithm simplification
Diffstat (limited to 'linguistics')
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/sentencepiece/SentencePieceEncoder.java 10
1 file changed, 4 insertions, 6 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/sentencepiece/SentencePieceEncoder.java b/linguistics/src/main/java/com/yahoo/language/sentencepiece/SentencePieceEncoder.java
index bd895e18b80..a755a9e6ff3 100644
--- a/linguistics/src/main/java/com/yahoo/language/sentencepiece/SentencePieceEncoder.java
+++ b/linguistics/src/main/java/com/yahoo/language/sentencepiece/SentencePieceEncoder.java
@@ -99,13 +99,11 @@ public class SentencePieceEncoder implements Segmenter {
while (node != null && characterPosition < input.length()) { // traverse the trie one character at the time from this position
node = node.children.get(input.charAt(characterPosition++));
int length = characterPosition - start;
- if (node != null && node.type != TokenType.unused) {
- if (node.isToken()) {
- float score = node.type == TokenType.userDefined ? (length * model.maxScore - 0.1f) : node.score;
- addSegment(TokenType.text, node.id, start, characterPosition, score, segmentEnds);
- }
+ if (node != null && node.isToken() && node.type != TokenType.unused) {
+ float score = node.type == TokenType.userDefined ? (length * model.maxScore - 0.1f) : node.score;
+ addSegment(TokenType.text, node.id, start, characterPosition, score, segmentEnds);
}
- else if (length == 1) { // add an 'unknown' token of length 1 instead to make the next position reachable
+ else if (length == 1) { // add an 'unknown' length 1 token to make the next position reachable
addSegment(TokenType.unknown, 0, start, start + 1, unknownScore, segmentEnds);
}
}