summary refs log tree commit diff stats
diff options
context:
space:
mode:
author jonmv <venstad@gmail.com> 2023-10-20 17:09:20 +0200
committer jonmv <venstad@gmail.com> 2023-10-20 17:09:20 +0200
commit e228115788634d77f5b6354c12c1718252044860 (patch)
tree 2317d7a3ebb49ff5328ed289a3fd646b2fffbd97
parent 2ecc9af04be2dbebedbf0032990cd3b699aa35d5 (diff)
Use code point count for truncate as well
-rw-r--r-- vespajlib/src/main/java/com/yahoo/text/Text.java 17
-rw-r--r-- vespajlib/src/test/java/com/yahoo/text/TextTestCase.java 23
2 files changed, 7 insertions, 33 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java
index e133407a967..fe931ef34a3 100644
--- a/vespajlib/src/main/java/com/yahoo/text/Text.java
+++ b/vespajlib/src/main/java/com/yahoo/text/Text.java
@@ -170,15 +170,15 @@ public final class Text {
}
/**
- * Returns a string which is never larger than the given number of characters.
+ * Returns a string which is never larger than the given number of code points.
* If the string is longer than the given length it will be truncated.
* If length is 4 or less the string will be truncated to length.
* If length is longer than 4, it will be truncated at length-4 with " ..." added at the end.
*/
public static String truncate(String s, int length) {
- if (s.length() <= length) return s;
- if (length <= 4) return safeSubstring(s, length);
- return safeSubstring(s, length - 4) + " ...";
+ if (s.codePointCount(0, s.length()) <= length) return s;
+ if (length <= 4) return substringByCodepoints(s, 0, length);
+ return substringByCodepoints(s, 0, length - 4) + " ...";
}
public static String substringByCodepoints(String s, int fromCP, int toCP) {
@@ -209,13 +209,4 @@ public final class Text {
return String.format(Locale.US, format, args);
}
- /** Like {@link String#substring(int)}, but if this would split a surrogate pair at the end, the leading high surrogate is also cut. */
- public static String safeSubstring(String s, int length) {
- boolean pairCut = 0 < length
- && length < s.length()
- && Character.isHighSurrogate(s.charAt(length - 1))
- && Character.isLowSurrogate(s.charAt(length));
- return s.substring(0, length - (pairCut ? 1 : 0));
- }
-
}
diff --git a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
index b4324797086..9bb4668b7cb 100644
--- a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
+++ b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
@@ -83,24 +83,6 @@ public class TextTestCase {
}
@Test
- public void testSafeSubstring() {
- String withSurrogates = "abc\uD83D\uDE48\uD83D\uDE49\uD83D\uDE4Adef";
- assertEquals("", Text.safeSubstring(withSurrogates, 0));
- assertEquals("a", Text.safeSubstring(withSurrogates, 1));
- assertEquals("ab", Text.safeSubstring(withSurrogates, 2));
- assertEquals("abc", Text.safeSubstring(withSurrogates, 3));
- assertEquals("abc", Text.safeSubstring(withSurrogates, 4));
- assertEquals("abc\uD83D\uDE48", Text.safeSubstring(withSurrogates, 5));
- assertEquals("abc\uD83D\uDE48", Text.safeSubstring(withSurrogates, 6));
- assertEquals("abc\uD83D\uDE48\uD83D\uDE49", Text.safeSubstring(withSurrogates, 7));
- assertEquals("abc\uD83D\uDE48\uD83D\uDE49", Text.safeSubstring(withSurrogates, 8));
- assertEquals("abc\uD83D\uDE48\uD83D\uDE49\uD83D\uDE4A", Text.safeSubstring(withSurrogates, 9));
- assertEquals("abc\uD83D\uDE48\uD83D\uDE49\uD83D\uDE4Ad", Text.safeSubstring(withSurrogates, 10));
- assertEquals("abc\uD83D\uDE48\uD83D\uDE49\uD83D\uDE4Ade", Text.safeSubstring(withSurrogates, 11));
- assertEquals("abc\uD83D\uDE48\uD83D\uDE49\uD83D\uDE4Adef", Text.safeSubstring(withSurrogates, 12));
- }
-
- @Test
public void testIsDisplayable() {
assertTrue(Text.isDisplayable('A'));
assertTrue(Text.isDisplayable('a'));
@@ -122,8 +104,9 @@ public class TextTestCase {
assertEquals("", Text.truncate("ab", 0));
assertEquals("ab c", Text.truncate("ab cde", 4));
assertEquals("a ...", Text.truncate("ab cde", 5));
- assertEquals("abc\uD83D\uDE48 ...", Text.truncate("abc\uD83D\uDE48\uD83D\uDE49\uD83D\uDE4Adef", 9));
- assertEquals("abc\uD83D\uDE48 ...", Text.truncate("abc\uD83D\uDE48\uD83D\uDE49\uD83D\uDE4Adef", 10));
+ assertEquals("abc ...", Text.truncate("abc\uD83D\uDE48\uD83D\uDE49\uD83D\uDE4Adef", 7));
+ assertEquals("abc\uD83D\uDE48 ...", Text.truncate("abc\uD83D\uDE48\uD83D\uDE49\uD83D\uDE4Adef", 8));
+ assertEquals("abc\uD83D\uDE48\uD83D\uDE49\uD83D\uDE4Adef", Text.truncate("abc\uD83D\uDE48\uD83D\uDE49\uD83D\uDE4Adef", 9));
}
@Test