diff options
Diffstat (limited to 'vespajlib/src')
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/text/Text.java | 14 | ||||
-rw-r--r-- | vespajlib/src/test/java/com/yahoo/text/TextTestCase.java | 22 |
2 files changed, 33 insertions, 3 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java index 7c835965a1a..e133407a967 100644 --- a/vespajlib/src/main/java/com/yahoo/text/Text.java +++ b/vespajlib/src/main/java/com/yahoo/text/Text.java @@ -177,8 +177,8 @@ public final class Text { */ public static String truncate(String s, int length) { if (s.length() <= length) return s; - if (length <= 4) return s.substring(0, length); - return s.substring(0, length - 4) + " ..."; + if (length <= 4) return safeSubstring(s, length); + return safeSubstring(s, length - 4) + " ..."; } public static String substringByCodepoints(String s, int fromCP, int toCP) { @@ -208,4 +208,14 @@ public final class Text { public static String format(String format, Object... args) { return String.format(Locale.US, format, args); } + + /** Returns the prefix of {@code s} like {@link String#substring(int, int)} called with {@code (0, length)}, but if this would split a surrogate pair at the end, the leading high surrogate is also cut, so the result has at most {@code length} chars. */ + public static String safeSubstring(String s, int length) { + boolean pairCut = 0 < length + && length < s.length() + && Character.isHighSurrogate(s.charAt(length - 1)) + && Character.isLowSurrogate(s.charAt(length)); + return s.substring(0, length - (pairCut ? 
1 : 0)); + } + } diff --git a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java index f192f678c13..b4324797086 100644 --- a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java +++ b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java @@ -83,6 +83,24 @@ public class TextTestCase { } @Test + public void testSafeSubstring() { + String withSurrogates = "abc\uD83D\uDE48\uD83D\uDE49\uD83D\uDE4Adef"; + assertEquals("", Text.safeSubstring(withSurrogates, 0)); + assertEquals("a", Text.safeSubstring(withSurrogates, 1)); + assertEquals("ab", Text.safeSubstring(withSurrogates, 2)); + assertEquals("abc", Text.safeSubstring(withSurrogates, 3)); + assertEquals("abc", Text.safeSubstring(withSurrogates, 4)); + assertEquals("abc\uD83D\uDE48", Text.safeSubstring(withSurrogates, 5)); + assertEquals("abc\uD83D\uDE48", Text.safeSubstring(withSurrogates, 6)); + assertEquals("abc\uD83D\uDE48\uD83D\uDE49", Text.safeSubstring(withSurrogates, 7)); + assertEquals("abc\uD83D\uDE48\uD83D\uDE49", Text.safeSubstring(withSurrogates, 8)); + assertEquals("abc\uD83D\uDE48\uD83D\uDE49\uD83D\uDE4A", Text.safeSubstring(withSurrogates, 9)); + assertEquals("abc\uD83D\uDE48\uD83D\uDE49\uD83D\uDE4Ad", Text.safeSubstring(withSurrogates, 10)); + assertEquals("abc\uD83D\uDE48\uD83D\uDE49\uD83D\uDE4Ade", Text.safeSubstring(withSurrogates, 11)); + assertEquals("abc\uD83D\uDE48\uD83D\uDE49\uD83D\uDE4Adef", Text.safeSubstring(withSurrogates, 12)); + } + + @Test public void testIsDisplayable() { assertTrue(Text.isDisplayable('A')); assertTrue(Text.isDisplayable('a')); @@ -104,6 +122,8 @@ public class TextTestCase { assertEquals("", Text.truncate("ab", 0)); assertEquals("ab c", Text.truncate("ab cde", 4)); assertEquals("a ...", Text.truncate("ab cde", 5)); + assertEquals("abc\uD83D\uDE48 ...", Text.truncate("abc\uD83D\uDE48\uD83D\uDE49\uD83D\uDE4Adef", 9)); + assertEquals("abc\uD83D\uDE48 ...", 
Text.truncate("abc\uD83D\uDE48\uD83D\uDE49\uD83D\uDE4Adef", 10)); } @Test @@ -152,6 +172,6 @@ public class TextTestCase { sum = benchmarkIsValid(strings, 100000000); diff = System.nanoTime() - start; System.out.println("Validation num isValid = " + sum + ". Took " + diff + "ns"); - } + } |