diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-09-15 11:30:12 +0200 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2023-09-15 11:36:08 +0200 |
commit | 4875240fd78357fafa8ea25a14a67298333506ca (patch) | |
tree | c743ac2d8e744e539533081775219d670c8fcfcf /vespajlib/src/test | |
parent | 15a597984592cef5fab77c9a72a3b04a356a4943 (diff) |
- Add utility to do substring extraction by codepoints, instead of java char index.
- Test and use it in SubstringExpression in the indexing language.
Diffstat (limited to 'vespajlib/src/test')
-rw-r--r-- | vespajlib/src/test/java/com/yahoo/text/TextTestCase.java | 31 |
1 files changed, 31 insertions, 0 deletions
diff --git a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java index 033918f0bad..2639882230f 100644 --- a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java +++ b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java @@ -50,6 +50,37 @@ public class TextTestCase { validateText(OptionalInt.empty(), new StringBuilder().appendCodePoint(0xD800).appendCodePoint(0xDC00).toString()); } + /** Test helper: returns prefix, followed by each of the given code points appended via StringBuilder.appendCodePoint (in array order), followed by suffix. */ static private String fromCP(String prefix, int [] codePoints, String suffix) { + StringBuilder sb = new StringBuilder(prefix); + for (int cp : codePoints) { + sb.appendCodePoint(cp); + } + sb.append(suffix); + return sb.toString(); + } + + /** Pins the expected results of Text.substringByCodepoints: the assertions treat the two int arguments as a start (inclusive) and end (exclusive) counted in code points, expect an empty string when the range is empty, inverted or starts past the end, and expect an end beyond the input to be clamped. The second group repeats this on a string holding three code points above 0xFFFF (9 code points in total), where code point positions and char positions differ. */ @Test + public void testSubstringByCodePoint() { + assertEquals("", Text.substringByCodepoints("", 0, 0)); + assertEquals("", Text.substringByCodepoints("abcdef", 0, 0)); + assertEquals("", Text.substringByCodepoints("abcdef", 3, 3)); + assertEquals("", Text.substringByCodepoints("abcdef", 3, 2)); + assertEquals("", Text.substringByCodepoints("abcdef", 7, 9)); + assertEquals("abcdef", Text.substringByCodepoints("abcdef", 0, 9)); + assertEquals("a", Text.substringByCodepoints("abcdef", 0, 1)); + assertEquals("cd", Text.substringByCodepoints("abcdef", 2, 4)); + + String withSurrogates = fromCP("abc", new int[]{0x10F000, 0x10F001, 0x10F002}, "def"); + assertEquals(withSurrogates, Text.substringByCodepoints(withSurrogates, 0, 11)); + assertEquals(withSurrogates, Text.substringByCodepoints(withSurrogates, 0, 20)); + assertEquals(fromCP("bc", new int[]{0x10F000, 0x10F001}, ""), + Text.substringByCodepoints(withSurrogates, 1, 5)); + assertEquals(fromCP("", new int[]{0x10F001}, ""), + Text.substringByCodepoints(withSurrogates, 4, 5)); + assertEquals(fromCP("", new int[]{0x10F001, 0x10F002}, "de"), + Text.substringByCodepoints(withSurrogates, 4, 8)); + } + @Test public void testIsDisplayable() { assertTrue(Text.isDisplayable('A')); |