diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-09-15 11:30:12 +0200 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2023-09-15 11:36:08 +0200 |
commit | 4875240fd78357fafa8ea25a14a67298333506ca (patch) | |
tree | c743ac2d8e744e539533081775219d670c8fcfcf /vespajlib/src/main/java | |
parent | 15a597984592cef5fab77c9a72a3b04a356a4943 (diff) |
- Add utility to do substring extraction by codepoints, instead of java char index.
- Test and use it in SubstringExpression in the indexing language.
Diffstat (limited to 'vespajlib/src/main/java')
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/text/Text.java | 19 |
1 files changed, 19 insertions, 0 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java index adf91a9b21e..2f0051d4795 100644 --- a/vespajlib/src/main/java/com/yahoo/text/Text.java +++ b/vespajlib/src/main/java/com/yahoo/text/Text.java @@ -182,6 +182,25 @@ public final class Text { return s.substring(0, length - 4) + " ..."; } + public static String substringByCodepoints(String s, int fromCP, int toCP) { + int len = s.length(); + if ((fromCP >= len) || (fromCP >= toCP)) return ""; + + int from = s.offsetByCodePoints(0, fromCP); + if (from >= len) return ""; + int lenCP = toCP - fromCP; + if (from + lenCP >= len) return s.substring(from); + + try { + int to = s.offsetByCodePoints(from, toCP - fromCP); + return (to >= len) + ? s.substring(from) + : s.substring(from, to); + } catch (IndexOutOfBoundsException e) { + return s.substring(from); + } + } + public static String format(String format, Object... args) { return String.format(Locale.US, format, args); } |