summaryrefslogtreecommitdiffstats
path: root/vespajlib/src/main/java
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2023-09-15 11:30:12 +0200
committerHenning Baldersheim <balder@yahoo-inc.com>2023-09-15 11:36:08 +0200
commit4875240fd78357fafa8ea25a14a67298333506ca (patch)
treec743ac2d8e744e539533081775219d670c8fcfcf /vespajlib/src/main/java
parent15a597984592cef5fab77c9a72a3b04a356a4943 (diff)
- Add utility to do substring extraction by codepoints, instead of java char index.
- Test and use it in SubstringExpression in the indexing language.
Diffstat (limited to 'vespajlib/src/main/java')
-rw-r--r--vespajlib/src/main/java/com/yahoo/text/Text.java19
1 files changed, 19 insertions, 0 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java
index adf91a9b21e..2f0051d4795 100644
--- a/vespajlib/src/main/java/com/yahoo/text/Text.java
+++ b/vespajlib/src/main/java/com/yahoo/text/Text.java
@@ -182,6 +182,25 @@ public final class Text {
return s.substring(0, length - 4) + " ...";
}
+ public static String substringByCodepoints(String s, int fromCP, int toCP) {
+ int len = s.length();
+ if ((fromCP >= len) || (fromCP >= toCP)) return "";
+
+ int from = s.offsetByCodePoints(0, fromCP);
+ if (from >= len) return "";
+ int lenCP = toCP - fromCP;
+ if (from + lenCP >= len) return s.substring(from);
+
+ try {
+ int to = s.offsetByCodePoints(from, toCP - fromCP);
+ return (to >= len)
+ ? s.substring(from)
+ : s.substring(from, to);
+ } catch (IndexOutOfBoundsException e) {
+ return s.substring(from);
+ }
+ }
+
/**
 * Formats the given arguments with {@link String#format(Locale, String, Object...)},
 * always using {@link Locale#US} instead of the default locale.
 *
 * @param format the format string
 * @param args   the arguments referenced by the format specifiers
 * @return the formatted string
 */
public static String format(String format, Object... args) {
    final String formatted = String.format(Locale.US, format, args);
    return formatted;
}