summaryrefslogtreecommitdiffstats
path: root/vespajlib/src/main/java/com/yahoo/text/Text.java
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@oath.com>2018-04-06 14:18:42 +0200
committerJon Bratseth <bratseth@oath.com>2018-04-06 14:18:42 +0200
commit981633f3f5d0ddab9a6b5de9881d478e5210e717 (patch)
treefd5ac0d996545ef3e1066376c93c3cc914ef87d8 /vespajlib/src/main/java/com/yahoo/text/Text.java
parent67e98ccb86f0fa4838a4b2112fd7c0c654972cab (diff)
Add stripInvalidCharacters
Diffstat (limited to 'vespajlib/src/main/java/com/yahoo/text/Text.java')
-rw-r--r--vespajlib/src/main/java/com/yahoo/text/Text.java39
1 files changed, 29 insertions, 10 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java
index 0bdc2fb63bc..f3e1948649c 100644
--- a/vespajlib/src/main/java/com/yahoo/text/Text.java
+++ b/vespajlib/src/main/java/com/yahoo/text/Text.java
@@ -90,19 +90,38 @@ public final class Text {
* Validates that the given string value only contains text characters and
* returns the first illegal code point if one is found.
*/
- public static OptionalInt validateTextString(String value) {
- for (int i = 0; i < value.length(); i++) {
- char theChar = value.charAt(i);
- int codePoint = value.codePointAt(i);
- if (Character.isHighSurrogate(theChar)) {
- // Skip one char ahead, since codePointAt() consumes one more char in this case
- ++i;
- }
- if (!Text.isTextCharacter(codePoint)) {
+ public static OptionalInt validateTextString(String string) {
+ for (int i = 0; i < string.length(); i++) {
+ int codePoint = string.codePointAt(i);
+ if ( ! Text.isTextCharacter(codePoint))
return OptionalInt.of(codePoint);
- }
+
+ if (Character.isHighSurrogate(string.charAt(i)))
+ ++i; // skip the following char too: codePointAt() reads both chars of a surrogate pair
}
return OptionalInt.empty();
}
+ /**
+  * Returns a string where any invalid characters in the input string are replaced by spaces.
+  * Each code point rejected by isTextCharacter becomes one space; the input itself is returned if nothing is replaced.
+  */
+ public static String stripInvalidCharacters(String string) {
+     StringBuilder stripped = null; // lazy, as most strings will not need stripping
+     for (int i = 0; i < string.length(); ) {
+         int codePoint = string.codePointAt(i);
+         if ( ! Text.isTextCharacter(codePoint)) {
+             if (stripped == null)
+                 stripped = new StringBuilder(string.substring(0, i));
+             stripped.append(' ');
+         }
+         else if (stripped != null) {
+             stripped.appendCodePoint(codePoint);
+         }
+         // Advance by what codePointAt() actually consumed, so the char after an unpaired high surrogate is kept
+         i += Character.charCount(codePoint);
+     }
+     return stripped != null ? stripped.toString() : string;
+ }
+
}