summaryrefslogtreecommitdiffstats
path: root/vespajlib
diff options
context:
space:
mode:
author Geir Storli <geirst@oath.com> 2017-10-20 13:42:12 +0200
committer Geir Storli <geirst@oath.com> 2017-10-20 13:43:42 +0200
commit 5bd2f9bc49ed4bd9b033005ee7f9ecd624c6b9c9 (patch)
tree cf81e3d65747caec2818dd49e0d89f1541144a9d /vespajlib
parent d82d7c04128c23bc6c3ec5b8e7e822315055ca29 (diff)
Validate that document ids only contain text characters.
Diffstat (limited to 'vespajlib')
-rw-r--r-- vespajlib/src/main/java/com/yahoo/text/Text.java 25
1 files changed, 23 insertions, 2 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java
index 2b670e5d727..684cdbbcb3e 100644
--- a/vespajlib/src/main/java/com/yahoo/text/Text.java
+++ b/vespajlib/src/main/java/com/yahoo/text/Text.java
@@ -1,5 +1,7 @@
package com.yahoo.text;
+import java.util.OptionalInt;
+
/**
* Text utility functions.
*
@@ -81,6 +83,25 @@ public final class Text {
if (codepoint <= 0x10FFFF) return false;
return true;
- }
-
+ }
+
+ /**
+  * Validates that the given string only contains text characters.
+  *
+  * @param value the string to check, scanned one code point at a time
+  * @return the first code point rejected by isTextCharacter, or empty if there is none
+  */
+ public static OptionalInt validateTextString(String value) {
+     int i = 0;
+     while (i < value.length()) {
+         int codePoint = value.codePointAt(i);
+         if (!Text.isTextCharacter(codePoint)) {
+             return OptionalInt.of(codePoint);
+         }
+         // Step by the code point's real width; an unpaired high surrogate is 1 char, so its successor is still checked
+         i += Character.charCount(codePoint);
+     }
+     return OptionalInt.empty();
+ }
+
}