summary refs log tree commit diff stats
path: root/vespajlib
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2018-09-19 09:14:17 +0200
committer Henning Baldersheim <balder@yahoo-inc.com> 2018-09-19 09:14:17 +0200
commit eae6a10406f3ccff0350890adcf2af8713ae8806 (patch)
tree 77d8a09995eb78d08ae54079a5923c97a64dfe6d /vespajlib
parent 4d6acff9649baf0f7ed8468624b181fcbf3ffeb7 (diff)
Ensure that invalid characters cannot be sneaked past the text verification in stringfieldvalue.
Diffstat (limited to 'vespajlib')
-rw-r--r-- vespajlib/src/main/java/com/yahoo/text/Text.java 50
-rw-r--r-- vespajlib/src/test/java/com/yahoo/text/TextTestCase.java 16
2 files changed, 49 insertions, 17 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java
index 7748864ced5..027521ec1ad 100644
--- a/vespajlib/src/main/java/com/yahoo/text/Text.java
+++ b/vespajlib/src/main/java/com/yahoo/text/Text.java
@@ -91,35 +91,59 @@ public final class Text {
* returns the first illegal code point if one is found.
*/
public static OptionalInt validateTextString(String string) {
- for (int i = 0; i < string.length(); i++) {
+ for (int i = 0; i < string.length(); ) {
int codePoint = string.codePointAt(i);
if ( ! Text.isTextCharacter(codePoint))
return OptionalInt.of(codePoint);
- if (Character.isHighSurrogate(string.charAt(i)))
- ++i; // // codePointAt() consumes one more char in this case
+ int charCount = Character.charCount(codePoint);
+ if (Character.isHighSurrogate(string.charAt(i))) {
+ if ( charCount == 1) {
+ return OptionalInt.of(string.codePointAt(i));
+ } else if ( !Character.isLowSurrogate(string.charAt(i+1))) {
+ return OptionalInt.of(string.codePointAt(i+1));
+ }
+ }
+ i += charCount;
}
return OptionalInt.empty();
}
+ private static StringBuilder lazy(StringBuilder sb, String s, int i) {
+ if (sb == null) {
+ sb = new StringBuilder(s.substring(0, i));
+ }
+ sb.append(' ');
+ return sb;
+ }
/**
* Returns a string where any invalid characters in the input string are replaced by spaces
*/
public static String stripInvalidCharacters(String string) {
StringBuilder stripped = null; // lazy, as most string will not need stripping
- for (int i = 0; i < string.length(); i++) {
+ for (int i = 0; i < string.length();) {
int codePoint = string.codePointAt(i);
+ int charCount = Character.charCount(codePoint);
if ( ! Text.isTextCharacter(codePoint)) {
- if (stripped == null)
- stripped = new StringBuilder(string.substring(0, i));
- stripped.append(' ');
+ stripped = lazy(stripped, string, i);
+ } else {
+ if (Character.isHighSurrogate(string.charAt(i))) {
+ if (charCount == 1) {
+ stripped = lazy(stripped, string, i);
+ } else if (!Character.isLowSurrogate(string.charAt(i+1))) {
+ stripped = lazy(stripped, string, i);
+ } else {
+ if (stripped != null) {
+ stripped.appendCodePoint(codePoint);
+ }
+ }
+ } else {
+ if (stripped != null) {
+ stripped.appendCodePoint(codePoint);
+ }
+ }
}
- else if (stripped != null) {
- stripped.appendCodePoint(codePoint);
- }
-
- if (Character.isHighSurrogate(string.charAt(i)))
- ++i; // // codePointAt() consumes one more char in this case
+ i += charCount;
}
return stripped != null ? stripped.toString() : string;
}
diff --git a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
index 0c1cf9b4b30..389a3c0a126 100644
--- a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
+++ b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
@@ -13,13 +13,13 @@ public class TextTestCase {
public void testValidateTextString() {
assertFalse(Text.validateTextString("valid").isPresent());
assertEquals(OptionalInt.of(1), Text.validateTextString("text\u0001text\u0003"));
- assertEquals(OptionalInt.of(917503),
+ assertEquals(OptionalInt.of(0xDFFFF),
Text.validateTextString(new StringBuilder().appendCodePoint(0xDFFFF).toString()));
- assertEquals(OptionalInt.of(917503),
+ assertEquals(OptionalInt.of(0xDFFFF),
Text.validateTextString(new StringBuilder("foo").appendCodePoint(0xDFFFF).toString()));
- assertEquals(OptionalInt.of(917503),
+ assertEquals(OptionalInt.of(0xDFFFF),
Text.validateTextString(new StringBuilder().appendCodePoint(0xDFFFF).append("foo").toString()));
- assertEquals(OptionalInt.of(917503),
+ assertEquals(OptionalInt.of(0xDFFFF),
Text.validateTextString(new StringBuilder("foo").appendCodePoint(0xDFFFF).append("foo").toString()));
}
@@ -36,6 +36,14 @@ public class TextTestCase {
Text.stripInvalidCharacters(new StringBuilder().appendCodePoint(0xDFFFF).append("foo").toString()));
assertEquals("foo foo",
Text.stripInvalidCharacters(new StringBuilder("foo").appendCodePoint(0xDFFFF).append("foo").toString()));
+ assertEquals("foo foo",
+ Text.stripInvalidCharacters(new StringBuilder("foo").appendCodePoint(0xD800).append("foo").toString()));
+ }
+
+ @Test
+ public void testThatHighSurrogateRequireLowSurrogate() {
+ assertEquals(OptionalInt.of(0xD800), Text.validateTextString(new StringBuilder().appendCodePoint(0xD800).toString()));
+ assertEquals(OptionalInt.of(0xD800), Text.validateTextString(new StringBuilder().appendCodePoint(0xD800).append(0x0000).toString()));
}
}