diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2018-09-19 09:14:17 +0200 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2018-09-19 09:14:17 +0200 |
commit | eae6a10406f3ccff0350890adcf2af8713ae8806 (patch) | |
tree | 77d8a09995eb78d08ae54079a5923c97a64dfe6d /vespajlib | |
parent | 4d6acff9649baf0f7ed8468624b181fcbf3ffeb7 (diff) |
Ensure that invalid characters cannot be sneaked past the text verification in stringfieldvalue.
Diffstat (limited to 'vespajlib')
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/text/Text.java | 50 | ||||
-rw-r--r-- | vespajlib/src/test/java/com/yahoo/text/TextTestCase.java | 16 |
2 files changed, 49 insertions, 17 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java index 7748864ced5..027521ec1ad 100644 --- a/vespajlib/src/main/java/com/yahoo/text/Text.java +++ b/vespajlib/src/main/java/com/yahoo/text/Text.java @@ -91,35 +91,59 @@ public final class Text { * returns the first illegal code point if one is found. */ public static OptionalInt validateTextString(String string) { - for (int i = 0; i < string.length(); i++) { + for (int i = 0; i < string.length(); ) { int codePoint = string.codePointAt(i); if ( ! Text.isTextCharacter(codePoint)) return OptionalInt.of(codePoint); - if (Character.isHighSurrogate(string.charAt(i))) - ++i; // // codePointAt() consumes one more char in this case + int charCount = Character.charCount(codePoint); + if (Character.isHighSurrogate(string.charAt(i))) { + if ( charCount == 1) { + return OptionalInt.of(string.codePointAt(i)); + } else if ( !Character.isLowSurrogate(string.charAt(i+1))) { + return OptionalInt.of(string.codePointAt(i+1)); + } + } + i += charCount; } return OptionalInt.empty(); } + private static StringBuilder lazy(StringBuilder sb, String s, int i) { + if (sb == null) { + sb = new StringBuilder(s.substring(0, i)); + } + sb.append(' '); + return sb; + } /** * Returns a string where any invalid characters in the input string is replaced by spaces */ public static String stripInvalidCharacters(String string) { StringBuilder stripped = null; // lazy, as most string will not need stripping - for (int i = 0; i < string.length(); i++) { + for (int i = 0; i < string.length();) { int codePoint = string.codePointAt(i); + int charCount = Character.charCount(codePoint); if ( ! 
Text.isTextCharacter(codePoint)) { - if (stripped == null) - stripped = new StringBuilder(string.substring(0, i)); - stripped.append(' '); + stripped = lazy(stripped, string, i); + } else { + if (Character.isHighSurrogate(string.charAt(i))) { + if (charCount == 1) { + stripped = lazy(stripped, string, i); + } else if (!Character.isLowSurrogate(string.charAt(i+1))) { + stripped = lazy(stripped, string, i); + } else { + if (stripped != null) { + stripped.appendCodePoint(codePoint); + } + } + } else { + if (stripped != null) { + stripped.appendCodePoint(codePoint); + } + } } - else if (stripped != null) { - stripped.appendCodePoint(codePoint); - } - - if (Character.isHighSurrogate(string.charAt(i))) - ++i; // // codePointAt() consumes one more char in this case + i += charCount; } return stripped != null ? stripped.toString() : string; } diff --git a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java index 0c1cf9b4b30..389a3c0a126 100644 --- a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java +++ b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java @@ -13,13 +13,13 @@ public class TextTestCase { public void testValidateTextString() { assertFalse(Text.validateTextString("valid").isPresent()); assertEquals(OptionalInt.of(1), Text.validateTextString("text\u0001text\u0003")); - assertEquals(OptionalInt.of(917503), + assertEquals(OptionalInt.of(0xDFFFF), Text.validateTextString(new StringBuilder().appendCodePoint(0xDFFFF).toString())); - assertEquals(OptionalInt.of(917503), + assertEquals(OptionalInt.of(0xDFFFF), Text.validateTextString(new StringBuilder("foo").appendCodePoint(0xDFFFF).toString())); - assertEquals(OptionalInt.of(917503), + assertEquals(OptionalInt.of(0xDFFFF), Text.validateTextString(new StringBuilder().appendCodePoint(0xDFFFF).append("foo").toString())); - assertEquals(OptionalInt.of(917503), + assertEquals(OptionalInt.of(0xDFFFF), Text.validateTextString(new 
StringBuilder("foo").appendCodePoint(0xDFFFF).append("foo").toString())); } @@ -36,6 +36,14 @@ public class TextTestCase { Text.stripInvalidCharacters(new StringBuilder().appendCodePoint(0xDFFFF).append("foo").toString())); assertEquals("foo foo", Text.stripInvalidCharacters(new StringBuilder("foo").appendCodePoint(0xDFFFF).append("foo").toString())); + assertEquals("foo foo", + Text.stripInvalidCharacters(new StringBuilder("foo").appendCodePoint(0xD800).append("foo").toString())); + } + + @Test + public void testThatHighSurrogateRequireLowSurrogate() { + assertEquals(OptionalInt.of(0xD800), Text.validateTextString(new StringBuilder().appendCodePoint(0xD800).toString())); + assertEquals(OptionalInt.of(0xD800), Text.validateTextString(new StringBuilder().appendCodePoint(0xD800).append(0x0000).toString())); } } |