diff options
4 files changed, 59 insertions, 18 deletions
diff --git a/container-disc/pom.xml b/container-disc/pom.xml index 01b54a6e9c6..d3319480ab3 100644 --- a/container-disc/pom.xml +++ b/container-disc/pom.xml @@ -187,7 +187,7 @@ hk2-utils-${hk2.version}.jar, jackson-annotations-${jackson2.version}.jar, jackson-core-${jackson2.version}.jar, - jackson-databind-${jackson2.version}.jar, + jackson-databind-${jackson-databind.version}.jar, jackson-datatype-jdk8-${jackson2.version}.jar, jackson-datatype-jsr310-${jackson2.version}.jar, jackson-jaxrs-base-${jackson2.version}.jar, diff --git a/document/src/test/java/com/yahoo/document/datatypes/StringFieldValueTestCase.java b/document/src/test/java/com/yahoo/document/datatypes/StringFieldValueTestCase.java index ae34de2c136..87b70134902 100644 --- a/document/src/test/java/com/yahoo/document/datatypes/StringFieldValueTestCase.java +++ b/document/src/test/java/com/yahoo/document/datatypes/StringFieldValueTestCase.java @@ -3,6 +3,9 @@ package com.yahoo.document.datatypes; import org.junit.Test; +import static java.lang.Character.MAX_SURROGATE; +import static java.lang.Character.MIN_SURROGATE; + /** * @author <a href="mailto:einarmr@yahoo-inc.com">Einar M R Rosenvinge</a> * @since 5.1.14 @@ -15,6 +18,12 @@ public class StringFieldValueTestCase { new StringFieldValue("\r"); new StringFieldValue("\n"); for (int c = 0x20; c < 0xFDD0; c++) { + new StringFieldValue("" + Character.toChars(c)); + } + for (int c = 0x20; c < MIN_SURROGATE; c++) { + new StringFieldValue("" + Character.toChars(c)[0]); + } + for (int c = MAX_SURROGATE; c < 0xFDD0; c++) { new StringFieldValue("" + Character.toChars(c)[0]); } for (int c = 0xFDE0; c < 0xFFFF; c++) { diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java index 7748864ced5..027521ec1ad 100644 --- a/vespajlib/src/main/java/com/yahoo/text/Text.java +++ b/vespajlib/src/main/java/com/yahoo/text/Text.java @@ -91,35 +91,59 @@ public final class Text { * returns the first illegal code point if one is found. */ public static OptionalInt validateTextString(String string) { - for (int i = 0; i < string.length(); i++) { + for (int i = 0; i < string.length(); ) { int codePoint = string.codePointAt(i); if ( ! Text.isTextCharacter(codePoint)) return OptionalInt.of(codePoint); - if (Character.isHighSurrogate(string.charAt(i))) - ++i; // // codePointAt() consumes one more char in this case + int charCount = Character.charCount(codePoint); + if (Character.isHighSurrogate(string.charAt(i))) { + if ( charCount == 1) { + return OptionalInt.of(string.codePointAt(i)); + } else if ( !Character.isLowSurrogate(string.charAt(i+1))) { + return OptionalInt.of(string.codePointAt(i+1)); + } + } + i += charCount; } return OptionalInt.empty(); } + private static StringBuilder lazy(StringBuilder sb, String s, int i) { + if (sb == null) { + sb = new StringBuilder(s.substring(0, i)); + } + sb.append(' '); + return sb; + } /** * Returns a string where any invalid characters in the input string is replaced by spaces */ public static String stripInvalidCharacters(String string) { StringBuilder stripped = null; // lazy, as most string will not need stripping - for (int i = 0; i < string.length(); i++) { + for (int i = 0; i < string.length();) { int codePoint = string.codePointAt(i); + int charCount = Character.charCount(codePoint); if ( ! Text.isTextCharacter(codePoint)) { - if (stripped == null) - stripped = new StringBuilder(string.substring(0, i)); - stripped.append(' '); + stripped = lazy(stripped, string, i); + } else { + if (Character.isHighSurrogate(string.charAt(i))) { + if (charCount == 1) { + stripped = lazy(stripped, string, i); + } else if (!Character.isLowSurrogate(string.charAt(i+1))) { + stripped = lazy(stripped, string, i); + } else { + if (stripped != null) { + stripped.appendCodePoint(codePoint); + } + } + } else { + if (stripped != null) { + stripped.appendCodePoint(codePoint); + } + } } - else if (stripped != null) { - stripped.appendCodePoint(codePoint); - } - - if (Character.isHighSurrogate(string.charAt(i))) - ++i; // // codePointAt() consumes one more char in this case + i += charCount; } return stripped != null ? stripped.toString() : string; } diff --git a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java index 0c1cf9b4b30..389a3c0a126 100644 --- a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java +++ b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java @@ -13,13 +13,13 @@ public class TextTestCase { public void testValidateTextString() { assertFalse(Text.validateTextString("valid").isPresent()); assertEquals(OptionalInt.of(1), Text.validateTextString("text\u0001text\u0003")); - assertEquals(OptionalInt.of(917503), + assertEquals(OptionalInt.of(0xDFFFF), Text.validateTextString(new StringBuilder().appendCodePoint(0xDFFFF).toString())); - assertEquals(OptionalInt.of(917503), + assertEquals(OptionalInt.of(0xDFFFF), Text.validateTextString(new StringBuilder("foo").appendCodePoint(0xDFFFF).toString())); - assertEquals(OptionalInt.of(917503), + assertEquals(OptionalInt.of(0xDFFFF), Text.validateTextString(new StringBuilder().appendCodePoint(0xDFFFF).append("foo").toString())); - assertEquals(OptionalInt.of(917503), + assertEquals(OptionalInt.of(0xDFFFF), Text.validateTextString(new StringBuilder("foo").appendCodePoint(0xDFFFF).append("foo").toString())); } @@ -36,6 +36,14 @@ public class TextTestCase { Text.stripInvalidCharacters(new StringBuilder().appendCodePoint(0xDFFFF).append("foo").toString())); assertEquals("foo foo", Text.stripInvalidCharacters(new StringBuilder("foo").appendCodePoint(0xDFFFF).append("foo").toString())); + assertEquals("foo foo", + Text.stripInvalidCharacters(new StringBuilder("foo").appendCodePoint(0xD800).append("foo").toString())); + } + + @Test + public void testThatHighSurrogateRequireLowSurrogate() { + assertEquals(OptionalInt.of(0xD800), Text.validateTextString(new StringBuilder().appendCodePoint(0xD800).toString())); + assertEquals(OptionalInt.of(0xD800), Text.validateTextString(new StringBuilder().appendCodePoint(0xD800).append(0x0000).toString())); } } |