summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--container-disc/pom.xml2
-rw-r--r--document/src/test/java/com/yahoo/document/datatypes/StringFieldValueTestCase.java9
-rw-r--r--vespajlib/src/main/java/com/yahoo/text/Text.java50
-rw-r--r--vespajlib/src/test/java/com/yahoo/text/TextTestCase.java16
4 files changed, 59 insertions, 18 deletions
diff --git a/container-disc/pom.xml b/container-disc/pom.xml
index 01b54a6e9c6..d3319480ab3 100644
--- a/container-disc/pom.xml
+++ b/container-disc/pom.xml
@@ -187,7 +187,7 @@
hk2-utils-${hk2.version}.jar,
jackson-annotations-${jackson2.version}.jar,
jackson-core-${jackson2.version}.jar,
- jackson-databind-${jackson2.version}.jar,
+ jackson-databind-${jackson-databind.version}.jar,
jackson-datatype-jdk8-${jackson2.version}.jar,
jackson-datatype-jsr310-${jackson2.version}.jar,
jackson-jaxrs-base-${jackson2.version}.jar,
diff --git a/document/src/test/java/com/yahoo/document/datatypes/StringFieldValueTestCase.java b/document/src/test/java/com/yahoo/document/datatypes/StringFieldValueTestCase.java
index ae34de2c136..87b70134902 100644
--- a/document/src/test/java/com/yahoo/document/datatypes/StringFieldValueTestCase.java
+++ b/document/src/test/java/com/yahoo/document/datatypes/StringFieldValueTestCase.java
@@ -3,6 +3,9 @@ package com.yahoo.document.datatypes;
import org.junit.Test;
+import static java.lang.Character.MAX_SURROGATE;
+import static java.lang.Character.MIN_SURROGATE;
+
/**
* @author <a href="mailto:einarmr@yahoo-inc.com">Einar M R Rosenvinge</a>
* @since 5.1.14
@@ -15,6 +18,12 @@ public class StringFieldValueTestCase {
new StringFieldValue("\r");
new StringFieldValue("\n");
for (int c = 0x20; c < 0xFDD0; c++) {
+ new StringFieldValue("" + Character.toChars(c));
+ }
+ for (int c = 0x20; c < MIN_SURROGATE; c++) {
+ new StringFieldValue("" + Character.toChars(c)[0]);
+ }
+ for (int c = MAX_SURROGATE; c < 0xFDD0; c++) {
new StringFieldValue("" + Character.toChars(c)[0]);
}
for (int c = 0xFDE0; c < 0xFFFF; c++) {
diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java
index 7748864ced5..027521ec1ad 100644
--- a/vespajlib/src/main/java/com/yahoo/text/Text.java
+++ b/vespajlib/src/main/java/com/yahoo/text/Text.java
@@ -91,35 +91,59 @@ public final class Text {
* returns the first illegal code point if one is found.
*/
public static OptionalInt validateTextString(String string) {
- for (int i = 0; i < string.length(); i++) {
+ for (int i = 0; i < string.length(); ) {
int codePoint = string.codePointAt(i);
if ( ! Text.isTextCharacter(codePoint))
return OptionalInt.of(codePoint);
- if (Character.isHighSurrogate(string.charAt(i)))
- ++i; // // codePointAt() consumes one more char in this case
+ int charCount = Character.charCount(codePoint);
+ if (Character.isHighSurrogate(string.charAt(i))) {
+ if ( charCount == 1) {
+ return OptionalInt.of(string.codePointAt(i));
+ } else if ( !Character.isLowSurrogate(string.charAt(i+1))) {
+ return OptionalInt.of(string.codePointAt(i+1));
+ }
+ }
+ i += charCount;
}
return OptionalInt.empty();
}
+ private static StringBuilder lazy(StringBuilder sb, String s, int i) {
+ if (sb == null) {
+ sb = new StringBuilder(s.substring(0, i));
+ }
+ sb.append(' ');
+ return sb;
+ }
/**
* Returns a string where any invalid characters in the input string is replaced by spaces
*/
public static String stripInvalidCharacters(String string) {
StringBuilder stripped = null; // lazy, as most string will not need stripping
- for (int i = 0; i < string.length(); i++) {
+ for (int i = 0; i < string.length();) {
int codePoint = string.codePointAt(i);
+ int charCount = Character.charCount(codePoint);
if ( ! Text.isTextCharacter(codePoint)) {
- if (stripped == null)
- stripped = new StringBuilder(string.substring(0, i));
- stripped.append(' ');
+ stripped = lazy(stripped, string, i);
+ } else {
+ if (Character.isHighSurrogate(string.charAt(i))) {
+ if (charCount == 1) {
+ stripped = lazy(stripped, string, i);
+ } else if (!Character.isLowSurrogate(string.charAt(i+1))) {
+ stripped = lazy(stripped, string, i);
+ } else {
+ if (stripped != null) {
+ stripped.appendCodePoint(codePoint);
+ }
+ }
+ } else {
+ if (stripped != null) {
+ stripped.appendCodePoint(codePoint);
+ }
+ }
}
- else if (stripped != null) {
- stripped.appendCodePoint(codePoint);
- }
-
- if (Character.isHighSurrogate(string.charAt(i)))
- ++i; // // codePointAt() consumes one more char in this case
+ i += charCount;
}
return stripped != null ? stripped.toString() : string;
}
diff --git a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
index 0c1cf9b4b30..389a3c0a126 100644
--- a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
+++ b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
@@ -13,13 +13,13 @@ public class TextTestCase {
public void testValidateTextString() {
assertFalse(Text.validateTextString("valid").isPresent());
assertEquals(OptionalInt.of(1), Text.validateTextString("text\u0001text\u0003"));
- assertEquals(OptionalInt.of(917503),
+ assertEquals(OptionalInt.of(0xDFFFF),
Text.validateTextString(new StringBuilder().appendCodePoint(0xDFFFF).toString()));
- assertEquals(OptionalInt.of(917503),
+ assertEquals(OptionalInt.of(0xDFFFF),
Text.validateTextString(new StringBuilder("foo").appendCodePoint(0xDFFFF).toString()));
- assertEquals(OptionalInt.of(917503),
+ assertEquals(OptionalInt.of(0xDFFFF),
Text.validateTextString(new StringBuilder().appendCodePoint(0xDFFFF).append("foo").toString()));
- assertEquals(OptionalInt.of(917503),
+ assertEquals(OptionalInt.of(0xDFFFF),
Text.validateTextString(new StringBuilder("foo").appendCodePoint(0xDFFFF).append("foo").toString()));
}
@@ -36,6 +36,14 @@ public class TextTestCase {
Text.stripInvalidCharacters(new StringBuilder().appendCodePoint(0xDFFFF).append("foo").toString()));
assertEquals("foo foo",
Text.stripInvalidCharacters(new StringBuilder("foo").appendCodePoint(0xDFFFF).append("foo").toString()));
+ assertEquals("foo foo",
+ Text.stripInvalidCharacters(new StringBuilder("foo").appendCodePoint(0xD800).append("foo").toString()));
+ }
+
+ @Test
+ public void testThatHighSurrogateRequireLowSurrogate() {
+ assertEquals(OptionalInt.of(0xD800), Text.validateTextString(new StringBuilder().appendCodePoint(0xD800).toString()));
+ assertEquals(OptionalInt.of(0xD800), Text.validateTextString(new StringBuilder().appendCodePoint(0xD800).append(0x0000).toString()));
}
}