about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: jonmv <venstad@gmail.com> 2023-10-06 13:10:45 +0200
committer: jonmv <venstad@gmail.com> 2023-10-06 13:10:45 +0200
commit: 38de8b1cf3e0772d98ae76bd3b46620a8d8a2475 (patch)
tree: f4ea179d9c1dc324919fdbf2a32fbc85ce077662
parent: 3dd336bbeadfc750bc097e90f2c15f8b9e101b39 (diff)
0xFFFE, 0xFFFF and stand-alone low surrogates are not valid text
-rw-r--r-- document/src/test/java/com/yahoo/document/datatypes/StringFieldValueTestCase.java | 17
-rw-r--r-- vespajlib/src/main/java/com/yahoo/text/Text.java | 7
2 files changed, 14 insertions, 10 deletions
diff --git a/document/src/test/java/com/yahoo/document/datatypes/StringFieldValueTestCase.java b/document/src/test/java/com/yahoo/document/datatypes/StringFieldValueTestCase.java
index 7a1b16c14ee..cce0a4402e7 100644
--- a/document/src/test/java/com/yahoo/document/datatypes/StringFieldValueTestCase.java
+++ b/document/src/test/java/com/yahoo/document/datatypes/StringFieldValueTestCase.java
@@ -7,7 +7,7 @@ import static java.lang.Character.MAX_SURROGATE;
import static java.lang.Character.MIN_SURROGATE;
/**
- * @author <a href="mailto:einarmr@yahoo-inc.com">Einar M R Rosenvinge</a>
+ * @author Einar M R Rosenvinge
* @since 5.1.14
*/
public class StringFieldValueTestCase {
@@ -17,16 +17,13 @@ public class StringFieldValueTestCase {
new StringFieldValue("\t");
new StringFieldValue("\r");
new StringFieldValue("\n");
- for (int c = 0x20; c < 0xFDD0; c++) {
- new StringFieldValue("" + Character.toChars(c));
- }
for (int c = 0x20; c < MIN_SURROGATE; c++) {
new StringFieldValue("" + Character.toChars(c)[0]);
}
- for (int c = MAX_SURROGATE; c < 0xFDD0; c++) {
+ for (int c = MAX_SURROGATE + 1; c < 0xFDD0; c++) {
new StringFieldValue("" + Character.toChars(c)[0]);
}
- for (int c = 0xFDE0; c < 0xFFFF; c++) {
+ for (int c = 0xFDE0; c < 0xFFFE; c++) {
new StringFieldValue("" + Character.toChars(c)[0]);
}
for (int c = 0x10000; c < 0x1FFFE; c++) {
@@ -272,6 +269,14 @@ public class StringFieldValueTestCase {
new StringFieldValue("\uFDDF");
}
@Test(expected = IllegalArgumentException.class)
+ public void requireThatControlCharFailsFFFE() {
+ new StringFieldValue("\uFFFE");
+ }
+ @Test(expected = IllegalArgumentException.class)
+ public void requireThatControlCharFailsFFFF() {
+ new StringFieldValue("\uFFFF");
+ }
+ @Test(expected = IllegalArgumentException.class)
public void requireThatControlCharFails1FFFE() {
new StringFieldValue("\uD83F\uDFFE");
}
diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java
index a2e7a696857..474702a74b3 100644
--- a/vespajlib/src/main/java/com/yahoo/text/Text.java
+++ b/vespajlib/src/main/java/com/yahoo/text/Text.java
@@ -50,13 +50,12 @@ public final class Text {
return (codepoint < 0x80)
? allowedAsciiChars[codepoint]
- : (codepoint < Character.MIN_SURROGATE) || isTextCharAboveMinSurrogate(codepoint);
+ : (codepoint < Character.MIN_SURROGATE) || isTextCharAboveMinSurrogate(codepoint);
}
private static boolean isTextCharAboveMinSurrogate(int codepoint) {
- if (codepoint <= Character.MAX_HIGH_SURROGATE) return false;
+ if (codepoint <= Character.MAX_SURROGATE) return false;
if (codepoint < 0xFDD0) return true;
if (codepoint <= 0xFDDF) return false;
- if (codepoint < 0x10000) return true;
if (codepoint >= 0x10FFFE) return false;
return (codepoint & 0xffff) < 0xFFFE;
}
@@ -75,7 +74,7 @@ public final class Text {
if (Character.isHighSurrogate(string.charAt(i))) {
if ( charCount == 1) {
return OptionalInt.of(string.codePointAt(i));
- } else if ( !Character.isLowSurrogate(string.charAt(i+1))) {
+ } else if ( ! Character.isLowSurrogate(string.charAt(i+1))) {
return OptionalInt.of(string.codePointAt(i+1));
}
}