diff options
author | jonmv <venstad@gmail.com> | 2023-10-06 13:10:45 +0200 |
---|---|---|
committer | jonmv <venstad@gmail.com> | 2023-10-06 13:10:45 +0200 |
commit | 38de8b1cf3e0772d98ae76bd3b46620a8d8a2475 (patch) | |
tree | f4ea179d9c1dc324919fdbf2a32fbc85ce077662 | |
parent | 3dd336bbeadfc750bc097e90f2c15f8b9e101b39 (diff) |
0xFFFE, 0xFFFF and stand-alone low surrogates are not valid text
-rw-r--r-- | document/src/test/java/com/yahoo/document/datatypes/StringFieldValueTestCase.java | 17 | ||||
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/text/Text.java | 7 |
2 files changed, 14 insertions, 10 deletions
diff --git a/document/src/test/java/com/yahoo/document/datatypes/StringFieldValueTestCase.java b/document/src/test/java/com/yahoo/document/datatypes/StringFieldValueTestCase.java index 7a1b16c14ee..cce0a4402e7 100644 --- a/document/src/test/java/com/yahoo/document/datatypes/StringFieldValueTestCase.java +++ b/document/src/test/java/com/yahoo/document/datatypes/StringFieldValueTestCase.java @@ -7,7 +7,7 @@ import static java.lang.Character.MAX_SURROGATE; import static java.lang.Character.MIN_SURROGATE; /** - * @author <a href="mailto:einarmr@yahoo-inc.com">Einar M R Rosenvinge</a> + * @author Einar M R Rosenvinge * @since 5.1.14 */ public class StringFieldValueTestCase { @@ -17,16 +17,13 @@ public class StringFieldValueTestCase { new StringFieldValue("\t"); new StringFieldValue("\r"); new StringFieldValue("\n"); - for (int c = 0x20; c < 0xFDD0; c++) { - new StringFieldValue("" + Character.toChars(c)); - } for (int c = 0x20; c < MIN_SURROGATE; c++) { new StringFieldValue("" + Character.toChars(c)[0]); } - for (int c = MAX_SURROGATE; c < 0xFDD0; c++) { + for (int c = MAX_SURROGATE + 1; c < 0xFDD0; c++) { new StringFieldValue("" + Character.toChars(c)[0]); } - for (int c = 0xFDE0; c < 0xFFFF; c++) { + for (int c = 0xFDE0; c < 0xFFFE; c++) { new StringFieldValue("" + Character.toChars(c)[0]); } for (int c = 0x10000; c < 0x1FFFE; c++) { @@ -272,6 +269,14 @@ public class StringFieldValueTestCase { new StringFieldValue("\uFDDF"); } @Test(expected = IllegalArgumentException.class) + public void requireThatControlCharFailsFFFE() { + new StringFieldValue("\uFFFE"); + } + @Test(expected = IllegalArgumentException.class) + public void requireThatControlCharFailsFFFF() { + new StringFieldValue("\uFFFF"); + } + @Test(expected = IllegalArgumentException.class) public void requireThatControlCharFails1FFFE() { new StringFieldValue("\uD83F\uDFFE"); } diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java index a2e7a696857..474702a74b3 100644 --- a/vespajlib/src/main/java/com/yahoo/text/Text.java +++ b/vespajlib/src/main/java/com/yahoo/text/Text.java @@ -50,13 +50,12 @@ public final class Text { return (codepoint < 0x80) ? allowedAsciiChars[codepoint] - : (codepoint < Character.MIN_SURROGATE) || isTextCharAboveMinSurrogate(codepoint); + : (codepoint < Character.MIN_SURROGATE) || isTextCharAboveMinSurrogate(codepoint); } private static boolean isTextCharAboveMinSurrogate(int codepoint) { - if (codepoint <= Character.MAX_HIGH_SURROGATE) return false; + if (codepoint <= Character.MAX_SURROGATE) return false; if (codepoint < 0xFDD0) return true; if (codepoint <= 0xFDDF) return false; - if (codepoint < 0x10000) return true; if (codepoint >= 0x10FFFE) return false; return (codepoint & 0xffff) < 0xFFFE; } @@ -75,7 +74,7 @@ public final class Text { if (Character.isHighSurrogate(string.charAt(i))) { if ( charCount == 1) { return OptionalInt.of(string.codePointAt(i)); - } else if ( !Character.isLowSurrogate(string.charAt(i+1))) { + } else if ( ! Character.isLowSurrogate(string.charAt(i+1))) { return OptionalInt.of(string.codePointAt(i+1)); } } |