diff options
author | Jon Marius Venstad <jonmv@users.noreply.github.com> | 2022-04-06 19:37:30 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-06 19:37:30 +0200 |
commit | 346e8e66e9c8d7f06c336d6c27919551604f18c0 (patch) | |
tree | 3453769ddca6a16fed065fb5552ab6cd73cb0145 /vespajlib | |
parent | 039589faf5f989d80b9fec2b28ed955ac6fd86f6 (diff) | |
parent | cf3f7bbb9e99f5b3e6cf3ac3f93e813042e4a12c (diff) |
Merge branch 'master' into jonmv/more-http-url
Diffstat (limited to 'vespajlib')
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/text/Text.java | 28 | ||||
-rw-r--r-- | vespajlib/src/test/java/com/yahoo/text/TextTestCase.java | 40 |
2 files changed, 36 insertions, 32 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java index 501ca980187..d1712a20626 100644 --- a/vespajlib/src/main/java/com/yahoo/text/Text.java +++ b/vespajlib/src/main/java/com/yahoo/text/Text.java @@ -50,10 +50,10 @@ public final class Text { return (codepoint < 0x80) ? allowedAsciiChars[codepoint] - : isTextCharAboveUsAscii(codepoint); + : (codepoint < Character.MIN_SURROGATE) || isTextCharAboveMinSurrogate(codepoint); } - private static boolean isTextCharAboveUsAscii(int codepoint) { - // TODO jonmv: compute modulo? + private static boolean isTextCharAboveMinSurrogate(int codepoint) { + if (codepoint <= Character.MAX_HIGH_SURROGATE) return false; if (codepoint < 0xFDD0) return true; if (codepoint <= 0xFDDF) return false; if (codepoint < 0x1FFFE) return true; @@ -87,9 +87,7 @@ public final class Text { if (codepoint < 0xFFFFE) return true; if (codepoint <= 0xFFFFF) return false; if (codepoint < 0x10FFFE) return true; - if (codepoint <= 0x10FFFF) return false; - - return true; + return false; } /** @@ -119,19 +117,23 @@ public final class Text { * Validates that the given string value only contains text characters. */ public static boolean isValidTextString(String string) { - for (int i = 0; i < string.length(); ) { + int length = string.length(); + for (int i = 0; i < length; ) { int codePoint = string.codePointAt(i); - if ( ! Text.isTextCharacter(codePoint)) return false; - - int charCount = Character.charCount(codePoint); - if (Character.isHighSurrogate(string.charAt(i))) { - if ( (charCount == 1) || !Character.isLowSurrogate(string.charAt(i+1))) return false; + if (codePoint < 0x80) { + if ( ! allowedAsciiChars[codePoint]) return false; + } else if (codePoint >= Character.MIN_SURROGATE) { + if ( ! isTextCharAboveMinSurrogate(codePoint)) return false; + if ( ! Character.isBmpCodePoint(codePoint)) { + i++; + } } - i += charCount; + i++; } return true; } + /** Returns whether the given code point is displayable. */ public static boolean isDisplayable(int codePoint) { switch (Character.getType(codePoint)) { diff --git a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java index 33274380aad..033918f0bad 100644 --- a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java +++ b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java @@ -12,18 +12,18 @@ import static org.junit.Assert.assertTrue; public class TextTestCase { + private static void validateText(OptionalInt expect, String text) { + assertEquals(expect, Text.validateTextString(text)); + assertEquals(expect.isEmpty(), Text.isValidTextString(text)); + } @Test public void testValidateTextString() { - assertFalse(Text.validateTextString("valid").isPresent()); - assertEquals(OptionalInt.of(1), Text.validateTextString("text\u0001text\u0003")); - assertEquals(OptionalInt.of(0xDFFFF), - Text.validateTextString(new StringBuilder().appendCodePoint(0xDFFFF).toString())); - assertEquals(OptionalInt.of(0xDFFFF), - Text.validateTextString(new StringBuilder("foo").appendCodePoint(0xDFFFF).toString())); - assertEquals(OptionalInt.of(0xDFFFF), - Text.validateTextString(new StringBuilder().appendCodePoint(0xDFFFF).append("foo").toString())); - assertEquals(OptionalInt.of(0xDFFFF), - Text.validateTextString(new StringBuilder("foo").appendCodePoint(0xDFFFF).append("foo").toString())); + validateText(OptionalInt.empty(), "valid"); + validateText(OptionalInt.of(1), "text\u0001text\u0003"); + validateText(OptionalInt.of(0xDFFFF), new StringBuilder().appendCodePoint(0xDFFFF).toString()); + validateText(OptionalInt.of(0xDFFFF), new StringBuilder("foo").appendCodePoint(0xDFFFF).toString()); + validateText(OptionalInt.of(0xDFFFF), new StringBuilder().appendCodePoint(0xDFFFF).append("foo").toString()); + validateText(OptionalInt.of(0xDFFFF), new StringBuilder("foo").appendCodePoint(0xDFFFF).append("foo").toString()); } @Test @@ -45,8 +45,9 @@ public class TextTestCase { @Test public void testThatHighSurrogateRequireLowSurrogate() { - assertEquals(OptionalInt.of(0xD800), Text.validateTextString(new StringBuilder().appendCodePoint(0xD800).toString())); - assertEquals(OptionalInt.of(0xD800), Text.validateTextString(new StringBuilder().appendCodePoint(0xD800).append(0x0000).toString())); + validateText(OptionalInt.of(0xD800), new StringBuilder().appendCodePoint(0xD800).toString()); + validateText(OptionalInt.of(0xD800), new StringBuilder().appendCodePoint(0xD800).append(0x0000).toString()); + validateText(OptionalInt.empty(), new StringBuilder().appendCodePoint(0xD800).appendCodePoint(0xDC00).toString()); } @Test @@ -78,7 +79,7 @@ public class TextTestCase { assertEquals("foo 3.14", Text.format("%s %.2f", "foo", 3.1415926536)); } - private static long isValid(String [] strings, int num) { + private static long benchmarkIsValid(String [] strings, int num) { long sum = 0; for (int i=0; i < num; i++) { if (Text.isValidTextString(strings[i%strings.length])) { @@ -87,7 +88,8 @@ public class TextTestCase { } return sum; } - private static long validate(String [] strings, int num) { + + private static long benchmarkValidate(String [] strings, int num) { long sum = 0; for (int i=0; i < num; i++) { if (Text.validateTextString(strings[i%strings.length]).isEmpty()) { @@ -99,23 +101,23 @@ public class TextTestCase { @Ignore @Test - public void benchmarkValidate() { + public void benchmarkTextValidation() { String [] strings = new String[100]; for (int i=0; i < strings.length; i++) { strings[i] = new StringBuilder("some text ").append(i).append("of mine.").appendCodePoint(0xDFFFC).append("foo").toString(); } - long sum = validate(strings, 1000000); + long sum = benchmarkValidate(strings, 1000000); System.out.println("Warmup num validate = " + sum); - sum = isValid(strings, 1000000); + sum = benchmarkIsValid(strings, 1000000); System.out.println("Warmup num isValid = " + sum); long start = System.nanoTime(); - sum = validate(strings, 100000000); + sum = benchmarkValidate(strings, 100000000); long diff = System.nanoTime() - start; System.out.println("Validation num validate = " + sum + ". Took " + diff + "ns"); start = System.nanoTime(); - sum = isValid(strings, 100000000); + sum = benchmarkIsValid(strings, 100000000); diff = System.nanoTime() - start; System.out.println("Validation num isValid = " + sum + ". Took " + diff + "ns"); |