diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-04-06 15:01:03 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-06 15:01:03 +0200 |
commit | 7ac6ebd9fc7c3f1b42d374565149aec25fb61550 (patch) | |
tree | 9fd80f836ead377bd06190d90d991fc37ce4c74d | |
parent | 4211f68e438f9a068070b5d83146b557018963f2 (diff) | |
parent | 8e8ebf962e6b4305440b4061499d587e1ea1f11e (diff) |
Merge pull request #21998 from vespa-engine/balder/refactor-to-favor-hot-path
Refactor to optimize for hot path.
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/text/Text.java | 27 | ||||
-rw-r--r-- | vespajlib/src/test/java/com/yahoo/text/TextTestCase.java | 40 |
2 files changed, 36 insertions, 31 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java
index 30eba3ebd65..d1712a20626 100644
--- a/vespajlib/src/main/java/com/yahoo/text/Text.java
+++ b/vespajlib/src/main/java/com/yahoo/text/Text.java
@@ -50,9 +50,10 @@ public final class Text {
         return (codepoint < 0x80)
                 ? allowedAsciiChars[codepoint]
-                : isTextCharAboveUsAscii(codepoint);
+                : (codepoint < Character.MIN_SURROGATE) || isTextCharAboveMinSurrogate(codepoint);
     }
-    private static boolean isTextCharAboveUsAscii(int codepoint) {
+    private static boolean isTextCharAboveMinSurrogate(int codepoint) {
+        if (codepoint <= Character.MAX_HIGH_SURROGATE) return false;
         if (codepoint < 0xFDD0) return true;
         if (codepoint <= 0xFDDF) return false;
         if (codepoint < 0x1FFFE) return true;
@@ -86,9 +87,7 @@ public final class Text {
         if (codepoint < 0xFFFFE) return true;
         if (codepoint <= 0xFFFFF) return false;
         if (codepoint < 0x10FFFE) return true;
-        if (codepoint <= 0x10FFFF) return false;
-
-        return true;
+        return false;
     }
     /**
@@ -118,19 +117,23 @@ public final class Text {
      * Validates that the given string value only contains text characters.
      */
     public static boolean isValidTextString(String string) {
-        for (int i = 0; i < string.length(); ) {
+        int length = string.length();
+        for (int i = 0; i < length; ) {
             int codePoint = string.codePointAt(i);
-            if ( ! Text.isTextCharacter(codePoint)) return false;
-
-            int charCount = Character.charCount(codePoint);
-            if (Character.isHighSurrogate(string.charAt(i))) {
-                if ( (charCount == 1) || !Character.isLowSurrogate(string.charAt(i+1))) return false;
+            if (codePoint < 0x80) {
+                if ( ! allowedAsciiChars[codePoint]) return false;
+            } else if (codePoint >= Character.MIN_SURROGATE) {
+                if ( ! isTextCharAboveMinSurrogate(codePoint)) return false;
+                if ( ! Character.isBmpCodePoint(codePoint)) {
+                    i++;
+                }
             }
-            i += charCount;
+            i++;
         }
         return true;
     }
+
     /** Returns whether the given code point is displayable. */
     public static boolean isDisplayable(int codePoint) {
         switch (Character.getType(codePoint)) {
diff --git a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
index 33274380aad..033918f0bad 100644
--- a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
+++ b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
@@ -12,18 +12,18 @@ import static org.junit.Assert.assertTrue;
 public class TextTestCase {
+    private static void validateText(OptionalInt expect, String text) {
+        assertEquals(expect, Text.validateTextString(text));
+        assertEquals(expect.isEmpty(), Text.isValidTextString(text));
+    }
     @Test
     public void testValidateTextString() {
-        assertFalse(Text.validateTextString("valid").isPresent());
-        assertEquals(OptionalInt.of(1), Text.validateTextString("text\u0001text\u0003"));
-        assertEquals(OptionalInt.of(0xDFFFF),
-                Text.validateTextString(new StringBuilder().appendCodePoint(0xDFFFF).toString()));
-        assertEquals(OptionalInt.of(0xDFFFF),
-                Text.validateTextString(new StringBuilder("foo").appendCodePoint(0xDFFFF).toString()));
-        assertEquals(OptionalInt.of(0xDFFFF),
-                Text.validateTextString(new StringBuilder().appendCodePoint(0xDFFFF).append("foo").toString()));
-        assertEquals(OptionalInt.of(0xDFFFF),
-                Text.validateTextString(new StringBuilder("foo").appendCodePoint(0xDFFFF).append("foo").toString()));
+        validateText(OptionalInt.empty(), "valid");
+        validateText(OptionalInt.of(1), "text\u0001text\u0003");
+        validateText(OptionalInt.of(0xDFFFF), new StringBuilder().appendCodePoint(0xDFFFF).toString());
+        validateText(OptionalInt.of(0xDFFFF), new StringBuilder("foo").appendCodePoint(0xDFFFF).toString());
+        validateText(OptionalInt.of(0xDFFFF), new StringBuilder().appendCodePoint(0xDFFFF).append("foo").toString());
+        validateText(OptionalInt.of(0xDFFFF), new StringBuilder("foo").appendCodePoint(0xDFFFF).append("foo").toString());
     }
     @Test
@@ -45,8 +45,9 @@ public class TextTestCase {
     @Test
     public void testThatHighSurrogateRequireLowSurrogate() {
-        assertEquals(OptionalInt.of(0xD800), Text.validateTextString(new StringBuilder().appendCodePoint(0xD800).toString()));
-        assertEquals(OptionalInt.of(0xD800), Text.validateTextString(new StringBuilder().appendCodePoint(0xD800).append(0x0000).toString()));
+        validateText(OptionalInt.of(0xD800), new StringBuilder().appendCodePoint(0xD800).toString());
+        validateText(OptionalInt.of(0xD800), new StringBuilder().appendCodePoint(0xD800).append(0x0000).toString());
+        validateText(OptionalInt.empty(), new StringBuilder().appendCodePoint(0xD800).appendCodePoint(0xDC00).toString());
     }
     @Test
@@ -78,7 +79,7 @@ public class TextTestCase {
         assertEquals("foo 3.14", Text.format("%s %.2f", "foo", 3.1415926536));
     }
-    private static long isValid(String [] strings, int num) {
+    private static long benchmarkIsValid(String [] strings, int num) {
         long sum = 0;
         for (int i=0; i < num; i++) {
             if (Text.isValidTextString(strings[i%strings.length])) {
@@ -87,7 +88,8 @@ public class TextTestCase {
         }
         return sum;
     }
-    private static long validate(String [] strings, int num) {
+
+    private static long benchmarkValidate(String [] strings, int num) {
         long sum = 0;
         for (int i=0; i < num; i++) {
             if (Text.validateTextString(strings[i%strings.length]).isEmpty()) {
@@ -99,23 +101,23 @@ public class TextTestCase {
     @Ignore
     @Test
-    public void benchmarkValidate() {
+    public void benchmarkTextValidation() {
         String [] strings = new String[100];
         for (int i=0; i < strings.length; i++) {
             strings[i] = new StringBuilder("some text ").append(i).append("of mine.").appendCodePoint(0xDFFFC).append("foo").toString();
         }
-        long sum = validate(strings, 1000000);
+        long sum = benchmarkValidate(strings, 1000000);
         System.out.println("Warmup num validate = " + sum);
-        sum = isValid(strings, 1000000);
+        sum = benchmarkIsValid(strings, 1000000);
         System.out.println("Warmup num isValid = " + sum);
         long start = System.nanoTime();
-        sum = validate(strings, 100000000);
+        sum = benchmarkValidate(strings, 100000000);
         long diff = System.nanoTime() - start;
         System.out.println("Validation num validate = " + sum + ". Took " + diff + "ns");
         start = System.nanoTime();
-        sum = isValid(strings, 100000000);
+        sum = benchmarkIsValid(strings, 100000000);
         diff = System.nanoTime() - start;
         System.out.println("Validation num isValid = " + sum + ". Took " + diff + "ns");
|