diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-04-05 15:02:42 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-05 15:02:42 +0200 |
commit | fe5a551b79ca34ea88bdae6411bd85727d8623a0 (patch) | |
tree | 62d2f4517bed15838403a2e12544133ea67b4b18 /vespajlib | |
parent | c2920aae3c29a7bb50217f5249a7a85f8aa772ca (diff) | |
parent | 064c2b97ce88c6ccd904871dbda3be8e720990d4 (diff) |
Merge pull request #21981 from vespa-engine/balder/use-primitives-to-get-more-predictable-jit-inlining
Use a primitive to see if that makes the JIT compiler more predictable.
Diffstat (limited to 'vespajlib')
-rw-r--r-- | vespajlib/abi-spec.json | 1 | ||||
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/text/Text.java | 23 | ||||
-rw-r--r-- | vespajlib/src/test/java/com/yahoo/text/TextTestCase.java | 44 |
3 files changed, 67 insertions, 1 deletions
diff --git a/vespajlib/abi-spec.json b/vespajlib/abi-spec.json index 20c7d435964..e69631e8375 100644 --- a/vespajlib/abi-spec.json +++ b/vespajlib/abi-spec.json @@ -3232,6 +3232,7 @@ "methods": [ "public static boolean isTextCharacter(int)", "public static java.util.OptionalInt validateTextString(java.lang.String)", + "public static boolean isValidTextString(java.lang.String)", "public static boolean isDisplayable(int)", "public static java.lang.String stripInvalidCharacters(java.lang.String)", "public static java.lang.String truncate(java.lang.String, int)", diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java index 662100aa8ea..30eba3ebd65 100644 --- a/vespajlib/src/main/java/com/yahoo/text/Text.java +++ b/vespajlib/src/main/java/com/yahoo/text/Text.java @@ -48,7 +48,11 @@ public final class Text { // The link above notes that 0x7F-0x84 and 0x86-0x9F are discouraged, but they are still allowed - // see http://www.w3.org/International/questions/qa-controls - if (codepoint < 0x80) return allowedAsciiChars[codepoint]; + return (codepoint < 0x80) + ? allowedAsciiChars[codepoint] + : isTextCharAboveUsAscii(codepoint); + } + private static boolean isTextCharAboveUsAscii(int codepoint) { if (codepoint < 0xFDD0) return true; if (codepoint <= 0xFDDF) return false; if (codepoint < 0x1FFFE) return true; @@ -110,6 +114,23 @@ public final class Text { return OptionalInt.empty(); } + /** + * Validates that the given string value only contains text characters. + */ + public static boolean isValidTextString(String string) { + for (int i = 0; i < string.length(); ) { + int codePoint = string.codePointAt(i); + if ( ! Text.isTextCharacter(codePoint)) return false; + + int charCount = Character.charCount(codePoint); + if (Character.isHighSurrogate(string.charAt(i))) { + if ( (charCount == 1) || !Character.isLowSurrogate(string.charAt(i+1))) return false; + } + i += charCount; + } + return true; + } + /** Returns whether the given code point is displayable. */ public static boolean isDisplayable(int codePoint) { switch (Character.getType(codePoint)) { diff --git a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java index a2cb2158278..33274380aad 100644 --- a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java +++ b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.text; +import org.junit.Ignore; import org.junit.Test; import java.util.OptionalInt; @@ -76,4 +77,47 @@ public class TextTestCase { public void testFormat() { assertEquals("foo 3.14", Text.format("%s %.2f", "foo", 3.1415926536)); } + + private static long isValid(String [] strings, int num) { + long sum = 0; + for (int i=0; i < num; i++) { + if (Text.isValidTextString(strings[i%strings.length])) { + sum++; + } + } + return sum; + } + private static long validate(String [] strings, int num) { + long sum = 0; + for (int i=0; i < num; i++) { + if (Text.validateTextString(strings[i%strings.length]).isEmpty()) { + sum++; + } + } + return sum; + } + + @Ignore + @Test + public void benchmarkValidate() { + String [] strings = new String[100]; + for (int i=0; i < strings.length; i++) { + strings[i] = new StringBuilder("some text ").append(i).append("of mine.").appendCodePoint(0xDFFFC).append("foo").toString(); + } + long sum = validate(strings, 1000000); + System.out.println("Warmup num validate = " + sum); + sum = isValid(strings, 1000000); + System.out.println("Warmup num isValid = " + sum); + + long start = System.nanoTime(); + sum = validate(strings, 100000000); + long diff = System.nanoTime() - start; + System.out.println("Validation num validate = " + sum + ". Took " + diff + "ns"); + + start = System.nanoTime(); + sum = isValid(strings, 100000000); + diff = System.nanoTime() - start; + System.out.println("Validation num isValid = " + sum + ". Took " + diff + "ns"); + + } } |