about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Henning Baldersheim <balder@yahoo-inc.com> 2022-04-06 15:01:03 +0200
committer: GitHub <noreply@github.com> 2022-04-06 15:01:03 +0200
commit: 7ac6ebd9fc7c3f1b42d374565149aec25fb61550 (patch)
tree: 9fd80f836ead377bd06190d90d991fc37ce4c74d
parent: 4211f68e438f9a068070b5d83146b557018963f2 (diff)
parent: 8e8ebf962e6b4305440b4061499d587e1ea1f11e (diff)
Merge pull request #21998 from vespa-engine/balder/refactor-to-favor-hot-path
Refactor to optimize for hot path.
-rw-r--r-- vespajlib/src/main/java/com/yahoo/text/Text.java | 27
-rw-r--r-- vespajlib/src/test/java/com/yahoo/text/TextTestCase.java | 40
2 files changed, 36 insertions, 31 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java
index 30eba3ebd65..d1712a20626 100644
--- a/vespajlib/src/main/java/com/yahoo/text/Text.java
+++ b/vespajlib/src/main/java/com/yahoo/text/Text.java
@@ -50,9 +50,10 @@ public final class Text {
return (codepoint < 0x80)
? allowedAsciiChars[codepoint]
- : isTextCharAboveUsAscii(codepoint);
+ : (codepoint < Character.MIN_SURROGATE) || isTextCharAboveMinSurrogate(codepoint);
}
- private static boolean isTextCharAboveUsAscii(int codepoint) {
+ private static boolean isTextCharAboveMinSurrogate(int codepoint) {
+ if (codepoint <= Character.MAX_HIGH_SURROGATE) return false;
if (codepoint < 0xFDD0) return true;
if (codepoint <= 0xFDDF) return false;
if (codepoint < 0x1FFFE) return true;
@@ -86,9 +87,7 @@ public final class Text {
if (codepoint < 0xFFFFE) return true;
if (codepoint <= 0xFFFFF) return false;
if (codepoint < 0x10FFFE) return true;
- if (codepoint <= 0x10FFFF) return false;
-
- return true;
+ return false;
}
/**
@@ -118,19 +117,23 @@ public final class Text {
* Validates that the given string value only contains text characters.
*/
public static boolean isValidTextString(String string) {
- for (int i = 0; i < string.length(); ) {
+ int length = string.length();
+ for (int i = 0; i < length; ) {
int codePoint = string.codePointAt(i);
- if ( ! Text.isTextCharacter(codePoint)) return false;
-
- int charCount = Character.charCount(codePoint);
- if (Character.isHighSurrogate(string.charAt(i))) {
- if ( (charCount == 1) || !Character.isLowSurrogate(string.charAt(i+1))) return false;
+ if (codePoint < 0x80) {
+ if ( ! allowedAsciiChars[codePoint]) return false;
+ } else if (codePoint >= Character.MIN_SURROGATE) {
+ if ( ! isTextCharAboveMinSurrogate(codePoint)) return false;
+ if ( ! Character.isBmpCodePoint(codePoint)) {
+ i++;
+ }
}
- i += charCount;
+ i++;
}
return true;
}
+
/** Returns whether the given code point is displayable. */
public static boolean isDisplayable(int codePoint) {
switch (Character.getType(codePoint)) {
diff --git a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
index 33274380aad..033918f0bad 100644
--- a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
+++ b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
@@ -12,18 +12,18 @@ import static org.junit.Assert.assertTrue;
public class TextTestCase {
+ private static void validateText(OptionalInt expect, String text) {
+ assertEquals(expect, Text.validateTextString(text));
+ assertEquals(expect.isEmpty(), Text.isValidTextString(text));
+ }
@Test
public void testValidateTextString() {
- assertFalse(Text.validateTextString("valid").isPresent());
- assertEquals(OptionalInt.of(1), Text.validateTextString("text\u0001text\u0003"));
- assertEquals(OptionalInt.of(0xDFFFF),
- Text.validateTextString(new StringBuilder().appendCodePoint(0xDFFFF).toString()));
- assertEquals(OptionalInt.of(0xDFFFF),
- Text.validateTextString(new StringBuilder("foo").appendCodePoint(0xDFFFF).toString()));
- assertEquals(OptionalInt.of(0xDFFFF),
- Text.validateTextString(new StringBuilder().appendCodePoint(0xDFFFF).append("foo").toString()));
- assertEquals(OptionalInt.of(0xDFFFF),
- Text.validateTextString(new StringBuilder("foo").appendCodePoint(0xDFFFF).append("foo").toString()));
+ validateText(OptionalInt.empty(), "valid");
+ validateText(OptionalInt.of(1), "text\u0001text\u0003");
+ validateText(OptionalInt.of(0xDFFFF), new StringBuilder().appendCodePoint(0xDFFFF).toString());
+ validateText(OptionalInt.of(0xDFFFF), new StringBuilder("foo").appendCodePoint(0xDFFFF).toString());
+ validateText(OptionalInt.of(0xDFFFF), new StringBuilder().appendCodePoint(0xDFFFF).append("foo").toString());
+ validateText(OptionalInt.of(0xDFFFF), new StringBuilder("foo").appendCodePoint(0xDFFFF).append("foo").toString());
}
@Test
@@ -45,8 +45,9 @@ public class TextTestCase {
@Test
public void testThatHighSurrogateRequireLowSurrogate() {
- assertEquals(OptionalInt.of(0xD800), Text.validateTextString(new StringBuilder().appendCodePoint(0xD800).toString()));
- assertEquals(OptionalInt.of(0xD800), Text.validateTextString(new StringBuilder().appendCodePoint(0xD800).append(0x0000).toString()));
+ validateText(OptionalInt.of(0xD800), new StringBuilder().appendCodePoint(0xD800).toString());
+ validateText(OptionalInt.of(0xD800), new StringBuilder().appendCodePoint(0xD800).append(0x0000).toString());
+ validateText(OptionalInt.empty(), new StringBuilder().appendCodePoint(0xD800).appendCodePoint(0xDC00).toString());
}
@Test
@@ -78,7 +79,7 @@ public class TextTestCase {
assertEquals("foo 3.14", Text.format("%s %.2f", "foo", 3.1415926536));
}
- private static long isValid(String [] strings, int num) {
+ private static long benchmarkIsValid(String [] strings, int num) {
long sum = 0;
for (int i=0; i < num; i++) {
if (Text.isValidTextString(strings[i%strings.length])) {
@@ -87,7 +88,8 @@ public class TextTestCase {
}
return sum;
}
- private static long validate(String [] strings, int num) {
+
+ private static long benchmarkValidate(String [] strings, int num) {
long sum = 0;
for (int i=0; i < num; i++) {
if (Text.validateTextString(strings[i%strings.length]).isEmpty()) {
@@ -99,23 +101,23 @@ public class TextTestCase {
@Ignore
@Test
- public void benchmarkValidate() {
+ public void benchmarkTextValidation() {
String [] strings = new String[100];
for (int i=0; i < strings.length; i++) {
strings[i] = new StringBuilder("some text ").append(i).append("of mine.").appendCodePoint(0xDFFFC).append("foo").toString();
}
- long sum = validate(strings, 1000000);
+ long sum = benchmarkValidate(strings, 1000000);
System.out.println("Warmup num validate = " + sum);
- sum = isValid(strings, 1000000);
+ sum = benchmarkIsValid(strings, 1000000);
System.out.println("Warmup num isValid = " + sum);
long start = System.nanoTime();
- sum = validate(strings, 100000000);
+ sum = benchmarkValidate(strings, 100000000);
long diff = System.nanoTime() - start;
System.out.println("Validation num validate = " + sum + ". Took " + diff + "ns");
start = System.nanoTime();
- sum = isValid(strings, 100000000);
+ sum = benchmarkIsValid(strings, 100000000);
diff = System.nanoTime() - start;
System.out.println("Validation num isValid = " + sum + ". Took " + diff + "ns");