summaryrefslogtreecommitdiffstats
path: root/vespajlib
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2022-04-06 13:06:39 +0200
committerHenning Baldersheim <balder@yahoo-inc.com>2022-04-06 13:06:39 +0200
commit858c67f8a3cdae524344e6cbb22f6581c4352966 (patch)
tree0baa27d834a0c36333f9e08ca30f3b02dad91565 /vespajlib
parent3c4cddd0b08666e497996e7f5b499e5a86fc68eb (diff)
Refactor to optimize for hot path.
No reason to count characters when you already know there can only be one. Nor is there any reason to check for surrogates when there can be none.
Diffstat (limited to 'vespajlib')
-rw-r--r--vespajlib/src/main/java/com/yahoo/text/Text.java26
-rw-r--r--vespajlib/src/test/java/com/yahoo/text/TextTestCase.java40
2 files changed, 40 insertions, 26 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java
index 30eba3ebd65..f897404ae28 100644
--- a/vespajlib/src/main/java/com/yahoo/text/Text.java
+++ b/vespajlib/src/main/java/com/yahoo/text/Text.java
@@ -118,19 +118,31 @@ public final class Text {
* Validates that the given string value only contains text characters.
*/
public static boolean isValidTextString(String string) {
- for (int i = 0; i < string.length(); ) {
+ int length = string.length();
+ for (int i = 0; i < length; ) {
int codePoint = string.codePointAt(i);
- if ( ! Text.isTextCharacter(codePoint)) return false;
-
- int charCount = Character.charCount(codePoint);
- if (Character.isHighSurrogate(string.charAt(i))) {
- if ( (charCount == 1) || !Character.isLowSurrogate(string.charAt(i+1))) return false;
+ if (codePoint < 0x80) {
+ if ( ! allowedAsciiChars[codePoint]) return false;
+ i++;
+ } else if (codePoint < Character.MIN_SURROGATE) {
+ i++;
+ } else {
+ if ( ! isTextCharAboveUsAscii(codePoint)) return false;
+ if ( ! Character.isValidCodePoint(codePoint)) return false;
+ if ( ! Character.isSupplementaryCodePoint(codePoint)) {
+ if (Character.isHighSurrogate((char)codePoint)) return false;
+ i++;
+ } else {
+ if (Character.isHighSurrogate(Character.highSurrogate(codePoint))
+ && ! Character.isLowSurrogate(Character.lowSurrogate(codePoint))) return false;
+ i += 2;
+ }
}
- i += charCount;
}
return true;
}
+
/** Returns whether the given code point is displayable. */
public static boolean isDisplayable(int codePoint) {
switch (Character.getType(codePoint)) {
diff --git a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
index 33274380aad..033918f0bad 100644
--- a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
+++ b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
@@ -12,18 +12,18 @@ import static org.junit.Assert.assertTrue;
public class TextTestCase {
+ private static void validateText(OptionalInt expect, String text) {
+ assertEquals(expect, Text.validateTextString(text));
+ assertEquals(expect.isEmpty(), Text.isValidTextString(text));
+ }
@Test
public void testValidateTextString() {
- assertFalse(Text.validateTextString("valid").isPresent());
- assertEquals(OptionalInt.of(1), Text.validateTextString("text\u0001text\u0003"));
- assertEquals(OptionalInt.of(0xDFFFF),
- Text.validateTextString(new StringBuilder().appendCodePoint(0xDFFFF).toString()));
- assertEquals(OptionalInt.of(0xDFFFF),
- Text.validateTextString(new StringBuilder("foo").appendCodePoint(0xDFFFF).toString()));
- assertEquals(OptionalInt.of(0xDFFFF),
- Text.validateTextString(new StringBuilder().appendCodePoint(0xDFFFF).append("foo").toString()));
- assertEquals(OptionalInt.of(0xDFFFF),
- Text.validateTextString(new StringBuilder("foo").appendCodePoint(0xDFFFF).append("foo").toString()));
+ validateText(OptionalInt.empty(), "valid");
+ validateText(OptionalInt.of(1), "text\u0001text\u0003");
+ validateText(OptionalInt.of(0xDFFFF), new StringBuilder().appendCodePoint(0xDFFFF).toString());
+ validateText(OptionalInt.of(0xDFFFF), new StringBuilder("foo").appendCodePoint(0xDFFFF).toString());
+ validateText(OptionalInt.of(0xDFFFF), new StringBuilder().appendCodePoint(0xDFFFF).append("foo").toString());
+ validateText(OptionalInt.of(0xDFFFF), new StringBuilder("foo").appendCodePoint(0xDFFFF).append("foo").toString());
}
@Test
@@ -45,8 +45,9 @@ public class TextTestCase {
@Test
public void testThatHighSurrogateRequireLowSurrogate() {
- assertEquals(OptionalInt.of(0xD800), Text.validateTextString(new StringBuilder().appendCodePoint(0xD800).toString()));
- assertEquals(OptionalInt.of(0xD800), Text.validateTextString(new StringBuilder().appendCodePoint(0xD800).append(0x0000).toString()));
+ validateText(OptionalInt.of(0xD800), new StringBuilder().appendCodePoint(0xD800).toString());
+ validateText(OptionalInt.of(0xD800), new StringBuilder().appendCodePoint(0xD800).append(0x0000).toString());
+ validateText(OptionalInt.empty(), new StringBuilder().appendCodePoint(0xD800).appendCodePoint(0xDC00).toString());
}
@Test
@@ -78,7 +79,7 @@ public class TextTestCase {
assertEquals("foo 3.14", Text.format("%s %.2f", "foo", 3.1415926536));
}
- private static long isValid(String [] strings, int num) {
+ private static long benchmarkIsValid(String [] strings, int num) {
long sum = 0;
for (int i=0; i < num; i++) {
if (Text.isValidTextString(strings[i%strings.length])) {
@@ -87,7 +88,8 @@ public class TextTestCase {
}
return sum;
}
- private static long validate(String [] strings, int num) {
+
+ private static long benchmarkValidate(String [] strings, int num) {
long sum = 0;
for (int i=0; i < num; i++) {
if (Text.validateTextString(strings[i%strings.length]).isEmpty()) {
@@ -99,23 +101,23 @@ public class TextTestCase {
@Ignore
@Test
- public void benchmarkValidate() {
+ public void benchmarkTextValidation() {
String [] strings = new String[100];
for (int i=0; i < strings.length; i++) {
strings[i] = new StringBuilder("some text ").append(i).append("of mine.").appendCodePoint(0xDFFFC).append("foo").toString();
}
- long sum = validate(strings, 1000000);
+ long sum = benchmarkValidate(strings, 1000000);
System.out.println("Warmup num validate = " + sum);
- sum = isValid(strings, 1000000);
+ sum = benchmarkIsValid(strings, 1000000);
System.out.println("Warmup num isValid = " + sum);
long start = System.nanoTime();
- sum = validate(strings, 100000000);
+ sum = benchmarkValidate(strings, 100000000);
long diff = System.nanoTime() - start;
System.out.println("Validation num validate = " + sum + ". Took " + diff + "ns");
start = System.nanoTime();
- sum = isValid(strings, 100000000);
+ sum = benchmarkIsValid(strings, 100000000);
diff = System.nanoTime() - start;
System.out.println("Validation num isValid = " + sum + ". Took " + diff + "ns");