summary refs log tree commit diff stats
path: root/vespajlib
diff options
context:
space:
mode:
author Jon Marius Venstad <jonmv@users.noreply.github.com> 2022-04-06 19:37:30 +0200
committer GitHub <noreply@github.com> 2022-04-06 19:37:30 +0200
commit 346e8e66e9c8d7f06c336d6c27919551604f18c0 (patch)
tree 3453769ddca6a16fed065fb5552ab6cd73cb0145 /vespajlib
parent 039589faf5f989d80b9fec2b28ed955ac6fd86f6 (diff)
parent cf3f7bbb9e99f5b3e6cf3ac3f93e813042e4a12c (diff)
Merge branch 'master' into jonmv/more-http-url
Diffstat (limited to 'vespajlib')
-rw-r--r-- vespajlib/src/main/java/com/yahoo/text/Text.java 28
-rw-r--r-- vespajlib/src/test/java/com/yahoo/text/TextTestCase.java 40
2 files changed, 36 insertions, 32 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java
index 501ca980187..d1712a20626 100644
--- a/vespajlib/src/main/java/com/yahoo/text/Text.java
+++ b/vespajlib/src/main/java/com/yahoo/text/Text.java
@@ -50,10 +50,10 @@ public final class Text {
return (codepoint < 0x80)
? allowedAsciiChars[codepoint]
- : isTextCharAboveUsAscii(codepoint);
+ : (codepoint < Character.MIN_SURROGATE) || isTextCharAboveMinSurrogate(codepoint);
}
- private static boolean isTextCharAboveUsAscii(int codepoint) {
- // TODO jonmv: compute modulo?
+ private static boolean isTextCharAboveMinSurrogate(int codepoint) {
+ if (codepoint <= Character.MAX_HIGH_SURROGATE) return false;
if (codepoint < 0xFDD0) return true;
if (codepoint <= 0xFDDF) return false;
if (codepoint < 0x1FFFE) return true;
@@ -87,9 +87,7 @@ public final class Text {
if (codepoint < 0xFFFFE) return true;
if (codepoint <= 0xFFFFF) return false;
if (codepoint < 0x10FFFE) return true;
- if (codepoint <= 0x10FFFF) return false;
-
- return true;
+ return false;
}
/**
@@ -119,19 +117,23 @@ public final class Text {
* Validates that the given string value only contains text characters.
*/
public static boolean isValidTextString(String string) {
- for (int i = 0; i < string.length(); ) {
+ int length = string.length();
+ for (int i = 0; i < length; ) {
int codePoint = string.codePointAt(i);
- if ( ! Text.isTextCharacter(codePoint)) return false;
-
- int charCount = Character.charCount(codePoint);
- if (Character.isHighSurrogate(string.charAt(i))) {
- if ( (charCount == 1) || !Character.isLowSurrogate(string.charAt(i+1))) return false;
+ if (codePoint < 0x80) {
+ if ( ! allowedAsciiChars[codePoint]) return false;
+ } else if (codePoint >= Character.MIN_SURROGATE) {
+ if ( ! isTextCharAboveMinSurrogate(codePoint)) return false;
+ if ( ! Character.isBmpCodePoint(codePoint)) {
+ i++;
+ }
}
- i += charCount;
+ i++;
}
return true;
}
+
/** Returns whether the given code point is displayable. */
public static boolean isDisplayable(int codePoint) {
switch (Character.getType(codePoint)) {
diff --git a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
index 33274380aad..033918f0bad 100644
--- a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
+++ b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
@@ -12,18 +12,18 @@ import static org.junit.Assert.assertTrue;
public class TextTestCase {
+ private static void validateText(OptionalInt expect, String text) {
+ assertEquals(expect, Text.validateTextString(text));
+ assertEquals(expect.isEmpty(), Text.isValidTextString(text));
+ }
@Test
public void testValidateTextString() {
- assertFalse(Text.validateTextString("valid").isPresent());
- assertEquals(OptionalInt.of(1), Text.validateTextString("text\u0001text\u0003"));
- assertEquals(OptionalInt.of(0xDFFFF),
- Text.validateTextString(new StringBuilder().appendCodePoint(0xDFFFF).toString()));
- assertEquals(OptionalInt.of(0xDFFFF),
- Text.validateTextString(new StringBuilder("foo").appendCodePoint(0xDFFFF).toString()));
- assertEquals(OptionalInt.of(0xDFFFF),
- Text.validateTextString(new StringBuilder().appendCodePoint(0xDFFFF).append("foo").toString()));
- assertEquals(OptionalInt.of(0xDFFFF),
- Text.validateTextString(new StringBuilder("foo").appendCodePoint(0xDFFFF).append("foo").toString()));
+ validateText(OptionalInt.empty(), "valid");
+ validateText(OptionalInt.of(1), "text\u0001text\u0003");
+ validateText(OptionalInt.of(0xDFFFF), new StringBuilder().appendCodePoint(0xDFFFF).toString());
+ validateText(OptionalInt.of(0xDFFFF), new StringBuilder("foo").appendCodePoint(0xDFFFF).toString());
+ validateText(OptionalInt.of(0xDFFFF), new StringBuilder().appendCodePoint(0xDFFFF).append("foo").toString());
+ validateText(OptionalInt.of(0xDFFFF), new StringBuilder("foo").appendCodePoint(0xDFFFF).append("foo").toString());
}
@Test
@@ -45,8 +45,9 @@ public class TextTestCase {
@Test
public void testThatHighSurrogateRequireLowSurrogate() {
- assertEquals(OptionalInt.of(0xD800), Text.validateTextString(new StringBuilder().appendCodePoint(0xD800).toString()));
- assertEquals(OptionalInt.of(0xD800), Text.validateTextString(new StringBuilder().appendCodePoint(0xD800).append(0x0000).toString()));
+ validateText(OptionalInt.of(0xD800), new StringBuilder().appendCodePoint(0xD800).toString());
+ validateText(OptionalInt.of(0xD800), new StringBuilder().appendCodePoint(0xD800).append(0x0000).toString());
+ validateText(OptionalInt.empty(), new StringBuilder().appendCodePoint(0xD800).appendCodePoint(0xDC00).toString());
}
@Test
@@ -78,7 +79,7 @@ public class TextTestCase {
assertEquals("foo 3.14", Text.format("%s %.2f", "foo", 3.1415926536));
}
- private static long isValid(String [] strings, int num) {
+ private static long benchmarkIsValid(String [] strings, int num) {
long sum = 0;
for (int i=0; i < num; i++) {
if (Text.isValidTextString(strings[i%strings.length])) {
@@ -87,7 +88,8 @@ public class TextTestCase {
}
return sum;
}
- private static long validate(String [] strings, int num) {
+
+ private static long benchmarkValidate(String [] strings, int num) {
long sum = 0;
for (int i=0; i < num; i++) {
if (Text.validateTextString(strings[i%strings.length]).isEmpty()) {
@@ -99,23 +101,23 @@ public class TextTestCase {
@Ignore
@Test
- public void benchmarkValidate() {
+ public void benchmarkTextValidation() {
String [] strings = new String[100];
for (int i=0; i < strings.length; i++) {
strings[i] = new StringBuilder("some text ").append(i).append("of mine.").appendCodePoint(0xDFFFC).append("foo").toString();
}
- long sum = validate(strings, 1000000);
+ long sum = benchmarkValidate(strings, 1000000);
System.out.println("Warmup num validate = " + sum);
- sum = isValid(strings, 1000000);
+ sum = benchmarkIsValid(strings, 1000000);
System.out.println("Warmup num isValid = " + sum);
long start = System.nanoTime();
- sum = validate(strings, 100000000);
+ sum = benchmarkValidate(strings, 100000000);
long diff = System.nanoTime() - start;
System.out.println("Validation num validate = " + sum + ". Took " + diff + "ns");
start = System.nanoTime();
- sum = isValid(strings, 100000000);
+ sum = benchmarkIsValid(strings, 100000000);
diff = System.nanoTime() - start;
System.out.println("Validation num isValid = " + sum + ". Took " + diff + "ns");