aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java13
-rw-r--r--document/src/main/java/com/yahoo/document/idstring/IdString.java7
-rw-r--r--vespajlib/abi-spec.json1
-rw-r--r--vespajlib/src/main/java/com/yahoo/text/Text.java23
-rw-r--r--vespajlib/src/test/java/com/yahoo/text/TextTestCase.java44
5 files changed, 75 insertions, 13 deletions
diff --git a/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java b/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java
index de96c548652..51b068b4712 100644
--- a/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java
+++ b/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java
@@ -17,7 +17,7 @@ import com.yahoo.vespa.objects.Ids;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
-import java.util.OptionalInt;
+import java.util.Objects;
/**
* A StringFieldValue is a wrapper class that holds a String in {@link com.yahoo.document.Document}s and
@@ -57,10 +57,9 @@ public class StringFieldValue extends FieldValue {
}
private static void validateTextString(String value) {
- OptionalInt illegalCodePoint = Text.validateTextString(value);
- if (illegalCodePoint.isPresent()) {
+ if ( ! Text.isValidTextString(value)) {
throw new IllegalArgumentException("The string field value contains illegal code point 0x" +
- Integer.toHexString(illegalCodePoint.getAsInt()).toUpperCase());
+ Integer.toHexString(Text.validateTextString(value).getAsInt()).toUpperCase());
}
}
@@ -88,7 +87,7 @@ public class StringFieldValue extends FieldValue {
public StringFieldValue clone() {
StringFieldValue strfval = (StringFieldValue) super.clone();
if (spanTrees != null) {
- strfval.spanTrees = new HashMap<String, SpanTree>(spanTrees.size());
+ strfval.spanTrees = new HashMap<>(spanTrees.size());
for (Map.Entry<String, SpanTree> entry : spanTrees.entrySet()) {
strfval.spanTrees.put(entry.getKey(), new SpanTree(entry.getValue()));
}
@@ -240,8 +239,8 @@ public class StringFieldValue extends FieldValue {
if (!(o instanceof StringFieldValue)) return false;
if (!super.equals(o)) return false;
StringFieldValue that = (StringFieldValue) o;
- if ((spanTrees != null) ? !spanTrees.equals(that.spanTrees) : that.spanTrees != null) return false;
- if ((value != null) ? !value.equals(that.value) : that.value != null) return false;
+ if (!Objects.equals(spanTrees, that.spanTrees)) return false;
+ if (!Objects.equals(value, that.value)) return false;
return true;
}
diff --git a/document/src/main/java/com/yahoo/document/idstring/IdString.java b/document/src/main/java/com/yahoo/document/idstring/IdString.java
index 55beff9eef9..40ac19dec6d 100644
--- a/document/src/main/java/com/yahoo/document/idstring/IdString.java
+++ b/document/src/main/java/com/yahoo/document/idstring/IdString.java
@@ -5,8 +5,6 @@ import com.yahoo.api.annotations.Beta;
import com.yahoo.text.Text;
import com.yahoo.text.Utf8String;
-import java.util.OptionalInt;
-
/**
* To be used with DocumentId constructor.
*
@@ -81,10 +79,9 @@ public abstract class IdString {
}
private static void validateTextString(String id) {
- OptionalInt illegalCodePoint = Text.validateTextString(id);
- if (illegalCodePoint.isPresent()) {
+ if ( ! Text.isValidTextString(id)) {
throw new IllegalArgumentException("Unparseable id '" + id + "': Contains illegal code point 0x" +
- Integer.toHexString(illegalCodePoint.getAsInt()).toUpperCase());
+ Integer.toHexString(Text.validateTextString(id).getAsInt()).toUpperCase());
}
}
diff --git a/vespajlib/abi-spec.json b/vespajlib/abi-spec.json
index 20c7d435964..e69631e8375 100644
--- a/vespajlib/abi-spec.json
+++ b/vespajlib/abi-spec.json
@@ -3232,6 +3232,7 @@
"methods": [
"public static boolean isTextCharacter(int)",
"public static java.util.OptionalInt validateTextString(java.lang.String)",
+ "public static boolean isValidTextString(java.lang.String)",
"public static boolean isDisplayable(int)",
"public static java.lang.String stripInvalidCharacters(java.lang.String)",
"public static java.lang.String truncate(java.lang.String, int)",
diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java
index 662100aa8ea..30eba3ebd65 100644
--- a/vespajlib/src/main/java/com/yahoo/text/Text.java
+++ b/vespajlib/src/main/java/com/yahoo/text/Text.java
@@ -48,7 +48,11 @@ public final class Text {
// The link above notes that 0x7F-0x84 and 0x86-0x9F are discouraged, but they are still allowed -
// see http://www.w3.org/International/questions/qa-controls
- if (codepoint < 0x80) return allowedAsciiChars[codepoint];
+ return (codepoint < 0x80)
+ ? allowedAsciiChars[codepoint]
+ : isTextCharAboveUsAscii(codepoint);
+ }
+ private static boolean isTextCharAboveUsAscii(int codepoint) {
if (codepoint < 0xFDD0) return true;
if (codepoint <= 0xFDDF) return false;
if (codepoint < 0x1FFFE) return true;
@@ -110,6 +114,23 @@ public final class Text {
return OptionalInt.empty();
}
+ /**
+ * Validates that the given string value only contains text characters.
+ */
+ public static boolean isValidTextString(String string) {
+ for (int i = 0; i < string.length(); ) {
+ int codePoint = string.codePointAt(i);
+ if ( ! Text.isTextCharacter(codePoint)) return false;
+
+ int charCount = Character.charCount(codePoint);
+ if (Character.isHighSurrogate(string.charAt(i))) {
+ if ( (charCount == 1) || !Character.isLowSurrogate(string.charAt(i+1))) return false;
+ }
+ i += charCount;
+ }
+ return true;
+ }
+
/** Returns whether the given code point is displayable. */
public static boolean isDisplayable(int codePoint) {
switch (Character.getType(codePoint)) {
diff --git a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
index a2cb2158278..33274380aad 100644
--- a/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
+++ b/vespajlib/src/test/java/com/yahoo/text/TextTestCase.java
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.text;
+import org.junit.Ignore;
import org.junit.Test;
import java.util.OptionalInt;
@@ -76,4 +77,47 @@ public class TextTestCase {
public void testFormat() {
assertEquals("foo 3.14", Text.format("%s %.2f", "foo", 3.1415926536));
}
+
+ private static long isValid(String [] strings, int num) {
+ long sum = 0;
+ for (int i=0; i < num; i++) {
+ if (Text.isValidTextString(strings[i%strings.length])) {
+ sum++;
+ }
+ }
+ return sum;
+ }
+ private static long validate(String [] strings, int num) {
+ long sum = 0;
+ for (int i=0; i < num; i++) {
+ if (Text.validateTextString(strings[i%strings.length]).isEmpty()) {
+ sum++;
+ }
+ }
+ return sum;
+ }
+
+ @Ignore
+ @Test
+ public void benchmarkValidate() {
+ String [] strings = new String[100];
+ for (int i=0; i < strings.length; i++) {
+ strings[i] = new StringBuilder("some text ").append(i).append("of mine.").appendCodePoint(0xDFFFC).append("foo").toString();
+ }
+ long sum = validate(strings, 1000000);
+ System.out.println("Warmup num validate = " + sum);
+ sum = isValid(strings, 1000000);
+ System.out.println("Warmup num isValid = " + sum);
+
+ long start = System.nanoTime();
+ sum = validate(strings, 100000000);
+ long diff = System.nanoTime() - start;
+ System.out.println("Validation num validate = " + sum + ". Took " + diff + "ns");
+
+ start = System.nanoTime();
+ sum = isValid(strings, 100000000);
+ diff = System.nanoTime() - start;
+ System.out.println("Validation num isValid = " + sum + ". Took " + diff + "ns");
+
+ }
}