summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Geir Storli <geirst@oath.com> 2017-10-20 13:42:12 +0200
committer Geir Storli <geirst@oath.com> 2017-10-20 13:43:42 +0200
commit 5bd2f9bc49ed4bd9b033005ee7f9ecd624c6b9c9 (patch)
tree cf81e3d65747caec2818dd49e0d89f1541144a9d
parent d82d7c04128c23bc6c3ec5b8e7e822315055ca29 (diff)
Validate that document ids only contain text characters.
-rw-r--r-- document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java 22
-rw-r--r-- document/src/main/java/com/yahoo/document/idstring/IdString.java 11
-rw-r--r-- document/src/test/java/com/yahoo/document/DocumentIdTestCase.java 14
-rw-r--r-- vespajlib/src/main/java/com/yahoo/text/Text.java 25
4 files changed, 58 insertions, 14 deletions
diff --git a/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java b/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java
index 38a643992f1..afb7efd788f 100644
--- a/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java
+++ b/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java
@@ -17,6 +17,7 @@ import com.yahoo.vespa.objects.Ids;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
+import java.util.OptionalInt;
/**
* A StringFieldValue is a wrapper class that holds a String in {@link com.yahoo.document.Document}s and
@@ -55,19 +56,16 @@ public class StringFieldValue extends FieldValue {
setValue(value);
}
- private void setValue(String value) {
- for (int i = 0; i < value.length(); i++) {
- char theChar = value.charAt(i);
- int codePoint = value.codePointAt(i);
- if (Character.isHighSurrogate(theChar)) {
- //skip one char ahead, since codePointAt() consumes one more char in this case
- ++i;
- }
-
- if ( ! Text.isTextCharacter(codePoint))
- throw new IllegalArgumentException("A string field value cannot contain code point 0x" +
- Integer.toHexString(codePoint).toUpperCase());
+ private static void validateTextString(String value) { // Rejects a value that contains a non-text code point.
+ OptionalInt illegalCodePoint = Text.validateTextString(value); // Present only when an illegal code point was found; holds the first one.
+ if (illegalCodePoint.isPresent()) {
+ throw new IllegalArgumentException("The string field value contains illegal code point 0x" +
+ Integer.toHexString(illegalCodePoint.getAsInt()).toUpperCase()); // The offending code point is reported as upper-case hex.
}
+ }
+
+ private void setValue(String value) { // Validates the string first and stores it only if validation passes.
+ validateTextString(value); // Throws IllegalArgumentException if the value contains an illegal code point.
this.value = value;
}
diff --git a/document/src/main/java/com/yahoo/document/idstring/IdString.java b/document/src/main/java/com/yahoo/document/idstring/IdString.java
index 468dc9a38b1..27cdf8d603e 100644
--- a/document/src/main/java/com/yahoo/document/idstring/IdString.java
+++ b/document/src/main/java/com/yahoo/document/idstring/IdString.java
@@ -1,11 +1,13 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.document.idstring;
+import com.yahoo.text.Text;
import com.yahoo.text.Utf8String;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
+import java.util.OptionalInt;
/**
* To be used with DocumentId constructor.
@@ -75,7 +77,16 @@ public abstract class IdString {
}
}
+ private static void validateTextString(String id) { // Rejects a document id string that contains a non-text code point.
+ OptionalInt illegalCodePoint = Text.validateTextString(id); // Present only when an illegal code point was found; holds the first one.
+ if (illegalCodePoint.isPresent()) {
+ throw new IllegalArgumentException("Unparseable id '" + id + "': Contains illegal code point 0x" + // The message echoes the full id as given.
+ Integer.toHexString(illegalCodePoint.getAsInt()).toUpperCase()); // The offending code point is reported as upper-case hex.
+ }
+ }
+
public static IdString createIdString(String id) {
+ validateTextString(id);
String namespace;
long userId;
String group;
diff --git a/document/src/test/java/com/yahoo/document/DocumentIdTestCase.java b/document/src/test/java/com/yahoo/document/DocumentIdTestCase.java
index 75dfd612ec4..da7d40752fd 100644
--- a/document/src/test/java/com/yahoo/document/DocumentIdTestCase.java
+++ b/document/src/test/java/com/yahoo/document/DocumentIdTestCase.java
@@ -10,6 +10,9 @@ import java.io.*;
import java.util.regex.Pattern;
import java.util.Arrays;
+import static org.hamcrest.Matchers.containsString;
+import static org.junit.Assert.assertThat;
+
public class DocumentIdTestCase extends junit.framework.TestCase {
DocumentTypeManager manager = new DocumentTypeManager();
@@ -291,4 +294,15 @@ public class DocumentIdTestCase extends junit.framework.TestCase {
assertEquals(Arrays.hashCode(docId0Gid), Arrays.hashCode(docId0CopyGid));
}
+ public void testDocumentIdCanOnlyContainTextCharacters() throws UnsupportedEncodingException { // Checks that constructing a DocumentId from a string containing a NUL character fails.
+ byte[] rawId = {105, 100, 58, 97, 58, 98, 58, 58, 0, 99}; // The bytes of "id:a:b::" followed by a NUL byte (0x00) and then 'c'
+ String strId = new String(rawId, "UTF-8");
+ try {
+ new DocumentId(strId); // Expected to throw, since the id contains a NUL character.
+ fail("Expected an IllegalArgumentException to be thrown");
+ } catch (IllegalArgumentException ex) {
+ assertThat(ex.getMessage(), containsString("illegal code point 0x0")); // Integer.toHexString(0) is "0", hence "0x0".
+ }
+ }
+
}
diff --git a/vespajlib/src/main/java/com/yahoo/text/Text.java b/vespajlib/src/main/java/com/yahoo/text/Text.java
index 2b670e5d727..684cdbbcb3e 100644
--- a/vespajlib/src/main/java/com/yahoo/text/Text.java
+++ b/vespajlib/src/main/java/com/yahoo/text/Text.java
@@ -1,5 +1,7 @@
package com.yahoo.text;
+import java.util.OptionalInt;
+
/**
* Text utility functions.
*
@@ -81,6 +83,25 @@ public final class Text {
if (codepoint <= 0x10FFFF) return false;
return true;
- }
-
+ }
+
+ /**
+ * Validates that the given string value only contains text characters and
+ * returns the first illegal code point if one is found.
+ *
+ * The string is scanned from the start, one code point at a time, and each
+ * code point is checked with {@code isTextCharacter}. Scanning stops at the
+ * first code point that fails the check.
+ *
+ * @param value the string to validate; must not be null, since its length is read unconditionally
+ * @return an OptionalInt holding the first code point that is not a text character,
+ *         or an empty OptionalInt if no such code point is found
+ */
+ public static OptionalInt validateTextString(String value) {
+ for (int i = 0; i < value.length(); i++) {
+ char theChar = value.charAt(i);
+ int codePoint = value.codePointAt(i);
+ if (Character.isHighSurrogate(theChar)) {
+ // Skip one char ahead, since codePointAt() consumes one more char in this case
+ ++i; // NOTE(review): this also runs for an unpaired high surrogate, where codePointAt() returns only the surrogate itself and the next char would then be skipped unvalidated.
+ } // NOTE(review): presumably harmless if isTextCharacter rejects surrogate code points (confirm); advancing by Character.charCount(codePoint) would avoid the question.
+ if (!Text.isTextCharacter(codePoint)) {
+ return OptionalInt.of(codePoint); // Report the first offending code point and stop scanning.
+ }
+ }
+ return OptionalInt.empty(); // No illegal code point was found.
+ }
+
}