summaryrefslogtreecommitdiffstats
path: root/document
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2017-10-24 13:52:49 +0200
committerGitHub <noreply@github.com>2017-10-24 13:52:49 +0200
commit95151839fd9f90f88023f1ae5f0302915d2aeeb8 (patch)
tree09862116c7d937f24d5af49e45a3b464817344dc /document
parent6037e9d694386b5a7b104eea61259d8671ba13db (diff)
parentdc9ff821c637f0a06c422c2f4eb00773ffd61ff6 (diff)
Merge pull request #3834 from vespa-engine/geirst/validate-document-ids-only-contain-text-characters
Validate that document ids only contain text characters.
Diffstat (limited to 'document')
-rw-r--r--document/src/main/java/com/yahoo/document/DocumentId.java18
-rw-r--r--document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java22
-rw-r--r--document/src/main/java/com/yahoo/document/idstring/IdString.java38
-rw-r--r--document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer42.java2
-rw-r--r--document/src/test/java/com/yahoo/document/DocumentIdTestCase.java47
-rw-r--r--document/src/test/java/com/yahoo/document/DocumentTestCase.java31
6 files changed, 140 insertions, 18 deletions
diff --git a/document/src/main/java/com/yahoo/document/DocumentId.java b/document/src/main/java/com/yahoo/document/DocumentId.java
index 3ba977fec9e..e7c904cc420 100644
--- a/document/src/main/java/com/yahoo/document/DocumentId.java
+++ b/document/src/main/java/com/yahoo/document/DocumentId.java
@@ -25,10 +25,9 @@ public class DocumentId extends Identifiable implements Serializable {
}
/**
- * Constructor. This constructor is used if the DocumentId is used outside of a Document object, but we have the
- * URI.
+ * Creates a document id based on the given document id URI string.
*
- * @param id Associate with this URI, storage address etc. is not applicable.
+ * The document id string can only contain text characters.
*/
public DocumentId(String id) {
if (id == null) {
@@ -43,6 +42,17 @@ public class DocumentId extends Identifiable implements Serializable {
globalId = null;
}
+ /**
+ * Creates a document id based on a serialized document id URI string.
+ *
+ * The document id string is not allowed to contain 0x0 byte characters.
+ * Otherwise all characters are allowed to ensure that document ids
+ * already stored can be de-serialized.
+ */
+ public static DocumentId createFromSerialized(String id) {
+ return new DocumentId(IdString.createFromSerialized(id));
+ }
+
@Override
public DocumentId clone() {
DocumentId docId = (DocumentId)super.clone();
@@ -95,7 +105,7 @@ public class DocumentId extends Identifiable implements Serializable {
if (data instanceof DocumentReader) {
id = ((DocumentReader)data).readDocumentId().getScheme();
} else {
- id = IdString.createIdString(data.getString(null));
+ id = IdString.createFromSerialized(data.getString(null));
}
}
diff --git a/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java b/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java
index 38a643992f1..afb7efd788f 100644
--- a/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java
+++ b/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java
@@ -17,6 +17,7 @@ import com.yahoo.vespa.objects.Ids;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
+import java.util.OptionalInt;
/**
* A StringFieldValue is a wrapper class that holds a String in {@link com.yahoo.document.Document}s and
@@ -55,19 +56,16 @@ public class StringFieldValue extends FieldValue {
setValue(value);
}
- private void setValue(String value) {
- for (int i = 0; i < value.length(); i++) {
- char theChar = value.charAt(i);
- int codePoint = value.codePointAt(i);
- if (Character.isHighSurrogate(theChar)) {
- //skip one char ahead, since codePointAt() consumes one more char in this case
- ++i;
- }
-
- if ( ! Text.isTextCharacter(codePoint))
- throw new IllegalArgumentException("A string field value cannot contain code point 0x" +
- Integer.toHexString(codePoint).toUpperCase());
+ private static void validateTextString(String value) {
+ OptionalInt illegalCodePoint = Text.validateTextString(value);
+ if (illegalCodePoint.isPresent()) {
+ throw new IllegalArgumentException("The string field value contains illegal code point 0x" +
+ Integer.toHexString(illegalCodePoint.getAsInt()).toUpperCase());
}
+ }
+
+ private void setValue(String value) {
+ validateTextString(value);
this.value = value;
}
diff --git a/document/src/main/java/com/yahoo/document/idstring/IdString.java b/document/src/main/java/com/yahoo/document/idstring/IdString.java
index 468dc9a38b1..c58f2070e31 100644
--- a/document/src/main/java/com/yahoo/document/idstring/IdString.java
+++ b/document/src/main/java/com/yahoo/document/idstring/IdString.java
@@ -1,11 +1,13 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.document.idstring;
+import com.yahoo.text.Text;
import com.yahoo.text.Utf8String;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
+import java.util.OptionalInt;
/**
* To be used with DocumentId constructor.
@@ -75,7 +77,43 @@ public abstract class IdString {
}
}
+ /**
+ * Creates a IdString based on the given document id string.
+ *
+ * The document id string can only contain text characters.
+ */
public static IdString createIdString(String id) {
+ validateTextString(id);
+ return parseAndCreate(id);
+ }
+
+ /**
+ * Creates a IdString based on the given serialized document id string.
+ *
+ * The document id string can not contain 0x0 byte characters.
+ */
+ public static IdString createFromSerialized(String id) {
+ validateNoZeroBytes(id);
+ return parseAndCreate(id);
+ }
+
+ private static void validateTextString(String id) {
+ OptionalInt illegalCodePoint = Text.validateTextString(id);
+ if (illegalCodePoint.isPresent()) {
+ throw new IllegalArgumentException("Unparseable id '" + id + "': Contains illegal code point 0x" +
+ Integer.toHexString(illegalCodePoint.getAsInt()).toUpperCase());
+ }
+ }
+
+ private static void validateNoZeroBytes(String id) {
+ for (int i = 0; i < id.length(); i++) {
+ if (id.codePointAt(i) == 0) {
+ throw new IllegalArgumentException("Unparseable id '" + id + "': Contains illegal zero byte code point");
+ }
+ }
+ }
+
+ private static IdString parseAndCreate(String id) {
String namespace;
long userId;
String group;
diff --git a/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer42.java b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer42.java
index ff0e11bcdeb..a048ea349eb 100644
--- a/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer42.java
+++ b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer42.java
@@ -600,7 +600,7 @@ public class VespaDocumentDeserializer42 extends VespaDocumentSerializer42 imple
public DocumentId readDocumentId() {
Utf8String uri = new Utf8String(parseNullTerminatedString(getBuf().getByteBuffer()));
- return new DocumentId(uri.toString());
+ return DocumentId.createFromSerialized(uri.toString());
}
public DocumentType readDocumentType() {
diff --git a/document/src/test/java/com/yahoo/document/DocumentIdTestCase.java b/document/src/test/java/com/yahoo/document/DocumentIdTestCase.java
index 75dfd612ec4..79a10bc72e4 100644
--- a/document/src/test/java/com/yahoo/document/DocumentIdTestCase.java
+++ b/document/src/test/java/com/yahoo/document/DocumentIdTestCase.java
@@ -3,6 +3,7 @@ package com.yahoo.document;
import com.yahoo.document.*;
import com.yahoo.document.idstring.*;
+import com.yahoo.vespa.objects.BufferSerializer;
import java.math.BigInteger;
@@ -10,6 +11,9 @@ import java.io.*;
import java.util.regex.Pattern;
import java.util.Arrays;
+import static org.hamcrest.Matchers.containsString;
+import static org.junit.Assert.assertThat;
+
public class DocumentIdTestCase extends junit.framework.TestCase {
DocumentTypeManager manager = new DocumentTypeManager();
@@ -291,4 +295,47 @@ public class DocumentIdTestCase extends junit.framework.TestCase {
assertEquals(Arrays.hashCode(docId0Gid), Arrays.hashCode(docId0CopyGid));
}
+ public void testDocumentIdCanOnlyContainTextCharacters() throws UnsupportedEncodingException {
+ assertExceptionWhenConstructing(new byte[]{105, 100, 58, 97, 58, 98, 58, 58, 0, 99}, // "id:a:b::0x0c"
+ "illegal code point 0x0");
+ assertExceptionWhenConstructing(new byte[]{105, 100, 58, 97, 58, 98, 58, 58, 7, 99}, // "id:a:b::0x7c"
+ "illegal code point 0x7");
+ }
+
+ private void assertExceptionWhenConstructing(byte[] rawId,
+ String exceptionMsg) throws UnsupportedEncodingException {
+ String strId = new String(rawId, "UTF-8");
+ try {
+ new DocumentId(strId);
+ fail("Expected an IllegalArgumentException to be thrown");
+ } catch (IllegalArgumentException ex) {
+ assertThat(ex.getMessage(), containsString(exceptionMsg));
+ }
+ }
+
+ public void testSerializedDocumentIdCanContainNonTextCharacter() throws UnsupportedEncodingException {
+ String strId = new String(new byte[]{105, 100, 58, 97, 58, 98, 58, 58, 7, 99}); // "id:a:b::0x7c"
+ DocumentId docId = DocumentId.createFromSerialized(strId);
+ {
+ assertEquals(strId, docId.toString());
+ }
+ {
+ BufferSerializer buf = new BufferSerializer();
+ docId.serialize(buf);
+ buf.flip();
+ DocumentId deserializedId = new DocumentId(buf);
+ assertEquals(strId, deserializedId.toString());
+ }
+ }
+
+ public void testSerializedDocumentIdCannotContainZeroByte() throws UnsupportedEncodingException {
+ String strId = new String(new byte[]{105, 100, 58, 97, 58, 98, 58, 58, 0, 99}); // "id:a:b::0x0c"
+ try {
+ DocumentId.createFromSerialized(strId);
+ fail("Expected an IllegalArgumentException to be thrown");
+ } catch (IllegalArgumentException ex) {
+ assertThat(ex.getMessage(), containsString("illegal zero byte code point"));
+ }
+ }
+
}
diff --git a/document/src/test/java/com/yahoo/document/DocumentTestCase.java b/document/src/test/java/com/yahoo/document/DocumentTestCase.java
index 47a4c377da0..44c82042d5c 100644
--- a/document/src/test/java/com/yahoo/document/DocumentTestCase.java
+++ b/document/src/test/java/com/yahoo/document/DocumentTestCase.java
@@ -23,6 +23,7 @@ import org.junit.Test;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.util.Map;
@@ -30,7 +31,6 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@@ -1404,4 +1404,33 @@ public class DocumentTestCase extends DocumentTestCaseBase {
assertEquals(doc2Before, doc2After);
}
+ private static class DocumentIdFixture {
+ private final DocumentTypeManager docMan = new DocumentTypeManager();
+ private final DocumentType docType = new DocumentType("b");
+ private final GrowableByteBuffer buffer = new GrowableByteBuffer();
+ public DocumentIdFixture() {
+ docMan.register(docType);
+ }
+ public void serialize(String docId) {
+ new Document(docType, DocumentId.createFromSerialized(docId))
+ .serialize(DocumentSerializerFactory.createHead(buffer));
+ buffer.flip();
+ }
+ public Document deserialize() {
+ return new Document(DocumentDeserializerFactory.createHead(docMan, buffer));
+ }
+ }
+
+ @Test
+ public void testDocumentIdWithNonTextCharacterCanBeDeserialized() throws UnsupportedEncodingException {
+ DocumentIdFixture f = new DocumentIdFixture();
+
+ // Document id = "id:a:b::0x7c"
+ String docId = new String(new byte[]{105, 100, 58, 97, 58, 98, 58, 58, 7, 99}, "UTF-8");
+ f.serialize(docId);
+
+ Document result = f.deserialize();
+ assertEquals(docId, result.getId().toString());
+ }
+
}