diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2017-10-24 13:52:49 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-10-24 13:52:49 +0200 |
commit | 95151839fd9f90f88023f1ae5f0302915d2aeeb8 (patch) | |
tree | 09862116c7d937f24d5af49e45a3b464817344dc /document | |
parent | 6037e9d694386b5a7b104eea61259d8671ba13db (diff) | |
parent | dc9ff821c637f0a06c422c2f4eb00773ffd61ff6 (diff) |
Merge pull request #3834 from vespa-engine/geirst/validate-document-ids-only-contain-text-characters
Validate that document ids only contain text characters.
Diffstat (limited to 'document')
6 files changed, 140 insertions, 18 deletions
diff --git a/document/src/main/java/com/yahoo/document/DocumentId.java b/document/src/main/java/com/yahoo/document/DocumentId.java index 3ba977fec9e..e7c904cc420 100644 --- a/document/src/main/java/com/yahoo/document/DocumentId.java +++ b/document/src/main/java/com/yahoo/document/DocumentId.java @@ -25,10 +25,9 @@ public class DocumentId extends Identifiable implements Serializable { } /** - * Constructor. This constructor is used if the DocumentId is used outside of a Document object, but we have the - * URI. + * Creates a document id based on the given document id URI string. * - * @param id Associate with this URI, storage address etc. is not applicable. + * The document id string can only contain text characters. */ public DocumentId(String id) { if (id == null) { @@ -43,6 +42,17 @@ public class DocumentId extends Identifiable implements Serializable { globalId = null; } + /** + * Creates a document id based on a serialized document id URI string. + * + * The document id string is not allowed to contain 0x0 byte characters. + * Otherwise all characters are allowed to ensure that document ids + * already stored can be de-serialized. + */ + public static DocumentId createFromSerialized(String id) { + return new DocumentId(IdString.createFromSerialized(id)); + } + @Override public DocumentId clone() { DocumentId docId = (DocumentId)super.clone(); @@ -95,7 +105,7 @@ public class DocumentId extends Identifiable implements Serializable { if (data instanceof DocumentReader) { id = ((DocumentReader)data).readDocumentId().getScheme(); } else { - id = IdString.createIdString(data.getString(null)); + id = IdString.createFromSerialized(data.getString(null)); } } diff --git a/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java b/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java index 38a643992f1..afb7efd788f 100644 --- a/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java +++ b/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java @@ -17,6 +17,7 @@ import com.yahoo.vespa.objects.Ids; import java.util.Collection; import java.util.HashMap; import java.util.Map; +import java.util.OptionalInt; /** * A StringFieldValue is a wrapper class that holds a String in {@link com.yahoo.document.Document}s and @@ -55,19 +56,16 @@ public class StringFieldValue extends FieldValue { setValue(value); } - private void setValue(String value) { - for (int i = 0; i < value.length(); i++) { - char theChar = value.charAt(i); - int codePoint = value.codePointAt(i); - if (Character.isHighSurrogate(theChar)) { - //skip one char ahead, since codePointAt() consumes one more char in this case - ++i; - } - - if ( ! Text.isTextCharacter(codePoint)) - throw new IllegalArgumentException("A string field value cannot contain code point 0x" + - Integer.toHexString(codePoint).toUpperCase()); + private static void validateTextString(String value) { + OptionalInt illegalCodePoint = Text.validateTextString(value); + if (illegalCodePoint.isPresent()) { + throw new IllegalArgumentException("The string field value contains illegal code point 0x" + + Integer.toHexString(illegalCodePoint.getAsInt()).toUpperCase()); } + } + + private void setValue(String value) { + validateTextString(value); this.value = value; } diff --git a/document/src/main/java/com/yahoo/document/idstring/IdString.java b/document/src/main/java/com/yahoo/document/idstring/IdString.java index 468dc9a38b1..c58f2070e31 100644 --- a/document/src/main/java/com/yahoo/document/idstring/IdString.java +++ b/document/src/main/java/com/yahoo/document/idstring/IdString.java @@ -1,11 +1,13 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.document.idstring; +import com.yahoo.text.Text; import com.yahoo.text.Utf8String; import java.math.BigInteger; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.OptionalInt; /** * To be used with DocumentId constructor. @@ -75,7 +77,43 @@ public abstract class IdString { } } + /** + * Creates a IdString based on the given document id string. + * + * The document id string can only contain text characters. + */ public static IdString createIdString(String id) { + validateTextString(id); + return parseAndCreate(id); + } + + /** + * Creates a IdString based on the given serialized document id string. + * + * The document id string can not contain 0x0 byte characters. + */ + public static IdString createFromSerialized(String id) { + validateNoZeroBytes(id); + return parseAndCreate(id); + } + + private static void validateTextString(String id) { + OptionalInt illegalCodePoint = Text.validateTextString(id); + if (illegalCodePoint.isPresent()) { + throw new IllegalArgumentException("Unparseable id '" + id + "': Contains illegal code point 0x" + + Integer.toHexString(illegalCodePoint.getAsInt()).toUpperCase()); + } + } + + private static void validateNoZeroBytes(String id) { + for (int i = 0; i < id.length(); i++) { + if (id.codePointAt(i) == 0) { + throw new IllegalArgumentException("Unparseable id '" + id + "': Contains illegal zero byte code point"); + } + } + } + + private static IdString parseAndCreate(String id) { String namespace; long userId; String group; diff --git a/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer42.java b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer42.java index ff0e11bcdeb..a048ea349eb 100644 --- a/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer42.java +++ b/document/src/main/java/com/yahoo/document/serialization/VespaDocumentDeserializer42.java @@ -600,7 +600,7 @@ public class VespaDocumentDeserializer42 extends VespaDocumentSerializer42 imple public DocumentId readDocumentId() { Utf8String uri = new Utf8String(parseNullTerminatedString(getBuf().getByteBuffer())); - return new DocumentId(uri.toString()); + return DocumentId.createFromSerialized(uri.toString()); } public DocumentType readDocumentType() { diff --git a/document/src/test/java/com/yahoo/document/DocumentIdTestCase.java b/document/src/test/java/com/yahoo/document/DocumentIdTestCase.java index 75dfd612ec4..79a10bc72e4 100644 --- a/document/src/test/java/com/yahoo/document/DocumentIdTestCase.java +++ b/document/src/test/java/com/yahoo/document/DocumentIdTestCase.java @@ -3,6 +3,7 @@ package com.yahoo.document; import com.yahoo.document.*; import com.yahoo.document.idstring.*; +import com.yahoo.vespa.objects.BufferSerializer; import java.math.BigInteger; @@ -10,6 +11,9 @@ import java.io.*; import java.util.regex.Pattern; import java.util.Arrays; +import static org.hamcrest.Matchers.containsString; +import static org.junit.Assert.assertThat; + public class DocumentIdTestCase extends junit.framework.TestCase { DocumentTypeManager manager = new DocumentTypeManager(); @@ -291,4 +295,47 @@ public class DocumentIdTestCase extends junit.framework.TestCase { assertEquals(Arrays.hashCode(docId0Gid), Arrays.hashCode(docId0CopyGid)); } + public void testDocumentIdCanOnlyContainTextCharacters() throws UnsupportedEncodingException { + assertExceptionWhenConstructing(new byte[]{105, 100, 58, 97, 58, 98, 58, 58, 0, 99}, // "id:a:b::0x0c" + "illegal code point 0x0"); + assertExceptionWhenConstructing(new byte[]{105, 100, 58, 97, 58, 98, 58, 58, 7, 99}, // "id:a:b::0x7c" + "illegal code point 0x7"); + } + + private void assertExceptionWhenConstructing(byte[] rawId, + String exceptionMsg) throws UnsupportedEncodingException { + String strId = new String(rawId, "UTF-8"); + try { + new DocumentId(strId); + fail("Expected an IllegalArgumentException to be thrown"); + } catch (IllegalArgumentException ex) { + assertThat(ex.getMessage(), containsString(exceptionMsg)); + } + } + + public void testSerializedDocumentIdCanContainNonTextCharacter() throws UnsupportedEncodingException { + String strId = new String(new byte[]{105, 100, 58, 97, 58, 98, 58, 58, 7, 99}); // "id:a:b::0x7c" + DocumentId docId = DocumentId.createFromSerialized(strId); + { + assertEquals(strId, docId.toString()); + } + { + BufferSerializer buf = new BufferSerializer(); + docId.serialize(buf); + buf.flip(); + DocumentId deserializedId = new DocumentId(buf); + assertEquals(strId, deserializedId.toString()); + } + } + + public void testSerializedDocumentIdCannotContainZeroByte() throws UnsupportedEncodingException { + String strId = new String(new byte[]{105, 100, 58, 97, 58, 98, 58, 58, 0, 99}); // "id:a:b::0x0c" + try { + DocumentId.createFromSerialized(strId); + fail("Expected an IllegalArgumentException to be thrown"); + } catch (IllegalArgumentException ex) { + assertThat(ex.getMessage(), containsString("illegal zero byte code point")); + } + } + } diff --git a/document/src/test/java/com/yahoo/document/DocumentTestCase.java b/document/src/test/java/com/yahoo/document/DocumentTestCase.java index 47a4c377da0..44c82042d5c 100644 --- a/document/src/test/java/com/yahoo/document/DocumentTestCase.java +++ b/document/src/test/java/com/yahoo/document/DocumentTestCase.java @@ -23,6 +23,7 @@ import org.junit.Test; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; import java.util.Map; @@ -30,7 +31,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -1404,4 +1404,33 @@ public class DocumentTestCase extends DocumentTestCaseBase { assertEquals(doc2Before, doc2After); } + private static class DocumentIdFixture { + private final DocumentTypeManager docMan = new DocumentTypeManager(); + private final DocumentType docType = new DocumentType("b"); + private final GrowableByteBuffer buffer = new GrowableByteBuffer(); + public DocumentIdFixture() { + docMan.register(docType); + } + public void serialize(String docId) { + new Document(docType, DocumentId.createFromSerialized(docId)) + .serialize(DocumentSerializerFactory.createHead(buffer)); + buffer.flip(); + } + public Document deserialize() { + return new Document(DocumentDeserializerFactory.createHead(docMan, buffer)); + } + } + + @Test + public void testDocumentIdWithNonTextCharacterCanBeDeserialized() throws UnsupportedEncodingException { + DocumentIdFixture f = new DocumentIdFixture(); + + // Document id = "id:a:b::0x7c" + String docId = new String(new byte[]{105, 100, 58, 97, 58, 98, 58, 58, 7, 99}, "UTF-8"); + f.serialize(docId); + + Document result = f.deserialize(); + assertEquals(docId, result.getId().toString()); + } + } |