aboutsummaryrefslogtreecommitdiffstats
path: root/document
diff options
context:
space:
mode:
authorjonmv <venstad@gmail.com>2024-03-08 12:09:45 +0100
committerjonmv <venstad@gmail.com>2024-03-08 12:09:45 +0100
commit94d4104d44f22d5d31b97913d9cc28749085b4f7 (patch)
treeb7599c4da2d8043254973f9a66545407d54bccc7 /document
parent3e9e5c081a97d715f63e1a5ca089e8223807596b (diff)
Strip invalid unicode based on system property
Diffstat (limited to 'document')
-rw-r--r--document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java14
1 files changed, 9 insertions, 5 deletions
diff --git a/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java b/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java
index 8b4b94f6bbf..d09967f973f 100644
--- a/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java
+++ b/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java
@@ -29,6 +29,9 @@ import java.util.Objects;
*/
public class StringFieldValue extends FieldValue {
+ // TODO: remove this, it's a temporary workaround for invalid data stored before unicode validation was fixed
+ private static final boolean replaceInvalidUnicode = System.getProperty("vespa.replace_invalid_unicode", "false").equals("true");
+
private static class Factory extends PrimitiveDataType.Factory {
@Override public FieldValue create() { return new StringFieldValue(); }
@Override public FieldValue create(String value) { return new StringFieldValue(value); }
@@ -56,16 +59,17 @@ public class StringFieldValue extends FieldValue {
setValue(value);
}
- private static void validateTextString(String value) {
+ private static String validateTextString(String value) {
if ( ! Text.isValidTextString(value)) {
- throw new IllegalArgumentException("The string field value contains illegal code point 0x" +
- Integer.toHexString(Text.validateTextString(value).getAsInt()).toUpperCase());
+ if (replaceInvalidUnicode) return Text.stripInvalidCharacters(value);
+ else throw new IllegalArgumentException("The string field value contains illegal code point 0x" +
+ Integer.toHexString(Text.validateTextString(value).getAsInt()).toUpperCase());
}
+ return value;
}
private void setValue(String value) {
- validateTextString(value);
- this.value = value;
+ this.value = validateTextString(value);
}
/**