From 94d4104d44f22d5d31b97913d9cc28749085b4f7 Mon Sep 17 00:00:00 2001
From: jonmv
Date: Fri, 8 Mar 2024 12:09:45 +0100
Subject: Strip invalid unicode based on system property

---
 .../com/yahoo/document/datatypes/StringFieldValue.java | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'document')

diff --git a/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java b/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java
index 8b4b94f6bbf..d09967f973f 100644
--- a/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java
+++ b/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java
@@ -29,6 +29,9 @@ import java.util.Objects;
  */
 public class StringFieldValue extends FieldValue {
 
+    // TODO: remove this, it's a temporary workaround for invalid data stored before unicode validation was fixed
+    private static final boolean replaceInvalidUnicode = System.getProperty("vespa.replace_invalid_unicode", "false").equals("true");
+
     private static class Factory extends PrimitiveDataType.Factory {
         @Override public FieldValue create() { return new StringFieldValue(); }
         @Override public FieldValue create(String value) { return new StringFieldValue(value); }
@@ -56,16 +59,17 @@ public class StringFieldValue extends FieldValue {
         setValue(value);
     }
 
-    private static void validateTextString(String value) {
+    private static String validateTextString(String value) {
         if ( ! Text.isValidTextString(value)) {
-            throw new IllegalArgumentException("The string field value contains illegal code point 0x" +
-                    Integer.toHexString(Text.validateTextString(value).getAsInt()).toUpperCase());
+            if (replaceInvalidUnicode) return Text.stripInvalidCharacters(value);
+            else throw new IllegalArgumentException("The string field value contains illegal code point 0x" +
+                                                    Integer.toHexString(Text.validateTextString(value).getAsInt()).toUpperCase());
         }
+        return value;
     }
 
     private void setValue(String value) {
-        validateTextString(value);
-        this.value = value;
+        this.value = validateTextString(value);
     }
 
     /**
-- 
cgit v1.2.3