summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jon Marius Venstad <jonmv@users.noreply.github.com> 2024-03-08 12:35:32 +0100
committer: GitHub <noreply@github.com> 2024-03-08 12:35:32 +0100
commit: da6420f61782ef1b0edb358fc12153ed36f34c59 (patch)
tree: 6061391d78c96ec57328d29a8091e3d05143374b
parent: 43e992f496450073ede028d66f849aaab6606ce5 (diff)
parent: 94d4104d44f22d5d31b97913d9cc28749085b4f7 (diff)
Merge pull request #30519 from vespa-engine/jonmv/handle-stored-invalid-unicode
Print doc id when failing to create concrete documents for processing
-rw-r--r-- docproc/src/main/java/com/yahoo/docproc/jdisc/messagebus/MbusRequestContext.java 1
-rw-r--r-- docproc/src/main/java/com/yahoo/docproc/jdisc/messagebus/ProcessingFactory.java 7
-rw-r--r-- document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java 14
3 files changed, 16 insertions, 6 deletions
diff --git a/docproc/src/main/java/com/yahoo/docproc/jdisc/messagebus/MbusRequestContext.java b/docproc/src/main/java/com/yahoo/docproc/jdisc/messagebus/MbusRequestContext.java
index 180173789ae..1836f25f8d2 100644
--- a/docproc/src/main/java/com/yahoo/docproc/jdisc/messagebus/MbusRequestContext.java
+++ b/docproc/src/main/java/com/yahoo/docproc/jdisc/messagebus/MbusRequestContext.java
@@ -40,6 +40,7 @@ public class MbusRequestContext implements RequestContext, ResponseHandler {
private final static Logger log = Logger.getLogger(MbusRequestContext.class.getName());
private final static CopyOnWriteHashMap<String, URI> uriCache = new CopyOnWriteHashMap<>();
+
private final AtomicBoolean deserialized = new AtomicBoolean(false);
private final AtomicBoolean responded = new AtomicBoolean(false);
private final ProcessingFactory processingFactory;
diff --git a/docproc/src/main/java/com/yahoo/docproc/jdisc/messagebus/ProcessingFactory.java b/docproc/src/main/java/com/yahoo/docproc/jdisc/messagebus/ProcessingFactory.java
index 030f95e380b..41d7f7920ca 100644
--- a/docproc/src/main/java/com/yahoo/docproc/jdisc/messagebus/ProcessingFactory.java
+++ b/docproc/src/main/java/com/yahoo/docproc/jdisc/messagebus/ProcessingFactory.java
@@ -81,7 +81,12 @@ class ProcessingFactory {
log.fine(() -> "Unable to get document factory component '" + componentId + "' from document factory registry.");
return document;
}
- return cdf.getDocumentCopy(document.getDataType().getName(), document, document.getId());
+ try {
+ return cdf.getDocumentCopy(document.getDataType().getName(), document, document.getId());
+ }
+ catch (RuntimeException e) {
+ throw new IllegalArgumentException("error in document with id '" + document.getId() + "'", e);
+ }
}
private ContainerDocumentConfig.Doctype getDocumentConfig(String name) {
diff --git a/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java b/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java
index 8b4b94f6bbf..d09967f973f 100644
--- a/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java
+++ b/document/src/main/java/com/yahoo/document/datatypes/StringFieldValue.java
@@ -29,6 +29,9 @@ import java.util.Objects;
*/
public class StringFieldValue extends FieldValue {
+ // TODO: remove this, it's a temporary workaround for invalid data stored before unicode validation was fixed
+ private static final boolean replaceInvalidUnicode = System.getProperty("vespa.replace_invalid_unicode", "false").equals("true");
+
private static class Factory extends PrimitiveDataType.Factory {
@Override public FieldValue create() { return new StringFieldValue(); }
@Override public FieldValue create(String value) { return new StringFieldValue(value); }
@@ -56,16 +59,17 @@ public class StringFieldValue extends FieldValue {
setValue(value);
}
- private static void validateTextString(String value) {
+ private static String validateTextString(String value) {
if ( ! Text.isValidTextString(value)) {
- throw new IllegalArgumentException("The string field value contains illegal code point 0x" +
- Integer.toHexString(Text.validateTextString(value).getAsInt()).toUpperCase());
+ if (replaceInvalidUnicode) return Text.stripInvalidCharacters(value);
+ else throw new IllegalArgumentException("The string field value contains illegal code point 0x" +
+ Integer.toHexString(Text.validateTextString(value).getAsInt()).toUpperCase());
}
+ return value;
}
private void setValue(String value) {
- validateTextString(value);
- this.value = value;
+ this.value = validateTextString(value);
}
/**