if the Document to be processed is the wrong type, convert it

* this happens when you have a concrete document
* the indexing processor works better with the "normal" format, especially if there are any complex structures in the schema, such as array<string>, that need processing
* convert to normal format by serializing and deserializing
author: Arne Juul <arnej@yahooinc.com> 2023-01-04 22:01:52 +0000
committer: Arne Juul <arnej@yahooinc.com> 2023-01-04 22:01:52 +0000
commit: a3d5ae43f29432e5733d8b1493b5b55c8396b728 (patch)
tree: e2d08e17e920a549f9c933d922f8f03cb13b0024 /docprocs
parent: 98933f7cd7ee770d36533cd13f60ed35dbbbb9dc (diff)
1 files changed, 15 insertions, 2 deletions
diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java
index 7fc2ed022dd..2561fdc7dc5 100644
--- a/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java
+++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java
@@ -17,6 +17,9 @@ import com.yahoo.document.DocumentRemove;
 import com.yahoo.document.DocumentType;
 import com.yahoo.document.DocumentTypeManager;
 import com.yahoo.document.DocumentUpdate;
+import com.yahoo.document.serialization.DocumentSerializer;
+import com.yahoo.document.serialization.DocumentSerializerFactory;
+import com.yahoo.io.GrowableByteBuffer;
 import com.yahoo.language.Linguistics;
 import com.yahoo.language.process.Embedder;
 import com.yahoo.language.provider.DefaultEmbedderProvider;
@@ -93,14 +96,24 @@ public class IndexingProcessor extends DocumentProcessor {
     }
 
     private void processDocument(DocumentPut prev, List<DocumentOperation> out) {
-        DocumentScript script = scriptMgr.getScript(prev.getDocument().getDataType());
+        DocumentType hadType = prev.getDocument().getDataType();
+        DocumentScript script = scriptMgr.getScript(hadType);
         if (script == null) {
             log.log(Level.FINE, "No indexing script for document '%s'.", prev.getId());
             out.add(prev);
             return;
         }
         log.log(Level.FINE, "Processing document '%s'.", prev.getId());
-        Document next = script.execute(adapterFactory, prev.getDocument());
+        DocumentType wantType = docTypeMgr.getDocumentType(hadType.getName());
+        Document prevDoc = prev.getDocument();
+        if (hadType != wantType) {
+            GrowableByteBuffer buffer = new GrowableByteBuffer(64 * 1024, 2.0f);
+            DocumentSerializer serializer = DocumentSerializerFactory.createHead(buffer);
+            serializer.write(prevDoc);
+            buffer.flip();
+            prevDoc = docTypeMgr.createDocument(buffer);
+        }
+        Document next = script.execute(adapterFactory, prevDoc);
         if (next == null) {
             log.log(Level.FINE, "Document '%s' produced no output.", prev.getId());
             return;
author	Arne Juul <arnej@yahooinc.com>	2023-01-04 22:01:52 +0000
committer	Arne Juul <arnej@yahooinc.com>	2023-01-04 22:01:52 +0000
commit	a3d5ae43f29432e5733d8b1493b5b55c8396b728 (patch)
tree	e2d08e17e920a549f9c933d922f8f03cb13b0024 /docprocs
parent	98933f7cd7ee770d36533cd13f60ed35dbbbb9dc (diff)