summaryrefslogtreecommitdiffstats
path: root/docprocs
diff options
context:
space:
mode:
author Arne Juul <arnej@yahooinc.com> 2023-01-04 22:01:52 +0000
committer Arne Juul <arnej@yahooinc.com> 2023-01-04 22:01:52 +0000
commit a3d5ae43f29432e5733d8b1493b5b55c8396b728 (patch)
tree e2d08e17e920a549f9c933d922f8f03cb13b0024 /docprocs
parent 98933f7cd7ee770d36533cd13f60ed35dbbbb9dc (diff)
if the Document to be processed is the wrong type, convert it
* this happens when you have a concrete document
* the indexing processor works better with the "normal" format, especially if there are any complex structures in the schema, such as array<string>, that need processing
* convert to normal format by serializing and deserializing
Diffstat (limited to 'docprocs')
-rw-r--r--docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java17
1 files changed, 15 insertions, 2 deletions
diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java
index 7fc2ed022dd..2561fdc7dc5 100644
--- a/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java
+++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java
@@ -17,6 +17,9 @@ import com.yahoo.document.DocumentRemove;
import com.yahoo.document.DocumentType;
import com.yahoo.document.DocumentTypeManager;
import com.yahoo.document.DocumentUpdate;
+import com.yahoo.document.serialization.DocumentSerializer;
+import com.yahoo.document.serialization.DocumentSerializerFactory;
+import com.yahoo.io.GrowableByteBuffer;
import com.yahoo.language.Linguistics;
import com.yahoo.language.process.Embedder;
import com.yahoo.language.provider.DefaultEmbedderProvider;
@@ -93,14 +96,24 @@ public class IndexingProcessor extends DocumentProcessor {
}
private void processDocument(DocumentPut prev, List<DocumentOperation> out) {
- DocumentScript script = scriptMgr.getScript(prev.getDocument().getDataType());
+ DocumentType hadType = prev.getDocument().getDataType();
+ DocumentScript script = scriptMgr.getScript(hadType);
if (script == null) {
log.log(Level.FINE, "No indexing script for document '%s'.", prev.getId());
out.add(prev);
return;
}
log.log(Level.FINE, "Processing document '%s'.", prev.getId());
- Document next = script.execute(adapterFactory, prev.getDocument());
+ DocumentType wantType = docTypeMgr.getDocumentType(hadType.getName());
+ Document prevDoc = prev.getDocument();
+ if (hadType != wantType) {
+ GrowableByteBuffer buffer = new GrowableByteBuffer(64 * 1024, 2.0f);
+ DocumentSerializer serializer = DocumentSerializerFactory.createHead(buffer);
+ serializer.write(prevDoc);
+ buffer.flip();
+ prevDoc = docTypeMgr.createDocument(buffer);
+ }
+ Document next = script.execute(adapterFactory, prevDoc);
if (next == null) {
log.log(Level.FINE, "Document '%s' produced no output.", prev.getId());
return;