diff options
author | Arne Juul <arnej@yahooinc.com> | 2023-01-04 22:01:52 +0000 |
---|---|---|
committer | Arne Juul <arnej@yahooinc.com> | 2023-01-04 22:01:52 +0000 |
commit | a3d5ae43f29432e5733d8b1493b5b55c8396b728 (patch) | |
tree | e2d08e17e920a549f9c933d922f8f03cb13b0024 /docprocs | |
parent | 98933f7cd7ee770d36533cd13f60ed35dbbbb9dc (diff) |
if the Document to be processed is the wrong type, convert it
* this happens when you have a concrete document
* the indexing processor works better with the "normal" format,
especially if there is any complex structure in the schema,
such as array<string>, that needs processing
* convert to normal format by serializing and deserializing
Diffstat (limited to 'docprocs')
-rw-r--r-- | docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java | 17 |
1 files changed, 15 insertions, 2 deletions
diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java index 7fc2ed022dd..2561fdc7dc5 100644 --- a/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java +++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java @@ -17,6 +17,9 @@ import com.yahoo.document.DocumentRemove; import com.yahoo.document.DocumentType; import com.yahoo.document.DocumentTypeManager; import com.yahoo.document.DocumentUpdate; +import com.yahoo.document.serialization.DocumentSerializer; +import com.yahoo.document.serialization.DocumentSerializerFactory; +import com.yahoo.io.GrowableByteBuffer; import com.yahoo.language.Linguistics; import com.yahoo.language.process.Embedder; import com.yahoo.language.provider.DefaultEmbedderProvider; @@ -93,14 +96,24 @@ public class IndexingProcessor extends DocumentProcessor { } private void processDocument(DocumentPut prev, List<DocumentOperation> out) { - DocumentScript script = scriptMgr.getScript(prev.getDocument().getDataType()); + DocumentType hadType = prev.getDocument().getDataType(); + DocumentScript script = scriptMgr.getScript(hadType); if (script == null) { log.log(Level.FINE, "No indexing script for document '%s'.", prev.getId()); out.add(prev); return; } log.log(Level.FINE, "Processing document '%s'.", prev.getId()); - Document next = script.execute(adapterFactory, prev.getDocument()); + DocumentType wantType = docTypeMgr.getDocumentType(hadType.getName()); + Document prevDoc = prev.getDocument(); + if (hadType != wantType) { + GrowableByteBuffer buffer = new GrowableByteBuffer(64 * 1024, 2.0f); + DocumentSerializer serializer = DocumentSerializerFactory.createHead(buffer); + serializer.write(prevDoc); + buffer.flip(); + prevDoc = docTypeMgr.createDocument(buffer); + } + Document next = script.execute(adapterFactory, prevDoc); if (next == null) { log.log(Level.FINE, "Document '%s' produced no 
output.", prev.getId()); return; |