aboutsummaryrefslogtreecommitdiffstats
path: root/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java
blob: 53709c4ff879d94434771bf8df3f1af022db0984 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.docprocs.indexing;

import java.util.ArrayList;
import java.util.List;
import com.google.inject.Inject;
import com.yahoo.component.chain.dependencies.After;
import com.yahoo.component.chain.dependencies.Before;
import com.yahoo.component.chain.dependencies.Provides;
import com.yahoo.docproc.DocumentProcessor;
import com.yahoo.docproc.Processing;
import com.yahoo.document.Document;
import com.yahoo.document.DocumentOperation;
import com.yahoo.document.DocumentPut;
import com.yahoo.document.DocumentRemove;
import com.yahoo.document.DocumentType;
import com.yahoo.document.DocumentTypeManager;
import com.yahoo.document.DocumentTypeManagerConfigurer;
import com.yahoo.document.DocumentUpdate;
import com.yahoo.document.config.DocumentmanagerConfig;
import com.yahoo.language.Linguistics;
import java.util.logging.Level;

import com.yahoo.language.process.Encoder;
import com.yahoo.vespa.configdefinition.IlscriptsConfig;
import com.yahoo.vespa.indexinglanguage.AdapterFactory;
import com.yahoo.vespa.indexinglanguage.SimpleAdapterFactory;
import com.yahoo.vespa.indexinglanguage.expressions.Expression;

/**
 * Document processor that runs indexing scripts on incoming document operations.
 * <p>
 * Puts and updates are executed through the indexing script looked up for their document type, when
 * such a script exists; removes, and operations whose type has no script, are forwarded unchanged.
 *
 * @author Simon Thoresen Hult
 */
@Provides({ IndexingProcessor.PROVIDED_NAME })
@Before({ IndexingProcessor.INDEXING_END })
@After({ IndexingProcessor.INDEXING_START, "*" })
public class IndexingProcessor extends DocumentProcessor {

    /** Name this processor provides to the chain dependency annotations. */
    public static final String PROVIDED_NAME = "indexedDocument";
    /** Dependency name this processor is ordered after. */
    public static final String INDEXING_START = "indexingStart";
    /** Dependency name this processor is ordered before. */
    public static final String INDEXING_END = "indexingEnd";

    private static final FastLogger log = FastLogger.getLogger(IndexingProcessor.class.getName());

    private final DocumentTypeManager documentTypeManager;
    private final ScriptManager scriptManager;
    private final AdapterFactory adapterFactory;

    /**
     * Creates a processor from the injected configuration.
     *
     * @param documentmanagerConfig config used to build the document type manager
     * @param ilscriptsConfig       config handed to the script manager
     * @param linguistics           linguistics implementation handed to the script manager
     * @param encoder               encoder handed to the script manager
     */
    @Inject
    public IndexingProcessor(DocumentmanagerConfig documentmanagerConfig,
                             IlscriptsConfig ilscriptsConfig,
                             Linguistics linguistics,
                             Encoder encoder) {
        // Order matters: the script manager is built on top of the type manager.
        this.documentTypeManager = DocumentTypeManagerConfigurer.configureNewManager(documentmanagerConfig);
        this.scriptManager = new ScriptManager(this.documentTypeManager, ilscriptsConfig, linguistics, encoder);
        this.adapterFactory = new SimpleAdapterFactory(new ExpressionSelector());
    }

    /**
     * Runs every operation in {@code proc} through indexing and replaces the operation list with the
     * results. The list is only modified after all operations have been handled, so it is left as it
     * was if an exception is thrown while iterating.
     *
     * @param proc the processing whose document operations should be indexed
     * @return always {@link Progress#DONE}
     * @throws IllegalArgumentException if an operation is null or of an unsupported class
     */
    @Override
    public Progress process(Processing proc) {
        if (proc.getDocumentOperations().isEmpty()) {
            return Progress.DONE;
        }
        List<DocumentOperation> indexed = new ArrayList<>(proc.getDocumentOperations().size());
        for (DocumentOperation operation : proc.getDocumentOperations()) {
            dispatch(operation, indexed);
        }
        proc.getDocumentOperations().clear();
        proc.getDocumentOperations().addAll(indexed);
        return Progress.DONE;
    }

    /** Returns the document type manager built from the config given at construction. */
    DocumentTypeManager getDocumentTypeManager() {
        return documentTypeManager;
    }

    /**
     * Routes one operation to the handler matching its concrete class.
     *
     * @throws IllegalArgumentException if {@code operation} is null or not a put, update or remove
     */
    private void dispatch(DocumentOperation operation, List<DocumentOperation> out) {
        if (operation == null) {
            throw new IllegalArgumentException("Expected document, got null.");
        }
        if (operation instanceof DocumentPut) {
            indexPut((DocumentPut)operation, out);
        } else if (operation instanceof DocumentUpdate) {
            indexUpdate((DocumentUpdate)operation, out);
        } else if (operation instanceof DocumentRemove) {
            forwardRemove((DocumentRemove)operation, out);
        } else {
            throw new IllegalArgumentException("Document class " + operation.getClass().getName() + " not supported.");
        }
    }

    /**
     * Executes the indexing script for the put's document type and appends the resulting put to
     * {@code out}. The original put is appended as-is when no script exists; nothing is appended
     * when the script returns null.
     */
    private void indexPut(DocumentPut put, List<DocumentOperation> out) {
        DocumentScript script = scriptManager.getScript(put.getDocument().getDataType());
        if (script != null) {
            log.log(Level.FINE, "Processing document '%s'.", put.getId());
            Document result = script.execute(adapterFactory, put.getDocument());
            if (result != null) {
                out.add(new DocumentPut(put, result));
            } else {
                log.log(Level.FINE, "Document '%s' produced no output.", put.getId());
            }
        } else {
            log.log(Level.FINE, "No indexing script for document '%s'.", put.getId());
            out.add(put);
        }
    }

    /**
     * Executes the indexing script for the update's document type and appends the resulting update,
     * carrying over the original's condition, to {@code out}. The original update is appended as-is
     * when no script exists; nothing is appended when the script returns null.
     */
    private void indexUpdate(DocumentUpdate update, List<DocumentOperation> out) {
        DocumentScript script = scriptManager.getScript(update.getType());
        if (script != null) {
            log.log(Level.FINE, "Processing update '%s'.", update.getId());
            DocumentUpdate result = script.execute(adapterFactory, update);
            if (result != null) {
                result.setCondition(update.getCondition());
                out.add(result);
            } else {
                log.log(Level.FINE, "Update '%s' produced no output.", update.getId());
            }
        } else {
            log.log(Level.FINE, "No indexing script for update '%s'.", update.getId());
            out.add(update);
        }
    }

    /** Appends the remove to {@code out} untouched; removes are not run through any script. */
    private void forwardRemove(DocumentRemove remove, List<DocumentOperation> out) {
        log.log(Level.FINE, "Not processing remove '%s'.", remove.getId());
        out.add(remove);
    }

    /** Resolves the expression for a document type and field through this processor's script manager. */
    private class ExpressionSelector extends SimpleAdapterFactory.SelectExpression {

        @Override
        public Expression selectExpression(DocumentType documentType, String fieldName) {
            // NOTE(review): the lookup result is dereferenced without a null check, unlike the
            // per-type lookups elsewhere in this class - confirm it cannot be null here.
            DocumentScript fieldScript = scriptManager.getScript(documentType, fieldName);
            return fieldScript.getExpression();
        }

    }

}