aboutsummaryrefslogtreecommitdiffstats
path: root/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java
blob: 015a5ceb79d76d684eb9461385bb9b8b5ebac285 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.docprocs.indexing;

import com.yahoo.document.DocumentType;
import com.yahoo.document.DocumentTypeManager;
import com.yahoo.language.Linguistics;
import com.yahoo.log.LogLevel;
import com.yahoo.vespa.configdefinition.IlscriptsConfig;
import com.yahoo.vespa.indexinglanguage.ScriptParserContext;
import com.yahoo.vespa.indexinglanguage.expressions.InputExpression;
import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression;
import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression;
import com.yahoo.vespa.indexinglanguage.parser.IndexingInput;
import com.yahoo.vespa.indexinglanguage.parser.ParseException;

import java.util.*;
import java.util.logging.Level;

/**
 * Holds the indexing scripts declared in an {@link IlscriptsConfig} and looks them up by document type and,
 * optionally, by input field name.
 *
 * <p>All scripts are built once, in the constructor. For every configured document type the manager keeps one
 * script made of all statements of that type, plus one script per input field for those statements that read
 * exactly one input field.</p>
 *
 * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
 */
public class ScriptManager {

    private static final FastLogger log = FastLogger.getLogger(ScriptManager.class.getName());

    /** Key under which the script containing every statement of a document type is stored. */
    private static final String FULL = "[all]";

    /** Document type name -> (input field name, or {@link #FULL}) -> script. Both levels are unmodifiable. */
    private final Map<String, Map<String, DocumentScript>> documentFieldScripts;
    private final DocumentTypeManager docTypeMgr;

    /**
     * Parses all scripts found in the given config.
     *
     * @param docTypeMgr  used to resolve the document type names found in the config
     * @param config      the indexing scripts, one entry per document type
     * @param linguistics handed to the script parser context
     * @throws IllegalArgumentException if any script statement fails to parse
     */
    public ScriptManager(DocumentTypeManager docTypeMgr, IlscriptsConfig config, Linguistics linguistics) {
        this.docTypeMgr = docTypeMgr;
        documentFieldScripts = createScriptsMap(docTypeMgr, config, linguistics);
    }

    /**
     * Finds the scripts that apply to the given document type. The lookup is done in three steps, and the first
     * step that yields a result wins:
     * <ol>
     *   <li>scripts configured for a type with exactly the same name,</li>
     *   <li>scripts configured for a type that the given type inherits,</li>
     *   <li>scripts configured for a type that inherits the given type.</li>
     * </ol>
     * The scripts are kept in a hash map, so if several configured types match in step 2 or 3, which one is
     * returned is not specified.
     *
     * @param inputType the document type to find scripts for
     * @return the scripts keyed by input field name, or null if no configured type matches
     */
    private Map<String, DocumentScript> getScripts(DocumentType inputType) {
        Map<String, DocumentScript> scripts = documentFieldScripts.get(inputType.getName());
        if (scripts != null) {
            log.log(LogLevel.DEBUG, "Using script for type '%s'.", inputType.getName());
            return scripts;
        }
        for (Map.Entry<String, Map<String, DocumentScript>> entry : documentFieldScripts.entrySet()) {
            if (inputType.inherits(docTypeMgr.getDocumentType(entry.getKey()))) {
                log.log(LogLevel.DEBUG, "Using script of super-type '%s'.", entry.getKey());
                return entry.getValue();
            }
        }
        for (Map.Entry<String, Map<String, DocumentScript>> entry : documentFieldScripts.entrySet()) {
            if (docTypeMgr.getDocumentType(entry.getKey()).inherits(inputType)) {
                log.log(LogLevel.DEBUG, "Using script of sub-type '%s'.", entry.getKey());
                return entry.getValue();
            }
        }
        log.log(LogLevel.DEBUG, "No script for type '%s'.", inputType.getName());
        return null;
    }

    /**
     * Returns the script containing all statements that apply to the given document type.
     *
     * @param inputType the document type to find a script for
     * @return the full script, or null if no configured type matches
     */
    public DocumentScript getScript(DocumentType inputType) {
        return getScript(inputType, FULL);
    }

    /**
     * Returns the script registered for one input field of the given document type.
     *
     * @param inputType      the document type to find a script for
     * @param inputFieldName the name of the input field
     * @return the script for that field, or null if no configured type matches or the matching type has no
     *         script registered under that field name
     */
    public DocumentScript getScript(DocumentType inputType, String inputFieldName) {
        Map<String, DocumentScript> fieldScripts = getScripts(inputType);
        if (fieldScripts == null) {
            return null;
        }
        DocumentScript script = fieldScripts.get(inputFieldName);
        if (script != null) {
            log.log(LogLevel.DEBUG, "Using script for type '%s' and field '%s'.", inputType.getName(), inputFieldName);
        }
        return script;
    }

    /**
     * Builds the unmodifiable two-level lookup table from document type name to its scripts.
     *
     * @param docTypeMgr  used to resolve the document type names found in the config
     * @param config      the indexing scripts, one entry per document type
     * @param linguistics handed to the script parser context
     * @return document type name -> (input field name, or {@link #FULL}) -> script
     * @throws IllegalArgumentException if any script statement fails to parse
     */
    private static Map<String, Map<String, DocumentScript>> createScriptsMap(DocumentTypeManager docTypeMgr,
                                                                             IlscriptsConfig config,
                                                                             Linguistics linguistics) {
        Map<String, Map<String, DocumentScript>> documentFieldScripts = new HashMap<>(config.ilscript().size());
        // A single parser context is shared by all statements; parse() points it at a new input each time.
        ScriptParserContext parserContext = new ScriptParserContext(linguistics);
        parserContext.getAnnotatorConfig().setMaxTermOccurrences(config.maxtermoccurrences());

        for (IlscriptsConfig.Ilscript ilscript : config.ilscript()) {
            Map<String, DocumentScript> fieldScripts = createFieldScripts(docTypeMgr, parserContext, ilscript);
            documentFieldScripts.put(ilscript.doctype(), Collections.unmodifiableMap(fieldScripts));
        }
        return Collections.unmodifiableMap(documentFieldScripts);
    }

    /**
     * Builds all scripts of one document type: one per input field that is the sole input of at least one
     * statement, plus the full script stored under {@link #FULL}.
     *
     * @param docTypeMgr    used to resolve the document type named by the config entry
     * @param parserContext the shared parser context
     * @param ilscript      the config entry of one document type
     * @return a modifiable map from input field name (or {@link #FULL}) to script
     * @throws IllegalArgumentException if any script statement fails to parse
     */
    private static Map<String, DocumentScript> createFieldScripts(DocumentTypeManager docTypeMgr,
                                                                  ScriptParserContext parserContext,
                                                                  IlscriptsConfig.Ilscript ilscript) {
        InputExpression.FieldPathOptimizer fieldPathOptimizer =
                new InputExpression.FieldPathOptimizer(docTypeMgr.getDocumentType(ilscript.doctype()));
        List<StatementExpression> expressions = new ArrayList<>(ilscript.content().size());
        Map<String, DocumentScript> fieldScripts = new HashMap<>(ilscript.content().size());

        for (String content : ilscript.content()) {
            // NOTE(review): the content is parsed twice, so the statement that goes into the full script and the
            // one used for the per-field script come from separate parse calls. This looks deliberate, since the
            // field path optimizer is applied to the per-field statement here and to the full script below;
            // confirm before merging the two parses into one.
            expressions.add(parse(ilscript.doctype(), parserContext, content));
            StatementExpression statement = parse(ilscript.doctype(), parserContext, content);

            InputExpression.InputFieldNameExtractor inputFieldNameExtractor = new InputExpression.InputFieldNameExtractor();
            statement.select(inputFieldNameExtractor, inputFieldNameExtractor);
            statement.select(fieldPathOptimizer, fieldPathOptimizer);

            List<String> inputFieldNames = inputFieldNameExtractor.getInputFieldNames();
            if (inputFieldNames.size() != 1) {
                // Only statements with exactly one input field get a per-field script.
                log.log(Level.FINE, "Non single(%s) inputs = %s. Script = %s",
                        inputFieldNames.size(), inputFieldNames, statement);
                continue;
            }

            String fieldName = inputFieldNames.get(0);
            DocumentScript prev = fieldScripts.get(fieldName);
            ScriptExpression script;
            if (prev != null) {
                // An earlier statement read the same field: extend its script instead of replacing it.
                List<StatementExpression> appendedList = new ArrayList<>(((ScriptExpression)prev.getExpression()).asList());
                appendedList.add(statement);
                script = new ScriptExpression(appendedList);
                log.log(Level.FINE, "Appending script for field '%s' = %s", fieldName, statement);
                log.log(Level.FINE, "Full script for field '%s' = %s", fieldName, appendedList);
            } else {
                script = new ScriptExpression(statement);
                log.log(Level.FINE, "Setting script for field '%s' = %s", fieldName, statement);
            }
            fieldScripts.put(fieldName, new DocumentScript(ilscript.doctype(), inputFieldNames, script));
        }

        ScriptExpression script = new ScriptExpression(expressions);
        script.select(fieldPathOptimizer, fieldPathOptimizer);
        fieldScripts.put(FULL, new DocumentScript(ilscript.doctype(), ilscript.docfield(), script));
        return fieldScripts;
    }

    /**
     * Parses a single indexing statement.
     *
     * @param docType      the name of the document type the statement belongs to; only used in the error message
     * @param parserConfig the parser context; its input stream is replaced by the given content
     * @param content      the text of the statement
     * @return the parsed statement
     * @throws IllegalArgumentException if the content can not be parsed; the parse exception is kept as cause
     */
    private static StatementExpression parse(String docType, ScriptParserContext parserConfig, String content) {
        parserConfig.setInputStream(new IndexingInput(content));
        try {
            return StatementExpression.newInstance(parserConfig);
        } catch (ParseException e) {
            throw new IllegalArgumentException("Illegal indexing script for document type '" +
                                               docType + "'; " + content, e);
        }
    }
}