1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.docprocs.indexing;
import com.yahoo.document.DocumentType;
import com.yahoo.document.DocumentTypeManager;
import com.yahoo.language.Linguistics;
import java.util.logging.Level;
import com.yahoo.language.process.Embedder;
import com.yahoo.vespa.configdefinition.IlscriptsConfig;
import com.yahoo.vespa.indexinglanguage.ScriptParserContext;
import com.yahoo.vespa.indexinglanguage.expressions.InputExpression;
import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression;
import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression;
import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression;
import com.yahoo.vespa.indexinglanguage.parser.IndexingInput;
import com.yahoo.vespa.indexinglanguage.parser.ParseException;
import java.util.*;
/**
* @author Simon Thoresen Hult
*/
public class ScriptManager {
private static final FastLogger log = FastLogger.getLogger(ScriptManager.class.getName());
private static final String FULL = "[all]";
private final Map<String, Map<String, DocumentScript>> documentFieldScripts;
private final DocumentTypeManager docTypeMgr;
public ScriptManager(DocumentTypeManager docTypeMgr, IlscriptsConfig config, Linguistics linguistics,
Map<String, Embedder> embedders) {
this.docTypeMgr = docTypeMgr;
documentFieldScripts = createScriptsMap(docTypeMgr, config, linguistics, embedders);
}
private Map<String, DocumentScript> getScripts(DocumentType inputType) {
Map<String, DocumentScript> scripts = documentFieldScripts.get(inputType.getName());
if (scripts != null) return scripts;
for (Map.Entry<String, Map<String, DocumentScript>> entry : documentFieldScripts.entrySet()) {
if (inputType.inherits(docTypeMgr.getDocumentType(entry.getKey())))
return entry.getValue();
}
for (Map.Entry<String, Map<String, DocumentScript>> entry : documentFieldScripts.entrySet()) {
if (docTypeMgr.getDocumentType(entry.getKey()).inherits(inputType))
return entry.getValue();
}
return null;
}
public DocumentScript getScript(DocumentType inputType) {
return getScript(inputType, FULL);
}
public DocumentScript getScript(DocumentType inputType, String inputFieldName) {
Map<String, DocumentScript> fieldScripts = getScripts(inputType);
if (fieldScripts != null) {
DocumentScript script = fieldScripts.get(inputFieldName);
if (script != null) return script;
}
return null;
}
/**
* Returns an unmodifiable map from document type name to a map of the subset of indexing statements
* to run for each input field which *only* depend on that field.
*/
private static Map<String, Map<String, DocumentScript>> createScriptsMap(DocumentTypeManager docTypeMgr,
IlscriptsConfig config,
Linguistics linguistics,
Map<String, Embedder> embedders) {
Map<String, Map<String, DocumentScript>> documentFieldScripts = new HashMap<>(config.ilscript().size());
ScriptParserContext parserContext = new ScriptParserContext(linguistics, embedders);
parserContext.getAnnotatorConfig().setMaxTermOccurrences(config.maxtermoccurrences());
parserContext.getAnnotatorConfig().setMaxTokenizeLength(config.fieldmatchmaxlength());
for (IlscriptsConfig.Ilscript ilscript : config.ilscript()) {
DocumentType documentType = docTypeMgr.getDocumentType(ilscript.doctype());
InputExpression.FieldPathOptimizer fieldPathOptimizer = new InputExpression.FieldPathOptimizer(documentType);
List<StatementExpression> expressions = new ArrayList<>(ilscript.content().size());
Map<String, DocumentScript> fieldScripts = new HashMap<>(ilscript.content().size());
for (String content : ilscript.content()) {
StatementExpression statement = parse(ilscript.doctype(), parserContext, content);
expressions.add(statement);
List<String> inputFieldNames = InputExpression.InputFieldNameExtractor.runOn(statement);
OutputExpression.OutputFieldNameExtractor outputFieldNameExtractor = new OutputExpression.OutputFieldNameExtractor();
statement.select(outputFieldNameExtractor, outputFieldNameExtractor);
statement.select(fieldPathOptimizer, fieldPathOptimizer);
if ( ! outputFieldNameExtractor.getOutputFieldNames().isEmpty()) {
String outputFieldName = outputFieldNameExtractor.getOutputFieldNames().get(0);
statement.setStatementOutput(documentType, documentType.getField(outputFieldName));
}
if (inputFieldNames.size() == 1) {
String fieldName = inputFieldNames.get(0);
ScriptExpression script;
if (fieldScripts.containsKey(fieldName)) {
DocumentScript prev = fieldScripts.get(fieldName);
List<StatementExpression> appendedList = new ArrayList<>(((ScriptExpression)prev.getExpression()).asList());
appendedList.add(statement);
script = new ScriptExpression(appendedList);
} else {
script = new ScriptExpression(statement);
}
DocumentScript documentScript = new DocumentScript(ilscript.doctype(), inputFieldNames, script);
fieldScripts.put(fieldName, documentScript);
} else {
log.log(Level.FINE, "Non single(" + inputFieldNames.size() +"" +
") inputs = " + inputFieldNames + ". Script = " + statement);
}
}
ScriptExpression script = new ScriptExpression(expressions);
script.select(fieldPathOptimizer, fieldPathOptimizer);
fieldScripts.put(FULL, new DocumentScript(ilscript.doctype(), ilscript.docfield(),script));
documentFieldScripts.put(ilscript.doctype(), Collections.unmodifiableMap(fieldScripts));
}
return Collections.unmodifiableMap(documentFieldScripts);
}
private static StatementExpression parse(String docType, ScriptParserContext parserConfig, String content) {
parserConfig.setInputStream(new IndexingInput(content));
try {
return StatementExpression.newInstance(parserConfig);
} catch (ParseException e) {
throw new IllegalArgumentException("Illegal indexing script for document type '" +
docType + "'; " + content, e);
}
}
}
|