aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/prelude/semantics/RuleImporter.java
blob: 40911f793c8777e7f1fc66338669852e5d407707 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.semantics;

import com.yahoo.io.IOUtils;
import com.yahoo.language.Linguistics;
import com.yahoo.prelude.semantics.parser.ParseException;
import com.yahoo.prelude.semantics.parser.SemanticsParser;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;

/**
 * Imports rule bases from various sources.
 *
 * <p>Uses the JavaCC-generated parser to read rule bases.
 * This is an intermediate between the parser and the rule base being loaded
 * on implementation of some directives: for example, it knows where to find
 * rule bases included into others, while neither the rule base nor the parser knows.</p>
 *
 * @author bratseth
 */
public class RuleImporter {

    /** If this is set, imported rule bases are looked up in this config, otherwise they are looked up as files. */
    private final SemanticRulesConfig config;

    /** Ignore requests to read automata files. Useful to validate rule bases without having automatas present. */
    private final boolean ignoreAutomatas;

    /** Ignore requests to include files. Useful to validate rule bases one by one in config. */
    private final boolean ignoreIncludes;

    private final Linguistics linguistics;

    /** Create a rule importer which will read from file */
    public RuleImporter(Linguistics linguistics) {
        this(null, false, linguistics);
    }

    /** Create a rule importer which will read from a config object */
    public RuleImporter(SemanticRulesConfig config, Linguistics linguistics) {
        this(config, false, linguistics);
    }

    /** Create a rule importer which will read from file, optionally ignoring automata requests */
    public RuleImporter(boolean ignoreAutomatas, Linguistics linguistics) {
        this(null, ignoreAutomatas, linguistics);
    }

    /** Create a rule importer which will read from file, optionally ignoring automata and include requests */
    public RuleImporter(boolean ignoreAutomatas, boolean ignoreIncludes, Linguistics linguistics) {
        this(null, ignoreAutomatas, ignoreIncludes, linguistics);
    }

    /** Create a rule importer which will read from a config object, optionally ignoring automata requests */
    public RuleImporter(SemanticRulesConfig config, boolean ignoreAutomatas, Linguistics linguistics) {
        this(config, ignoreAutomatas, false, linguistics);
    }

    /**
     * Create a rule importer
     *
     * @param config the config to look up included rule bases in, or null to look them up as files
     * @param ignoreAutomatas whether requests to read automata files should be ignored
     * @param ignoreIncludes whether requests to include other rule bases should be ignored
     * @param linguistics the linguistics instance handed to the parser
     */
    public RuleImporter(SemanticRulesConfig config,
                        boolean ignoreAutomatas,
                        boolean ignoreIncludes,
                        Linguistics linguistics) {
        this.config = config;
        this.ignoreAutomatas = ignoreAutomatas;
        this.ignoreIncludes = ignoreIncludes;
        this.linguistics = linguistics;
    }

    /**
     * Imports semantic rules from a file
     *
     * @param fileName the rule file to use
     * @throws java.io.IOException if the file can not be read for some reason
     * @throws ParseException if the file does not contain a valid semantic rule set
     */
    public RuleBase importFile(String fileName) throws IOException, ParseException {
        return importFile(fileName, null);
    }

    /**
     * Imports semantic rules from a file
     *
     * @param fileName the rule file to use
     * @param automataFile the automata file to use, or null to not use any
     * @throws java.io.IOException if the file can not be read for some reason
     * @throws ParseException if the file does not contain a valid semantic rule set
     */
    public RuleBase importFile(String fileName, String automataFile) throws IOException, ParseException {
        var ruleBase = privateImportFile(fileName, automataFile);
        ruleBase.initialize();
        return ruleBase;
    }

    /**
     * Imports semantic rules from a file, returning an uninitialized rule base.
     * The rule base is named after the file name with its last extension removed,
     * and its source is the absolute path of the file.
     *
     * @param fileName the rule file to use
     * @param automataFile the automata file to use, or null to not use any
     * @throws java.io.IOException if the file can not be read for some reason
     * @throws ParseException if the file does not contain a valid semantic rule set
     */
    public RuleBase privateImportFile(String fileName, String automataFile) throws IOException, ParseException {
        BufferedReader reader = null;
        try {
            reader = IOUtils.createReader(fileName, "utf-8");
            File file = new File(fileName);
            var ruleBase = new RuleBase(stripLastName(file.getName()));
            privateImportFromReader(reader, file.getAbsolutePath(), automataFile, ruleBase);
            return ruleBase;
        }
        finally {
            // reader is still null here if createReader threw
            IOUtils.closeReader(reader);
        }
    }

    /**
     * Read and include a rule base in another.
     * Does nothing if this importer ignores includes. The included rule base is looked up
     * in the config if one is set, and otherwise as a file next to the including rule base.
     *
     * @param ruleBaseName the name of the rule base to include
     * @param ruleBase the rule base to include into
     * @throws java.io.IOException if the rule base to include can not be located or read as a file
     * @throws ParseException if the rule base to include is not found in config, or can not be parsed
     */
    public void include(String ruleBaseName, RuleBase ruleBase) throws IOException, ParseException {
        if (ignoreIncludes) return;
        RuleBase included = (config == null) ? privateImportFromDirectory(ruleBaseName, ruleBase)
                                             : privateImportFromConfig(ruleBaseName);
        ruleBase.include(included);
    }

    /**
     * Returns an uninitialized rule base read from the directory containing the source of the given rule base.
     * The suffix ".sr" is appended to the name if it is not already present.
     *
     * @throws IOException if the including rule base has no source directory, or the file does not exist
     */
    private RuleBase privateImportFromDirectory(String ruleBaseName, RuleBase ruleBase) throws IOException, ParseException {
        String source = ruleBase.getSource();
        // getParentFile() returns null when the source has no directory part (e.g. a rule base
        // imported from a string): report that instead of failing with a NullPointerException
        File includeDir = (source == null) ? null : new File(source).getParentFile();
        if (includeDir == null)
            throw new IOException("Cannot include '" + ruleBaseName + "': The including rule base source '" +
                                  source + "' has no parent directory to look for it in");

        String includeFileName = ruleBaseName.endsWith(".sr") ? ruleBaseName : ruleBaseName + ".sr";
        File importFile = new File(includeDir.getAbsolutePath(), includeFileName);
        if ( ! importFile.exists())
            throw new IOException("No file named '" + shortenPath(importFile.getPath()) + "'");
        return privateImportFile(importFile.getPath(), null);
    }

    /**
     * Returns an uninitialized rule base read from the config of this.
     * The rule base is looked up by the given name first, and then by the name with its last extension removed.
     */
    private RuleBase privateImportFromConfig(String ruleBaseName) throws ParseException {
        SemanticRulesConfig.Rulebase ruleBaseConfig = findRuleBaseConfig(config, ruleBaseName);
        if (ruleBaseConfig == null)
            ruleBaseConfig = findRuleBaseConfig(config, stripLastName(ruleBaseName));
        if (ruleBaseConfig == null)
            throw new ParseException("Could not find included rule base '" + ruleBaseName + "'");
        return privateImportConfig(ruleBaseConfig);
    }

    /** Returns the first rule base config having exactly the given name, or null if there is none */
    private SemanticRulesConfig.Rulebase findRuleBaseConfig(SemanticRulesConfig config, String ruleBaseName) {
        for (SemanticRulesConfig.Rulebase candidate : config.rulebase()) {
            if (candidate.name().equals(ruleBaseName))
                return candidate;
        }
        return null;
    }

    /**
     * Sets the automata file of a rule base, unless this importer ignores automatas,
     * in which case the rule base is only marked as using automata.
     */
    public void setAutomata(RuleBase base, String automata) {
        if (ignoreAutomatas)
            base.setUsesAutomata(true); // Stop it from failing on automata condition references
        else
            base.setAutomataFile(automata);
    }

    /** Returns the given name with everything from the last dot removed, or the name unchanged if it has no dot */
    static String stripLastName(String fileName) {
        int lastDotIndex = fileName.lastIndexOf('.');
        if (lastDotIndex < 0) return fileName;
        return fileName.substring(0, lastDotIndex);
    }

    /**
     * Imports rules from a string into a new, initialized rule base
     *
     * @param string the rules to import
     * @param automataFile the automata file to use, or null to not use any
     * @throws ParseException if the string contains illegal rule syntax
     */
    public RuleBase importString(String string, String automataFile) throws ParseException {
        return importString(string, automataFile, null, null);
    }

    /**
     * Imports rules from a string
     *
     * @param string the rules to import
     * @param automataFile the automata file to use, or null to not use any
     * @param sourceName a string describing the source of the rules used for error messages, or null
     * @param ruleBase an existing rule base to import the rules into, or null to create a new one
     * @return the initialized rule base containing the rules added from the string
     * @throws ParseException if the string contains illegal rule syntax
     */
    public RuleBase importString(String string, String automataFile, String sourceName, RuleBase ruleBase) throws ParseException {
        return importFromReader(new StringReader(string), sourceName, automataFile, ruleBase);
    }

    /**
     * Imports an initialized rule base from a config object
     *
     * @throws ParseException if the config does not contain a valid semantic rule set
     * @throws IllegalStateException if this importer was not created with a config
     */
    public RuleBase importConfig(SemanticRulesConfig.Rulebase ruleBaseConfig) throws ParseException {
        RuleBase ruleBase = privateImportConfig(ruleBaseConfig);
        ruleBase.initialize();
        return ruleBase;
    }

    /**
     * Imports an uninitialized rule base from a config object.
     * The config name is used both as the rule base name and as its source name.
     *
     * @throws ParseException if the config does not contain a valid semantic rule set
     * @throws IllegalStateException if this importer was not created with a config
     */
    public RuleBase privateImportConfig(SemanticRulesConfig.Rulebase ruleBaseConfig) throws ParseException {
        if (config == null) throw new IllegalStateException("Must initialize with config if importing from config");
        RuleBase ruleBase = new RuleBase(ruleBaseConfig.name());
        return privateImportFromReader(new StringReader(ruleBaseConfig.rules()),
                                       ruleBaseConfig.name(),
                                       ruleBaseConfig.automata(),
                                       ruleBase);
    }

    /**
     * Imports rules from a reader
     *
     * @param reader the reader containing rules on the proper syntax
     * @param sourceName a string describing the source of the rules used for error messages
     * @param automataFile the automata file to use, or null or empty to not use any
     * @param ruleBase an existing rule base to import the rules into, or null to create a new one
     * @return the rule base containing the rules added from the reader
     * @throws ParseException if the reader contains illegal rule syntax
     */
    public RuleBase importFromReader(Reader reader, String sourceName, String automataFile, RuleBase ruleBase) throws ParseException {
        ruleBase = privateImportFromReader(reader, sourceName, automataFile, ruleBase);
        ruleBase.initialize();
        return ruleBase;
    }

    /**
     * Imports rules from a reader, returning an uninitialized rule base.
     * Anything thrown while importing is wrapped in a ParseException having it as cause.
     *
     * @param reader the reader containing rules on the proper syntax
     * @param inputSourceName a string describing the source of the rules, or null to use "anonymous"
     * @param automataFile the automata file to use, or null or empty to not use any
     * @param ruleBase an existing rule base to import the rules into, or null to create a new one
     * @return the rule base containing the rules added from the reader
     * @throws ParseException if the rules could not be imported
     */
    public RuleBase privateImportFromReader(Reader reader, String inputSourceName, String automataFile, RuleBase ruleBase) throws ParseException {
        var sourceName = (inputSourceName == null ? "anonymous" : inputSourceName);
        try {
            if (ruleBase == null)
                ruleBase = new RuleBase(sourceName);
            // The source must be set before parsing, as file includes are resolved relative to it
            ruleBase.setSource(sourceName.replace('\\', '/'));
            new SemanticsParser(reader, linguistics).semanticRules(ruleBase, this);
            // NOTE(review): this sets the file directly rather than through setAutomata(),
            // so ignoreAutomatas is not consulted here - confirm that this is intended
            if (automataFile != null && !automataFile.isEmpty())
                ruleBase.setAutomataFile(automataFile.replace('\\', '/'));
            return ruleBase;
        } catch (Throwable t) { // also catches token mgr errors
            ParseException p = new ParseException("Could not parse rule '" + shortenPath(sourceName) + "'");
            p.initCause(t);
            throw p;
        }
    }

    /**
     * Snips what's in front of rules/ if "rules/" is present in the string
     * to avoid displaying details about where application content is copied
     * (if rules/ is present, these rules are read from an application package).
     * If "rules/" is not present, "rules\" is looked for instead, such that
     * paths using backslash as separator are shortened as well.
     */
    private static String shortenPath(String path) {
        int rulesIndex = path.indexOf("rules/");
        if (rulesIndex < 0)
            rulesIndex = path.indexOf("rules\\");
        if (rulesIndex < 0) return path;
        return path.substring(rulesIndex);
    }

}