aboutsummaryrefslogtreecommitdiffstats
path: root/config-model/src/main/java/com/yahoo/searchdefinition/FeatureNames.java
blob: dd03cb8b2a7e18e4ef467cb54025d090c23db018 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/*
 * // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 *
 *
 */
package com.yahoo.searchdefinition;

import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * Utility methods for query, document and constant rank feature names
 *
 * @author bratseth
 */
public class FeatureNames {

    private static final Pattern identifierRegexp = Pattern.compile("[A-Za-z0-9_][A-Za-z0-9_-]*");

    /**
     * <p>Returns the given query, document or constant feature in canonical form.
     * A feature name consists of a feature type name (query, attribute or constant),
     * followed by one argument enclosed in parentheses.
     * The argument may be an identifier or any string single or double quoted.</p>
     *
     * <p>Argument string values may not contain comma, single quote nor double quote characters.</p>
     *
     * <p><i>The canonical form uses no quotes for arguments which are identifiers, and double quotes otherwise.</i></p>
     *
     * <p>Note that the above definition is not true for features in general, which accept any ranking expression
     * as argument.</p>
     *
     * @param feature the feature name to canonicalize
     * @return the feature name in canonical form
     * @throws IllegalArgumentException if the feature name is not valid
     */
    // Note that this implementation is more general than what is described above:
    // It accepts any number of arguments and an optional output
    public static String canonicalize(String feature) {
        return canonicalizeOrThrow(feature).orElseThrow(() ->
            new IllegalArgumentException("A feature name must be on the form query(name), attribute(name) or " +
                                         "constant(name), but was '" + feature + "'"
        ));
    }

    /**
     * Canonicalizes the given argument as in canonicalize, but returns empty instead of throwing an exception if
     * the argument is not a valid feature. This includes features having an argument with an unterminated quote.
     *
     * @param feature the feature name to canonicalize
     * @return the feature name in canonical form, or empty if it is not a valid query, attribute or
     *         constant feature name
     */
    public static Optional<String> canonicalizeIfValid(String feature) {
        try {
            return canonicalizeOrThrow(feature);
        }
        catch (IllegalArgumentException ignored) {
            // A malformed argument (unterminated quote) means "not valid", which this method reports as empty
            return Optional.empty();
        }
    }

    /**
     * Does the actual canonicalization.
     *
     * @return the canonical form, or empty if the feature does not have the form type(arguments) with
     *         type being query, attribute or constant
     * @throws IllegalArgumentException if an argument starts with a quote but does not end with the same quote
     */
    private static Optional<String> canonicalizeOrThrow(String feature) {
        int startParenthesis = feature.indexOf('(');
        if (startParenthesis < 0)
            return Optional.empty();
        // The last end parenthesis is used such that whatever follows it (an output) is carried over unchanged
        int endParenthesis = feature.lastIndexOf(')');
        String featureType = feature.substring(0, startParenthesis);
        if ( ! ( featureType.equals("query") || featureType.equals("attribute") || featureType.equals("constant")))
            return Optional.empty();
        if (endParenthesis < startParenthesis)
            return Optional.empty();
        String argumentString = feature.substring(startParenthesis + 1, endParenthesis);
        String canonicalizedArguments =
                Arrays.stream(argumentString.split(","))
                        .map(FeatureNames::canonicalizeArgument)
                        .collect(Collectors.joining(","));
        // feature.substring(endParenthesis) starts with the end parenthesis itself
        return Optional.of(featureType + "(" + canonicalizedArguments + feature.substring(endParenthesis));
    }

    /**
     * Canonicalizes a single argument: Removes any enclosing quotes, and then adds double quotes
     * unless the remaining string is an identifier.
     *
     * @throws IllegalArgumentException if the argument starts with a quote but does not end with the same quote
     */
    private static String canonicalizeArgument(String argument) {
        // The length checks reject an argument consisting of a lone quote character,
        // which both starts and ends with the quote but has no matching closing quote
        if (argument.startsWith("'")) {
            if (argument.length() < 2 || ! argument.endsWith("'"))
                throw new IllegalArgumentException("Feature arguments starting by a single quote " +
                                                   "must end by a single quote, but was \"" + argument + "\"");
            argument = argument.substring(1, argument.length() - 1);
        }
        // Deliberately not an else-if: A double quoted string inside single quotes is unwrapped twice
        if (argument.startsWith("\"")) {
            if (argument.length() < 2 || ! argument.endsWith("\""))
                throw new IllegalArgumentException("Feature arguments starting by a double quote " +
                                                   "must end by a double quote, but was '" + argument + "'");
            argument = argument.substring(1, argument.length() - 1);
        }
        if (identifierRegexp.matcher(argument).matches())
            return argument;
        else
            return "\"" + argument + "\"";
    }

    /** Returns the canonical form of the constant feature having the given name as argument */
    public static String asConstantFeature(String constantName) {
        return canonicalize("constant(\"" + constantName + "\")");
    }

    /** Returns the canonical form of the attribute feature having the given name as argument */
    public static String asAttributeFeature(String attributeName) {
        return canonicalize("attribute(\"" + attributeName + "\")");
    }

    /** Returns the canonical form of the query feature having the given name as argument */
    public static String asQueryFeature(String propertyName) {
        return canonicalize("query(\"" + propertyName + "\")");
    }

    /**
     * Returns the single argument of the given feature name, without any quotes,
     * or empty if it is not a valid query, attribute or constant feature name
     */
    public static Optional<String> argumentOf(String feature) {
        return canonicalizeIfValid(feature).map(f -> {
            int startParenthesis = f.indexOf('(');
            // Use the last end parenthesis, as canonicalization does, such that a quoted argument
            // containing an end parenthesis is not cut short
            int endParenthesis = f.lastIndexOf(')');
            String possiblyQuotedArgument = f.substring(startParenthesis + 1, endParenthesis);
            if (possiblyQuotedArgument.startsWith("\""))
                return possiblyQuotedArgument.substring(1, possiblyQuotedArgument.length() - 1);
            else
                return possiblyQuotedArgument;
        });
    }

}