diff options
author | Jon Bratseth <bratseth@oath.com> | 2018-01-31 17:36:21 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@oath.com> | 2018-01-31 17:36:21 +0100 |
commit | c56889931e1547a6a6db420a3c886ddf03f5bd6e (patch) | |
tree | a718a9bbacf236c54c8164def703f4e108e7287d /searchlib/src/main/java/com | |
parent | 2c25a02adbe644b3f50dc44252c6b61974d0c8d6 (diff) |
Canonicalize features
This allows us to find the type of features referenced in
ranking expressions regardless of the form they are written in.
Diffstat (limited to 'searchlib/src/main/java/com')
5 files changed, 90 insertions, 13 deletions
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/FeatureList.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/FeatureList.java index 1f58dbe9f9d..49466f1974d 100755 --- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/FeatureList.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/FeatureList.java @@ -15,7 +15,7 @@ import java.util.List; /** * Encapsulates the production rule 'featureList()' in the RankingExpressionParser. * - * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a> + * @author Simon Thoresen */ @Beta public class FeatureList implements Iterable<ReferenceNode> { diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/FeatureNames.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/FeatureNames.java new file mode 100644 index 00000000000..3788a252d76 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/FeatureNames.java @@ -0,0 +1,76 @@ +package com.yahoo.searchlib.rankingexpression.evaluation;// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +import java.util.Arrays; +import java.util.List; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +/** + * Utility methods for working with rank feature names + * + * @author bratseth + */ +public class FeatureNames { + + private static final Pattern identifierRegexp = Pattern.compile("[A-Za-z0-9_][A-Za-z0-9_-]*"); + + /** + * Returns the given feature in canonical form. + * A feature name consists of a feature shortname, followed by zero or more arguments enclosed in parentheses + * and an optional output prefixed by a dot: shortname[(argument-list)][.output] + * Arguments may be identifiers or any strings single or double quoted. + * + * Argument string values may not contain comma, single quote nor double quote characters. 
+ * + * <i>The canonical form uses no quotes for arguments which are identifiers, and double quotes otherwise.</i> + */ + public static String canonicalize(String feature) { + int startParenthesis = feature.indexOf('('); + int endParenthesis = feature.lastIndexOf(')'); + if (startParenthesis < 1) return feature; // No arguments + if (endParenthesis < startParenthesis) + throw new IllegalArgumentException("A feature name must be on the form shortname[(argument-ist)][.output], " + + "but was '" + feature + "'"); + String argumentString = feature.substring(startParenthesis + 1, endParenthesis); + List<String> canonicalizedArguments = + Arrays.stream(argumentString.split(",")) + .map(FeatureNames::canonicalizeArgument) + .collect(Collectors.toList()); + return feature.substring(0, startParenthesis + 1) + + canonicalizedArguments.stream().collect(Collectors.joining(",")) + + feature.substring(endParenthesis); + } + + /** Canonicalizes a single argument */ + private static String canonicalizeArgument(String argument) { + if (argument.startsWith("'")) { + if ( ! argument.endsWith("'")) + throw new IllegalArgumentException("Feature arguments starting by a single quote " + + "must end by a single quote, but was \"" + argument + "\""); + argument = argument.substring(1, argument.length() - 1); + } + if (argument.startsWith("\"")) { + if ( ! 
argument.endsWith("\"")) + throw new IllegalArgumentException("Feature arguments starting by a double quote " + + "must end by a double quote, but was '" + argument + "'"); + argument = argument.substring(1, argument.length() - 1); + } + if (identifierRegexp.matcher(argument).matches()) + return argument; + else + return "\"" + argument + "\""; + } + + public static String asConstantFeature(String constantName) { + return canonicalize("constant(\"" + constantName + "\")"); + } + + public static String asAttributeFeature(String attributeName) { + return canonicalize("attribute(\"" + attributeName + "\")"); + } + + public static String asQueryFeature(String propertyName) { + return canonicalize("query(\"" + propertyName + "\")"); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java index 39efe641f26..333af529cb9 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java @@ -24,13 +24,10 @@ public class MapContext extends Context { /** * Creates a map context from a map. - * The ownership of the map is transferred to this - it cannot be further modified by the caller. * All the Values of the map will be frozen. */ public MapContext(Map<String,Value> bindings) { - this.bindings = bindings; - for (Value boundValue : bindings.values()) - boundValue.freeze(); + bindings.forEach((k, v) -> this.bindings.put(FeatureNames.canonicalize(k), v.freeze())); } /** @@ -46,7 +43,7 @@ public class MapContext extends Context { /** Returns the type of the given value key, or null if it is not bound. 
*/ @Override public TensorType getType(String key) { - Value value = bindings.get(key); + Value value = bindings.get(FeatureNames.canonicalize(key)); if (value == null) return null; return value.type(); } @@ -54,19 +51,19 @@ public class MapContext extends Context { /** Returns the value of a key. 0 is returned if the given key is not bound in this. */ @Override public Value get(String key) { - return bindings.getOrDefault(key, DoubleValue.zero); + return bindings.getOrDefault(FeatureNames.canonicalize(key), DoubleValue.zero); } /** - * Sets the value of a key.The value is frozen by this. + * Sets the value of a key. The value is frozen by this. */ @Override public void put(String key,Value value) { - bindings.put(key,value.freeze()); + bindings.put(FeatureNames.canonicalize(key), value.freeze()); } /** Returns an immutable view of the bindings of this. */ - public Map<String,Value> bindings() { + public Map<String, Value> bindings() { if (frozen) return bindings; return Collections.unmodifiableMap(bindings); } diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TypeMapContext.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TypeMapContext.java index a018aae0c3e..0335ead4420 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TypeMapContext.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TypeMapContext.java @@ -4,6 +4,7 @@ package com.yahoo.searchlib.rankingexpression.evaluation;// Copyright 2018 Yahoo import com.yahoo.tensor.TensorType; import com.yahoo.tensor.evaluation.TypeContext; +import java.util.Collections; import java.util.HashMap; import java.util.Map; @@ -17,12 +18,15 @@ public class TypeMapContext implements TypeContext { private final Map<String, TensorType> featureTypes = new HashMap<>(); public void setType(String name, TensorType type) { - featureTypes.put(name, type); + 
featureTypes.put(FeatureNames.canonicalize(name), type); } @Override public TensorType getType(String name) { - return featureTypes.get(name); + return featureTypes.get(FeatureNames.canonicalize(name)); } + /** Returns an unmodifiable map of the bindings in this */ + public Map<String, TensorType> bindings() { return Collections.unmodifiableMap(featureTypes); } + } diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNode.java index f79297f7773..05a6773c5cb 100755 --- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNode.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNode.java @@ -108,7 +108,7 @@ public final class ReferenceNode extends CompositeNode { @Override public TensorType type(TypeContext context) { // Don't support outputs of different type, for simplicity - return context.getType(name); + return context.getType(toString()); } @Override |