aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/main/java/com
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@oath.com>2018-01-31 17:36:21 +0100
committerJon Bratseth <bratseth@oath.com>2018-01-31 17:36:21 +0100
commitc56889931e1547a6a6db420a3c886ddf03f5bd6e (patch)
treea718a9bbacf236c54c8164def703f4e108e7287d /searchlib/src/main/java/com
parent2c25a02adbe644b3f50dc44252c6b61974d0c8d6 (diff)
Canonicalize features
This allows us to find the type of features referenced in ranking expressions regardless of the form they are written in.
Diffstat (limited to 'searchlib/src/main/java/com')
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/rankingexpression/FeatureList.java2
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/FeatureNames.java76
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java15
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TypeMapContext.java8
-rwxr-xr-xsearchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNode.java2
5 files changed, 90 insertions, 13 deletions
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/FeatureList.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/FeatureList.java
index 1f58dbe9f9d..49466f1974d 100755
--- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/FeatureList.java
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/FeatureList.java
@@ -15,7 +15,7 @@ import java.util.List;
/**
* Encapsulates the production rule 'featureList()' in the RankingExpressionParser.
*
- * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ * @author Simon Thoresen
*/
@Beta
public class FeatureList implements Iterable<ReferenceNode> {
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/FeatureNames.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/FeatureNames.java
new file mode 100644
index 00000000000..3788a252d76
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/FeatureNames.java
@@ -0,0 +1,76 @@
+package com.yahoo.searchlib.rankingexpression.evaluation;// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * Utility methods for working with rank feature names
+ *
+ * @author bratseth
+ */
+public class FeatureNames {
+
+    /** Matches arguments which are identifiers, and which therefore need no quotes in the canonical form */
+    private static final Pattern identifierRegexp = Pattern.compile("[A-Za-z0-9_][A-Za-z0-9_-]*");
+
+    /**
+     * Returns the given feature in canonical form.
+     * A feature name consists of a feature shortname, followed by an optional comma-separated argument list
+     * enclosed in parentheses, and an optional output prefixed by a dot: shortname[(argument-list)][.output]
+     * Arguments may be identifiers or any strings single or double quoted.
+     * Whitespace surrounding an argument (outside any quotes) is not significant and is removed.
+     *
+     * Argument string values may not contain comma, single quote nor double quote characters.
+     *
+     * <i>The canonical form use no quotes for arguments which are identifiers, and double quotes otherwise.</i>
+     *
+     * @param feature the feature name to canonicalize
+     * @return the canonical form of the feature name, or the name unchanged if it has no
+     *         opening parenthesis after its first character
+     * @throws IllegalArgumentException if the argument list is not closed by a parenthesis,
+     *         or if a quoted argument is not terminated by the quote character it starts with
+     */
+    public static String canonicalize(String feature) {
+        int startParenthesis = feature.indexOf('(');
+        int endParenthesis = feature.lastIndexOf(')');
+        if (startParenthesis < 1) return feature; // No arguments
+        if (endParenthesis < startParenthesis)
+            throw new IllegalArgumentException("A feature name must be on the form shortname[(argument-list)][.output], " +
+                                               "but was '" + feature + "'");
+        String argumentString = feature.substring(startParenthesis + 1, endParenthesis);
+        String canonicalizedArguments = Arrays.stream(argumentString.split(","))
+                                              .map(FeatureNames::canonicalizeArgument)
+                                              .collect(Collectors.joining(","));
+        // Keep the shortname including '(' and everything from ')' onwards (the optional output) as-is
+        return feature.substring(0, startParenthesis + 1) +
+               canonicalizedArguments +
+               feature.substring(endParenthesis);
+    }
+
+    /**
+     * Canonicalizes a single argument: Removes surrounding whitespace and quotes,
+     * and adds double quotes back if the remaining value is not an identifier.
+     *
+     * @throws IllegalArgumentException if the argument starts by a quote character but does not end by the same
+     */
+    private static String canonicalizeArgument(String argument) {
+        argument = argument.trim(); // "foo(a, b)" and "foo(a,b)" must canonicalize to the same name
+        if (argument.startsWith("'")) {
+            // A lone quote character both starts and ends with a quote, so the length must be checked as well
+            if (argument.length() < 2 || ! argument.endsWith("'"))
+                throw new IllegalArgumentException("Feature arguments starting by a single quote " +
+                                                   "must end by a single quote, but was \"" + argument + "\"");
+            argument = argument.substring(1, argument.length() - 1);
+        }
+        if (argument.startsWith("\"")) {
+            if (argument.length() < 2 || ! argument.endsWith("\""))
+                throw new IllegalArgumentException("Feature arguments starting by a double quote " +
+                                                   "must end by a double quote, but was '" + argument + "'");
+            argument = argument.substring(1, argument.length() - 1);
+        }
+        if (identifierRegexp.matcher(argument).matches())
+            return argument;
+        else
+            return "\"" + argument + "\"";
+    }
+
+    /** Returns the canonical name of the constant feature having the given constant name as its argument */
+    public static String asConstantFeature(String constantName) {
+        return canonicalize("constant(\"" + constantName + "\")");
+    }
+
+    /** Returns the canonical name of the attribute feature having the given attribute name as its argument */
+    public static String asAttributeFeature(String attributeName) {
+        return canonicalize("attribute(\"" + attributeName + "\")");
+    }
+
+    /** Returns the canonical name of the query feature having the given property name as its argument */
+    public static String asQueryFeature(String propertyName) {
+        return canonicalize("query(\"" + propertyName + "\")");
+    }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java
index 39efe641f26..333af529cb9 100644
--- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java
@@ -24,13 +24,10 @@ public class MapContext extends Context {
/**
* Creates a map context from a map.
- * The ownership of the map is transferred to this - it cannot be further modified by the caller.
* All the Values of the map will be frozen.
*/
public MapContext(Map<String,Value> bindings) {
- this.bindings = bindings;
- for (Value boundValue : bindings.values())
- boundValue.freeze();
+ bindings.forEach((k, v) -> this.bindings.put(FeatureNames.canonicalize(k), v.freeze()));
}
/**
@@ -46,7 +43,7 @@ public class MapContext extends Context {
/** Returns the type of the given value key, or null if it is not bound. */
@Override
public TensorType getType(String key) {
- Value value = bindings.get(key);
+ Value value = bindings.get(FeatureNames.canonicalize(key));
if (value == null) return null;
return value.type();
}
@@ -54,19 +51,19 @@ public class MapContext extends Context {
/** Returns the value of a key. 0 is returned if the given key is not bound in this. */
@Override
public Value get(String key) {
- return bindings.getOrDefault(key, DoubleValue.zero);
+ return bindings.getOrDefault(FeatureNames.canonicalize(key), DoubleValue.zero);
}
/**
- * Sets the value of a key.The value is frozen by this.
+ * Sets the value of a key. The value is frozen by this.
*/
@Override
public void put(String key,Value value) {
- bindings.put(key,value.freeze());
+ bindings.put(FeatureNames.canonicalize(key), value.freeze());
}
/** Returns an immutable view of the bindings of this. */
- public Map<String,Value> bindings() {
+ public Map<String, Value> bindings() {
if (frozen) return bindings;
return Collections.unmodifiableMap(bindings);
}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TypeMapContext.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TypeMapContext.java
index a018aae0c3e..0335ead4420 100644
--- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TypeMapContext.java
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TypeMapContext.java
@@ -4,6 +4,7 @@ package com.yahoo.searchlib.rankingexpression.evaluation;// Copyright 2018 Yahoo
import com.yahoo.tensor.TensorType;
import com.yahoo.tensor.evaluation.TypeContext;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
@@ -17,12 +18,15 @@ public class TypeMapContext implements TypeContext {
private final Map<String, TensorType> featureTypes = new HashMap<>();
public void setType(String name, TensorType type) {
- featureTypes.put(name, type);
+ featureTypes.put(FeatureNames.canonicalize(name), type);
}
@Override
public TensorType getType(String name) {
- return featureTypes.get(name);
+ return featureTypes.get(FeatureNames.canonicalize(name));
}
+ /** Returns an unmodifiable map of the bindings in this */
+ public Map<String, TensorType> bindings() { return Collections.unmodifiableMap(featureTypes); }
+
}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNode.java
index f79297f7773..05a6773c5cb 100755
--- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNode.java
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNode.java
@@ -108,7 +108,7 @@ public final class ReferenceNode extends CompositeNode {
@Override
public TensorType type(TypeContext context) {
// Don't support outputs of different type, for simplicity
- return context.getType(name);
+ return context.getType(toString());
}
@Override