diff options
author | Jon Bratseth <bratseth@oath.com> | 2018-02-03 17:17:47 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@oath.com> | 2018-02-03 17:17:47 +0100 |
commit | ab84971140f1782c0c7a693dd45c324766094c7c (patch) | |
tree | 84816cf83b25b11e527a52e1402fe94fb37ef7f4 /searchlib | |
parent | b729ea38f6ed4e23975c54321132c59627a4c6e1 (diff) |
Unify canonicalization
Diffstat (limited to 'searchlib')
5 files changed, 13 insertions, 183 deletions
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/FeatureNames.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/FeatureNames.java deleted file mode 100644 index 8912d6e55b6..00000000000 --- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/FeatureNames.java +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.rankingexpression.evaluation; - -import java.util.Arrays; -import java.util.List; -import java.util.Optional; -import java.util.regex.Pattern; -import java.util.stream.Collectors; - -/** - * Utility methods for query, document and constant rank feature names - * - * @author bratseth - */ -public class FeatureNames { - - private static final Pattern identifierRegexp = Pattern.compile("[A-Za-z0-9_][A-Za-z0-9_-]*"); - - /** - * <p>Returns the given query, document or constant feature in canonical form. - * A feature name consists of a feature type name (query, attribute or constant), - * followed by one argument enclosed in quotes. - * The argument may be an identifier or any string single or double quoted.</p> - * - * <p>Argument string values may not contain comma, single quote nor double quote characters.</p> - * - * <p><i>The canonical form use no quotes for arguments which are identifiers, and double quotes otherwise.</i></p> - * - * <p>Note that the above definition is not true for features in general, which accept any ranking expression - * as argument.</p> - * - * @throws IllegalArgumentException if the feature name is not valid - */ - // Note that this implementation is more general than what is described above: - // It accepts any number of arguments and an optional output - public static String canonicalize(String feature) { - return canonicalizeIfValid(feature).orElseThrow(() -> - new IllegalArgumentException("A feature name must be on the form query(name), attribute(name) or " + - "constant(name), but was '" + feature + "'" - )); - } - - /** - * Canonicalizes the given argument as in canonicalize, but returns empty instead of throwing an exception if - * the argument is not a valid feature - */ - public static Optional<String> canonicalizeIfValid(String feature) { - int startParenthesis = feature.indexOf('('); - if (startParenthesis < 0) - return Optional.empty(); - int endParenthesis = feature.lastIndexOf(')'); - String featureType = feature.substring(0, startParenthesis); - if ( ! ( featureType.equals("query") || featureType.equals("attribute") || featureType.equals("constant"))) - return Optional.empty(); - if (startParenthesis < 1) return Optional.of(feature); // No arguments - if (endParenthesis < startParenthesis) - return Optional.empty(); - String argumentString = feature.substring(startParenthesis + 1, endParenthesis); - List<String> canonicalizedArguments = - Arrays.stream(argumentString.split(",")) - .map(FeatureNames::canonicalizeArgument) - .collect(Collectors.toList()); - return Optional.of(featureType + "(" + - canonicalizedArguments.stream().collect(Collectors.joining(",")) + - feature.substring(endParenthesis)); - } - - /** Canomicalizes a single argument */ - private static String canonicalizeArgument(String argument) { - if (argument.startsWith("'")) { - if ( ! argument.endsWith("'")) - throw new IllegalArgumentException("Feature arguments starting by a single quote " + - "must end by a single quote, but was \"" + argument + "\""); - argument = argument.substring(1, argument.length() - 1); - } - if (argument.startsWith("\"")) { - if ( ! argument.endsWith("\"")) - throw new IllegalArgumentException("Feature arguments starting by a double quote " + - "must end by a double quote, but was '" + argument + "'"); - argument = argument.substring(1, argument.length() - 1); - } - if (identifierRegexp.matcher(argument).matches()) - return argument; - else - return "\"" + argument + "\""; - } - - public static String asConstantFeature(String constantName) { - return canonicalize("constant(\"" + constantName + "\")"); - } - - public static String asAttributeFeature(String attributeName) { - return canonicalize("attribute(\"" + attributeName + "\")"); - } - - public static String asQueryFeature(String propertyName) { - return canonicalize("query(\"" + propertyName + "\")"); - } - - /** - * Returns the single argument of the given feature name, without any quotes, - * or empty if it is not a valid query, attribute or constant feature name - */ - public static Optional<String> argumentOf(String feature) { - return canonicalizeIfValid(feature).map(f -> { - int startParenthesis = f.indexOf("("); - int endParenthesis = f.indexOf(")"); - String possiblyQuotedArgument = f.substring(startParenthesis + 1, endParenthesis); - if (possiblyQuotedArgument.startsWith("\"")) - return possiblyQuotedArgument.substring(1, possiblyQuotedArgument.length() - 1); - else - return possiblyQuotedArgument; - }); - } - -} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java index 333af529cb9..a81d0c89f8f 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java @@ -27,7 +27,7 @@ public class MapContext extends Context { * All the Values of the map will be frozen. */ public MapContext(Map<String,Value> bindings) { - bindings.forEach((k, v) -> this.bindings.put(FeatureNames.canonicalize(k), v.freeze())); + bindings.forEach((k, v) -> this.bindings.put(k, v.freeze())); } /** @@ -43,7 +43,7 @@ public class MapContext extends Context { /** Returns the type of the given value key, or null if it is not bound. */ @Override public TensorType getType(String key) { - Value value = bindings.get(FeatureNames.canonicalize(key)); + Value value = bindings.get(key); if (value == null) return null; return value.type(); } @@ -51,7 +51,7 @@ public class MapContext extends Context { /** Returns the value of a key. 0 is returned if the given key is not bound in this. */ @Override public Value get(String key) { - return bindings.getOrDefault(FeatureNames.canonicalize(key), DoubleValue.zero); + return bindings.getOrDefault(key, DoubleValue.zero); } /** @@ -59,7 +59,7 @@ public class MapContext extends Context { */ @Override public void put(String key,Value value) { - bindings.put(FeatureNames.canonicalize(key), value.freeze()); + bindings.put(key, value.freeze()); } /** Returns an immutable view of the bindings of this. */ diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TypeMapContext.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TypeMapContext.java index 0335ead4420..ff2088263d8 100644 --- a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TypeMapContext.java +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TypeMapContext.java @@ -18,12 +18,12 @@ public class TypeMapContext implements TypeContext { private final Map<String, TensorType> featureTypes = new HashMap<>(); public void setType(String name, TensorType type) { - featureTypes.put(FeatureNames.canonicalize(name), type); + featureTypes.put(name, type); } @Override public TensorType getType(String name) { - return featureTypes.get(FeatureNames.canonicalize(name)); + return featureTypes.get(name); } /** Returns an unmodifiable map of the bindings in this */ diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/FeatureNamesTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/FeatureNamesTestCase.java deleted file mode 100644 index 55319e4ec93..00000000000 --- a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/FeatureNamesTestCase.java +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.searchlib.rankingexpression.evaluation; - -import org.junit.Test; - -import static org.junit.Assert.assertEquals; - -/** - * Tests rank feature names. - * - * @author bratseth - */ -public class FeatureNamesTestCase { - - @Test - public void testCanonicalization() { - assertEquals("foo", FeatureNames.canonicalize("foo")); - assertEquals("foo.out", FeatureNames.canonicalize("foo.out")); - assertEquals("foo(bar)", FeatureNames.canonicalize("foo(bar)")); - assertEquals("foo(bar)", FeatureNames.canonicalize("foo('bar')")); - assertEquals("foo(bar)", FeatureNames.canonicalize("foo(\"bar\")")); - assertEquals("foo(bar).out", FeatureNames.canonicalize("foo(bar).out")); - assertEquals("foo(bar).out", FeatureNames.canonicalize("foo('bar').out")); - assertEquals("foo(bar).out", FeatureNames.canonicalize("foo(\"bar\").out")); - assertEquals("foo(\"ba.r\")", FeatureNames.canonicalize("foo(ba.r)")); - assertEquals("foo(\"ba.r\")", FeatureNames.canonicalize("foo('ba.r')")); - assertEquals("foo(\"ba.r\")", FeatureNames.canonicalize("foo(\"ba.r\")")); - assertEquals("foo(bar1,\"b.ar2\",\"ba/r3\").out", - FeatureNames.canonicalize("foo(bar1,b.ar2,ba/r3).out")); - assertEquals("foo(bar1,\"b.ar2\",\"ba/r3\").out", - FeatureNames.canonicalize("foo(bar1,'b.ar2',\"ba/r3\").out")); - } - - @Test - public void testArgument() { - assertEquals("bar", FeatureNames.argumentOf("foo(bar)")); - assertEquals("bar.baz", FeatureNames.argumentOf("foo(bar.baz)")); - } - - @Test - public void testConstantFeature() { - assertEquals("constant(\"foo/bar\")", FeatureNames.asConstantFeature("foo/bar")); - } - - @Test - public void testAttributeFeature() { - assertEquals("attribute(foo)", FeatureNames.asAttributeFeature("foo")); - } - - @Test - public void testQueryFeature() { - assertEquals("query(\"foo.bar\")", FeatureNames.asQueryFeature("foo.bar")); - } - -} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/TypeResolutionTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/TypeResolutionTestCase.java index 5cac2215a00..c882c887c8d 100644 --- a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/TypeResolutionTestCase.java +++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/TypeResolutionTestCase.java @@ -19,11 +19,11 @@ public class TypeResolutionTestCase { @Test public void testTypeResolution() { TypeMapContext context = new TypeMapContext(); - context.setType("query('x1')", TensorType.fromSpec("tensor(x[])")); - context.setType("query('x2')", TensorType.fromSpec("tensor(x[10])")); - context.setType("query('y1')", TensorType.fromSpec("tensor(y[])")); - context.setType("query('xy1')", TensorType.fromSpec("tensor(x[10],y[])")); - context.setType("query('xy2')", TensorType.fromSpec("tensor(x[],y[10])")); + context.setType("query(x1)", TensorType.fromSpec("tensor(x[])")); + context.setType("query(x2)", TensorType.fromSpec("tensor(x[10])")); + context.setType("query(y1)", TensorType.fromSpec("tensor(y[])")); + context.setType("query(xy1)", TensorType.fromSpec("tensor(x[10],y[])")); + context.setType("query(xy2)", TensorType.fromSpec("tensor(x[],y[10])")); assertType("tensor(x[])", "query(x1)", context); assertType("tensor(x[])", "if (1>0, query(x1), query(x2))", context); @@ -46,7 +46,8 @@ public class TypeResolutionTestCase { fail("Expected type incompatibility exception"); } catch (IllegalArgumentException expected) { - assertEquals("An if expression must produce compatible types in both alternatives, but the 'true' type is tensor(x[]) while the 'false' type is tensor(y[])", + assertEquals("An if expression must produce compatible types in both alternatives, " + + "but the 'true' type is tensor(x[]) while the 'false' type is tensor(y[])", expected.getMessage()); } catch (ParseException e) { |