summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArne Juul <arnej@verizonmedia.com>2020-06-19 09:36:51 +0000
committerArne Juul <arnej@verizonmedia.com>2020-06-19 09:36:51 +0000
commitae1297268fa447f7fb7f2f548d4f32c5327ec187 (patch)
treedb10da7ae3848d24d9be728175b36b9596d1e19b
parentb70147cbdc3139ae9ea43b8341b79b3e1cbbdd1b (diff)
update SelectParser and add targetHits
* targetHits is now the preferred annotation replacing targetNumHits (for wand, weakAnd, nearestNeighbor); the old name still works as an alias
* note: targetNumHits is still produced when serializing to YQL
* debugging/trace output will print the "targetHits" form
* add NearestNeighborItem support to SelectParser
* implement disclose() in NearestNeighborItem
-rw-r--r--container-search/abi-spec.json3
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/query/NearestNeighborItem.java13
-rw-r--r--container-search/src/main/java/com/yahoo/search/query/SelectParser.java51
-rw-r--r--container-search/src/main/java/com/yahoo/search/searchers/ValidateNearestNeighborSearcher.java2
-rw-r--r--container-search/src/main/java/com/yahoo/search/yql/YqlParser.java23
-rw-r--r--container-search/src/test/java/com/yahoo/search/searchers/ValidateNearestNeighborTestCase.java6
-rw-r--r--container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java14
-rw-r--r--container-search/src/test/java/com/yahoo/select/SelectTestCase.java4
8 files changed, 93 insertions, 23 deletions
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json
index 2b4424654a2..ba52826cd3f 100644
--- a/container-search/abi-spec.json
+++ b/container-search/abi-spec.json
@@ -869,7 +869,8 @@
"public java.lang.String getName()",
"public int getTermCount()",
"public int encode(java.nio.ByteBuffer)",
- "protected void appendBodyString(java.lang.StringBuilder)"
+ "protected void appendBodyString(java.lang.StringBuilder)",
+ "public void disclose(com.yahoo.prelude.query.textualrepresentation.Discloser)"
],
"fields": []
},
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/NearestNeighborItem.java b/container-search/src/main/java/com/yahoo/prelude/query/NearestNeighborItem.java
index 52ef6c40a6a..be3ae913476 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/NearestNeighborItem.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/NearestNeighborItem.java
@@ -4,6 +4,7 @@ package com.yahoo.prelude.query;
import com.google.common.annotations.Beta;
import com.yahoo.compress.IntegerCompressor;
+import com.yahoo.prelude.query.textualrepresentation.Discloser;
import java.nio.ByteBuffer;
@@ -83,7 +84,17 @@ public class NearestNeighborItem extends SimpleTaggableItem {
buffer.append(",queryTensorName=").append(queryTensorName);
buffer.append(",hnsw.exploreAdditionalHits=").append(hnswExploreAdditionalHits);
buffer.append(",approximate=").append(approximate);
- buffer.append(",targetNumHits=").append(targetNumHits).append("}");
+ buffer.append(",targetHits=").append(targetNumHits).append("}");
+ }
+
+ /**
+ * Discloses the properties of this item to the given discloser.
+ * The superclass discloses its own properties first; this method then adds
+ * field, queryTensorName, hnsw.exploreAdditionalHits, approximate and targetHits.
+ * The "targetHits" property carries the value of the targetNumHits member.
+ *
+ * @param discloser the receiver of the disclosed properties
+ */
+ @Override
+ public void disclose(Discloser discloser) {
+ super.disclose(discloser);
+ discloser.addProperty("field", field);
+ discloser.addProperty("queryTensorName", queryTensorName);
+ discloser.addProperty("hnsw.exploreAdditionalHits", hnswExploreAdditionalHits);
+ discloser.addProperty("approximate", approximate);
+ discloser.addProperty("targetHits", targetNumHits);
 }
}
diff --git a/container-search/src/main/java/com/yahoo/search/query/SelectParser.java b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java
index 775dca7c444..42e1bf46902 100644
--- a/container-search/src/main/java/com/yahoo/search/query/SelectParser.java
+++ b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java
@@ -16,6 +16,7 @@ import com.yahoo.prelude.query.IntItem;
import com.yahoo.prelude.query.Item;
import com.yahoo.prelude.query.Limit;
import com.yahoo.prelude.query.NearItem;
+import com.yahoo.prelude.query.NearestNeighborItem;
import com.yahoo.prelude.query.NotItem;
import com.yahoo.prelude.query.ONearItem;
import com.yahoo.prelude.query.OrItem;
@@ -93,14 +94,17 @@ public class SelectParser implements Parser {
private static final String ACCENT_DROP = "accentDrop";
private static final String ALTERNATIVES = "alternatives";
private static final String AND_SEGMENTING = "andSegmenting";
+ private static final String APPROXIMATE = "approximate";
private static final String DISTANCE = "distance";
private static final String DOT_PRODUCT = "dotProduct";
private static final String EQUIV = "equiv";
private static final String FILTER = "filter";
private static final String HIT_LIMIT = "hitLimit";
+ private static final String HNSW_EXPLORE_ADDITIONAL_HITS = "hnsw.exploreAdditionalHits";
private static final String IMPLICIT_TRANSFORMS = "implicitTransforms";
private static final String LABEL = "label";
private static final String NEAR = "near";
+ private static final String NEAREST_NEIGHBOR = "nearestNeighbor";
private static final String NORMALIZE_CASE = "normalizeCase";
private static final String ONEAR = "onear";
private static final String PHRASE = "phrase";
@@ -114,6 +118,7 @@ public class SelectParser implements Parser {
private static final String STEM = "stem";
private static final String SUBSTRING = "substring";
private static final String SUFFIX = "suffix";
+ private static final String TARGET_HITS = "targetHits";
private static final String TARGET_NUM_HITS = "targetNumHits";
private static final String THRESHOLD_BOOST_FACTOR = "thresholdBoostFactor";
private static final String UNIQUE_ID = "id";
@@ -130,7 +135,7 @@ public class SelectParser implements Parser {
private static final String CONTAINS = "contains";
private static final String MATCHES = "matches";
private static final String CALL = "call";
- private static final List<String> FUNCTION_CALLS = Arrays.asList(WAND, WEIGHTED_SET, DOT_PRODUCT, PREDICATE, RANK, WEAK_AND);
+ private static final List<String> FUNCTION_CALLS = Arrays.asList(WAND, WEIGHTED_SET, DOT_PRODUCT, NEAREST_NEIGHBOR, PREDICATE, RANK, WEAK_AND);
public SelectParser(ParserEnvironment environment) {
indexFacts = environment.getIndexFacts();
@@ -259,6 +264,8 @@ public class SelectParser implements Parser {
return buildWeightedSet(key, value);
case DOT_PRODUCT:
return buildDotProduct(key, value);
+ case NEAREST_NEIGHBOR:
+ return buildNearestNeighbor(key, value);
case PREDICATE:
return buildPredicate(key, value);
case RANK:
@@ -266,7 +273,7 @@ public class SelectParser implements Parser {
case WEAK_AND:
return buildWeakAnd(key, value);
default:
- throw newUnexpectedArgumentException(key, DOT_PRODUCT, RANK, WAND, WEAK_AND, WEIGHTED_SET, PREDICATE);
+ throw newUnexpectedArgumentException(key, DOT_PRODUCT, NEAREST_NEIGHBOR, RANK, WAND, WEAK_AND, WEIGHTED_SET, PREDICATE);
}
}
@@ -403,6 +410,38 @@ public class SelectParser implements Parser {
return orItem;
}
+    /**
+     * Builds a {@link NearestNeighborItem} from a "nearestNeighbor" function call.
+     *
+     * The call must have exactly two children: the first is used as the field argument and
+     * the second as the query property argument of the item. The annotations targetHits
+     * (or its alias targetNumHits), hnsw.exploreAdditionalHits, approximate and label are
+     * applied to the item when present; any other annotation is ignored by this method.
+     *
+     * @param key the function name that selected this builder (not used by this method)
+     * @param value the inspector holding the children and any annotations of the call
+     * @return the configured nearest neighbor item
+     * @throws IllegalArgumentException if the call does not have exactly two children
+     */
+    private Item buildNearestNeighbor(String key, Inspector value) {
+        HashMap<Integer, Inspector> children = childMap(value);
+        Preconditions.checkArgument(children.size() == 2, "Expected 2 arguments, got %s.", children.size());
+        String field = children.get(0).asString();
+        // The query property is the SECOND child (index 1); index 0 is the field.
+        String property = children.get(1).asString();
+        NearestNeighborItem item = new NearestNeighborItem(field, property);
+        Inspector annotations = getAnnotations(value);
+        if (annotations != null) {
+            annotations.traverse((ObjectTraverser) (annotation_name, annotation_value) -> {
+                // targetHits is the preferred name; targetNumHits is accepted as an alias.
+                if (TARGET_HITS.equals(annotation_name) || TARGET_NUM_HITS.equals(annotation_name)) {
+                    item.setTargetNumHits((int)(annotation_value.asDouble()));
+                }
+                if (HNSW_EXPLORE_ADDITIONAL_HITS.equals(annotation_name)) {
+                    int hnswExploreAdditionalHits = (int)(annotation_value.asDouble());
+                    item.setHnswExploreAdditionalHits(hnswExploreAdditionalHits);
+                }
+                if (APPROXIMATE.equals(annotation_name)) {
+                    boolean allowApproximate = annotation_value.asBool();
+                    item.setAllowApproximate(allowApproximate);
+                }
+                if (LABEL.equals(annotation_name)) {
+                    item.setLabel(annotation_value.asString());
+                }
+            });
+        }
+        return item;
+    }
+
private CompositeItem buildWeakAnd(String key, Inspector value) {
WeakAndItem weakAnd = new WeakAndItem();
addItemsFromInspector(weakAnd, value);
@@ -410,6 +449,9 @@ public class SelectParser implements Parser {
if (annotations != null){
annotations.traverse((ObjectTraverser) (annotation_name, annotation_value) -> {
+ if (TARGET_HITS.equals(annotation_name)){
+ weakAnd.setN((int)(annotation_value.asDouble()));
+ }
if (TARGET_NUM_HITS.equals(annotation_name)){
weakAnd.setN((int)(annotation_value.asDouble()));
}
@@ -662,7 +704,10 @@ public class SelectParser implements Parser {
HashMap<Integer, Inspector> children = childMap(value);
Preconditions.checkArgument(children.size() == 2, "Expected 2 arguments, got %s.", children.size());
- Integer target_num_hits= getIntegerAnnotation(TARGET_NUM_HITS, annotations, DEFAULT_TARGET_NUM_HITS);
+ Integer target_num_hits= getIntegerAnnotation(TARGET_HITS, annotations, null);
+ if (target_num_hits == null) {
+ target_num_hits= getIntegerAnnotation(TARGET_NUM_HITS, annotations, DEFAULT_TARGET_NUM_HITS);
+ }
WandItem out = new WandItem(children.get(0).asString(), target_num_hits);
diff --git a/container-search/src/main/java/com/yahoo/search/searchers/ValidateNearestNeighborSearcher.java b/container-search/src/main/java/com/yahoo/search/searchers/ValidateNearestNeighborSearcher.java
index 76b8c1ef8a2..aca2998cba3 100644
--- a/container-search/src/main/java/com/yahoo/search/searchers/ValidateNearestNeighborSearcher.java
+++ b/container-search/src/main/java/com/yahoo/search/searchers/ValidateNearestNeighborSearcher.java
@@ -97,7 +97,7 @@ public class ValidateNearestNeighborSearcher extends Searcher {
/** Returns an error message if this is invalid, or null if it is valid */
private String validate(NearestNeighborItem item) {
if (item.getTargetNumHits() < 1)
- return item + " has invalid targetNumHits " + item.getTargetNumHits() + ": Must be >= 1";
+ return item + " has invalid targetHits " + item.getTargetNumHits() + ": Must be >= 1";
String queryFeatureName = "query(" + item.getQueryTensorName() + ")";
Optional<Tensor> queryTensor = query.getRanking().getFeatures().getTensor(queryFeatureName);
diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
index f4560806dd2..7d17fe4f09d 100644
--- a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
+++ b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
@@ -173,6 +173,7 @@ public class YqlParser implements Parser {
static final String STEM = "stem";
static final String SUBSTRING = "substring";
static final String SUFFIX = "suffix";
+ static final String TARGET_HITS = "targetHits";
static final String TARGET_NUM_HITS = "targetNumHits";
static final String THRESHOLD_BOOST_FACTOR = "thresholdBoostFactor";
static final String UNIQUE_ID = "id";
@@ -418,8 +419,12 @@ public class YqlParser implements Parser {
String field = fetchFieldRead(args.get(0));
String property = fetchFieldRead(args.get(1));
NearestNeighborItem item = new NearestNeighborItem(field, property);
- Integer targetNumHits = getAnnotation(ast, TARGET_NUM_HITS,
+ Integer targetNumHits = getAnnotation(ast, TARGET_HITS,
Integer.class, null, "desired minimum hits to produce");
+ if (targetNumHits == null) {
+ targetNumHits = getAnnotation(ast, TARGET_NUM_HITS,
+ Integer.class, null, "desired minimum hits to produce");
+ }
if (targetNumHits != null) {
item.setTargetNumHits(targetNumHits);
}
@@ -504,9 +509,13 @@ public class YqlParser implements Parser {
List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1);
Preconditions.checkArgument(args.size() == 2, "Expected 2 arguments, got %s.", args.size());
- WandItem out = new WandItem(getIndex(args.get(0)), getAnnotation(ast,
- TARGET_NUM_HITS, Integer.class, DEFAULT_TARGET_NUM_HITS,
- "desired number of hits to accumulate in wand"));
+ Integer targetNumHits = getAnnotation(ast, TARGET_HITS,
+ Integer.class, null, "desired number of hits to accumulate in wand");
+ if (targetNumHits == null) {
+ targetNumHits = getAnnotation(ast, TARGET_NUM_HITS,
+ Integer.class, DEFAULT_TARGET_NUM_HITS, "desired number of hits to accumulate in wand");
+ }
+ WandItem out = new WandItem(getIndex(args.get(0)), targetNumHits);
Double scoreThreshold = getAnnotation(ast, SCORE_THRESHOLD, Double.class, null,
"min score for hit inclusion");
if (scoreThreshold != null) {
@@ -1028,8 +1037,12 @@ public class YqlParser implements Parser {
private CompositeItem buildWeakAnd(OperatorNode<ExpressionOperator> spec) {
WeakAndItem weakAnd = new WeakAndItem();
- Integer targetNumHits = getAnnotation(spec, TARGET_NUM_HITS,
+ Integer targetNumHits = getAnnotation(spec, TARGET_HITS,
Integer.class, null, "desired minimum hits to produce");
+ if (targetNumHits == null) {
+ targetNumHits = getAnnotation(spec, TARGET_NUM_HITS,
+ Integer.class, null, "desired minimum hits to produce");
+ }
if (targetNumHits != null) {
weakAnd.setN(targetNumHits);
}
diff --git a/container-search/src/test/java/com/yahoo/search/searchers/ValidateNearestNeighborTestCase.java b/container-search/src/test/java/com/yahoo/search/searchers/ValidateNearestNeighborTestCase.java
index 2c849a9b52c..c49603737a6 100644
--- a/container-search/src/test/java/com/yahoo/search/searchers/ValidateNearestNeighborTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/searchers/ValidateNearestNeighborTestCase.java
@@ -93,7 +93,7 @@ public class ValidateNearestNeighborTestCase {
}
private String makeQuery(String attributeTensor, String queryTensor) {
- return "select * from sources * where [{\"targetNumHits\":1}]nearestNeighbor(" + attributeTensor + ", " + queryTensor + ");";
+ return "select * from sources * where [{\"targetHits\":1}]nearestNeighbor(" + attributeTensor + ", " + queryTensor + ");";
}
@Test
@@ -139,7 +139,7 @@ public class ValidateNearestNeighborTestCase {
r.append(",queryTensorName=").append(qt);
r.append(",hnsw.exploreAdditionalHits=0");
r.append(",approximate=true");
- r.append(",targetNumHits=").append(th);
+ r.append(",targetHits=").append(th);
r.append("} ").append(errmsg);
return r.toString();
}
@@ -149,7 +149,7 @@ public class ValidateNearestNeighborTestCase {
String q = "select * from sources * where nearestNeighbor(dvector,qvector);";
Tensor t = makeTensor(tt_dense_dvector_3);
Result r = doSearch(searcher, q, t);
- assertErrMsg(desc("dvector", "qvector", 0, "has invalid targetNumHits 0: Must be >= 1"), r);
+ assertErrMsg(desc("dvector", "qvector", 0, "has invalid targetHits 0: Must be >= 1"), r);
}
@Test
diff --git a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
index e43dbd4e266..2ace21daace 100644
--- a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
@@ -513,7 +513,7 @@ public class YqlParserTestCase {
public void testWand() {
assertParse("select foo from bar where wand(description, {\"a\":1, \"b\":2});",
"WAND(10,0.0,1.0) description{[1]:\"a\",[2]:\"b\"}");
- assertParse("select foo from bar where [ {\"scoreThreshold\": 13.3, \"targetNumHits\": 7, " +
+ assertParse("select foo from bar where [ {\"scoreThreshold\": 13.3, \"targetHits\": 7, " +
"\"thresholdBoostFactor\": 2.3} ]wand(description, {\"a\":1, \"b\":2});",
"WAND(7,13.3,2.3) description{[1]:\"a\",[2]:\"b\"}");
}
@@ -550,11 +550,11 @@ public class YqlParserTestCase {
@Test
public void testNearestNeighbor() {
assertParse("select foo from bar where nearestNeighbor(semantic_embedding, my_vector);",
- "NEAREST_NEIGHBOR {field=semantic_embedding,queryTensorName=my_vector,hnsw.exploreAdditionalHits=0,approximate=true,targetNumHits=0}");
- assertParse("select foo from bar where [{\"targetNumHits\": 37}]nearestNeighbor(semantic_embedding, my_vector);",
- "NEAREST_NEIGHBOR {field=semantic_embedding,queryTensorName=my_vector,hnsw.exploreAdditionalHits=0,approximate=true,targetNumHits=37}");
- assertParse("select foo from bar where [{\"approximate\": false, \"hnsw.exploreAdditionalHits\": 8, \"targetNumHits\": 3}]nearestNeighbor(semantic_embedding, my_vector);",
- "NEAREST_NEIGHBOR {field=semantic_embedding,queryTensorName=my_vector,hnsw.exploreAdditionalHits=8,approximate=false,targetNumHits=3}");
+ "NEAREST_NEIGHBOR {field=semantic_embedding,queryTensorName=my_vector,hnsw.exploreAdditionalHits=0,approximate=true,targetHits=0}");
+ assertParse("select foo from bar where [{\"targetHits\": 37}]nearestNeighbor(semantic_embedding, my_vector);",
+ "NEAREST_NEIGHBOR {field=semantic_embedding,queryTensorName=my_vector,hnsw.exploreAdditionalHits=0,approximate=true,targetHits=37}");
+ assertParse("select foo from bar where [{\"approximate\": false, \"hnsw.exploreAdditionalHits\": 8, \"targetHits\": 3}]nearestNeighbor(semantic_embedding, my_vector);",
+ "NEAREST_NEIGHBOR {field=semantic_embedding,queryTensorName=my_vector,hnsw.exploreAdditionalHits=8,approximate=false,targetHits=3}");
}
@Test
@@ -597,7 +597,7 @@ public class YqlParserTestCase {
public void testWeakAnd() {
assertParse("select foo from bar where weakAnd(a contains \"A\", b contains \"B\");",
"WAND(100) a:A b:B");
- assertParse("select foo from bar where [{\"targetNumHits\": 37}]weakAnd(a contains \"A\", " +
+ assertParse("select foo from bar where [{\"targetHits\": 37}]weakAnd(a contains \"A\", " +
"b contains \"B\");",
"WAND(37) a:A b:B");
diff --git a/container-search/src/test/java/com/yahoo/select/SelectTestCase.java b/container-search/src/test/java/com/yahoo/select/SelectTestCase.java
index 1715ed38964..d2896aca870 100644
--- a/container-search/src/test/java/com/yahoo/select/SelectTestCase.java
+++ b/container-search/src/test/java/com/yahoo/select/SelectTestCase.java
@@ -473,7 +473,7 @@ public class SelectTestCase {
public void testWand() {
assertParse("{ \"wand\": [\"description\", { \"a\": 1, \"b\": 2 }] }",
"WAND(10,0.0,1.0) description{[1]:\"a\",[2]:\"b\"}");
- assertParse("{ \"wand\": { \"children\": [\"description\", { \"a\": 1, \"b\": 2 }], \"attributes\": { \"scoreThreshold\": 13.3, \"targetNumHits\": 7, \"thresholdBoostFactor\": 2.3 } } }",
+ assertParse("{ \"wand\": { \"children\": [\"description\", { \"a\": 1, \"b\": 2 }], \"attributes\": { \"scoreThreshold\": 13.3, \"targetHits\": 7, \"thresholdBoostFactor\": 2.3 } } }",
"WAND(7,13.3,2.3) description{[1]:\"a\",[2]:\"b\"}");
}
@@ -525,7 +525,7 @@ public class SelectTestCase {
public void testWeakAnd() {
assertParse("{ \"weakAnd\": [{ \"contains\": [\"a\", \"A\"] }, { \"contains\": [\"b\", \"B\"] } ] }",
"WAND(100) a:A b:B");
- assertParse("{ \"weakAnd\": { \"children\" : [{ \"contains\": [\"a\", \"A\"] }, { \"contains\": [\"b\", \"B\"] } ], \"attributes\" : {\"targetNumHits\": 37} }}",
+ assertParse("{ \"weakAnd\": { \"children\" : [{ \"contains\": [\"a\", \"A\"] }, { \"contains\": [\"b\", \"B\"] } ], \"attributes\" : {\"targetHits\": 37} }}",
"WAND(37) a:A b:B");
QueryTree tree = parseWhere("{ \"weakAnd\": { \"children\" : [{ \"contains\": [\"a\", \"A\"] }, { \"contains\": [\"b\", \"B\"] } ], \"attributes\" : {\"scoreThreshold\": 41}}}");