From ae1297268fa447f7fb7f2f548d4f32c5327ec187 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Fri, 19 Jun 2020 09:36:51 +0000 Subject: update SelectParser and add targetHits * targetHits is now the preferred annotation replacing targetNumHits (for wand,weakand,nearestneighbor), the old name still works as an alias * note: targetNumHits is still produced when serializing to YQL * debugging/trace output will print the "targetHits" form * add nearestNeighborItem support to SelectParser * implement disclose() in nearestNeighborItem --- container-search/abi-spec.json | 3 +- .../yahoo/prelude/query/NearestNeighborItem.java | 13 +++++- .../java/com/yahoo/search/query/SelectParser.java | 51 ++++++++++++++++++++-- .../searchers/ValidateNearestNeighborSearcher.java | 2 +- .../main/java/com/yahoo/search/yql/YqlParser.java | 23 +++++++--- .../searchers/ValidateNearestNeighborTestCase.java | 6 +-- .../com/yahoo/search/yql/YqlParserTestCase.java | 14 +++--- .../test/java/com/yahoo/select/SelectTestCase.java | 4 +- 8 files changed, 93 insertions(+), 23 deletions(-) (limited to 'container-search') diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index 2b4424654a2..ba52826cd3f 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -869,7 +869,8 @@ "public java.lang.String getName()", "public int getTermCount()", "public int encode(java.nio.ByteBuffer)", - "protected void appendBodyString(java.lang.StringBuilder)" + "protected void appendBodyString(java.lang.StringBuilder)", + "public void disclose(com.yahoo.prelude.query.textualrepresentation.Discloser)" ], "fields": [] }, diff --git a/container-search/src/main/java/com/yahoo/prelude/query/NearestNeighborItem.java b/container-search/src/main/java/com/yahoo/prelude/query/NearestNeighborItem.java index 52ef6c40a6a..be3ae913476 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/NearestNeighborItem.java +++ 
b/container-search/src/main/java/com/yahoo/prelude/query/NearestNeighborItem.java @@ -4,6 +4,7 @@ package com.yahoo.prelude.query; import com.google.common.annotations.Beta; import com.yahoo.compress.IntegerCompressor; +import com.yahoo.prelude.query.textualrepresentation.Discloser; import java.nio.ByteBuffer; @@ -83,7 +84,17 @@ public class NearestNeighborItem extends SimpleTaggableItem { buffer.append(",queryTensorName=").append(queryTensorName); buffer.append(",hnsw.exploreAdditionalHits=").append(hnswExploreAdditionalHits); buffer.append(",approximate=").append(approximate); - buffer.append(",targetNumHits=").append(targetNumHits).append("}"); + buffer.append(",targetHits=").append(targetNumHits).append("}"); + } + + @Override + public void disclose(Discloser discloser) { + super.disclose(discloser); + discloser.addProperty("field", field); + discloser.addProperty("queryTensorName", queryTensorName); + discloser.addProperty("hnsw.exploreAdditionalHits", hnswExploreAdditionalHits); + discloser.addProperty("approximate", approximate); + discloser.addProperty("targetHits", targetNumHits); } } diff --git a/container-search/src/main/java/com/yahoo/search/query/SelectParser.java b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java index 775dca7c444..42e1bf46902 100644 --- a/container-search/src/main/java/com/yahoo/search/query/SelectParser.java +++ b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java @@ -16,6 +16,7 @@ import com.yahoo.prelude.query.IntItem; import com.yahoo.prelude.query.Item; import com.yahoo.prelude.query.Limit; import com.yahoo.prelude.query.NearItem; +import com.yahoo.prelude.query.NearestNeighborItem; import com.yahoo.prelude.query.NotItem; import com.yahoo.prelude.query.ONearItem; import com.yahoo.prelude.query.OrItem; @@ -93,14 +94,17 @@ public class SelectParser implements Parser { private static final String ACCENT_DROP = "accentDrop"; private static final String ALTERNATIVES = "alternatives"; 
private static final String AND_SEGMENTING = "andSegmenting"; + private static final String APPROXIMATE = "approximate"; private static final String DISTANCE = "distance"; private static final String DOT_PRODUCT = "dotProduct"; private static final String EQUIV = "equiv"; private static final String FILTER = "filter"; private static final String HIT_LIMIT = "hitLimit"; + private static final String HNSW_EXPLORE_ADDITIONAL_HITS = "hnsw.exploreAdditionalHits"; private static final String IMPLICIT_TRANSFORMS = "implicitTransforms"; private static final String LABEL = "label"; private static final String NEAR = "near"; + private static final String NEAREST_NEIGHBOR = "nearestNeighbor"; private static final String NORMALIZE_CASE = "normalizeCase"; private static final String ONEAR = "onear"; private static final String PHRASE = "phrase"; @@ -114,6 +118,7 @@ public class SelectParser implements Parser { private static final String STEM = "stem"; private static final String SUBSTRING = "substring"; private static final String SUFFIX = "suffix"; + private static final String TARGET_HITS = "targetHits"; private static final String TARGET_NUM_HITS = "targetNumHits"; private static final String THRESHOLD_BOOST_FACTOR = "thresholdBoostFactor"; private static final String UNIQUE_ID = "id"; @@ -130,7 +135,7 @@ public class SelectParser implements Parser { private static final String CONTAINS = "contains"; private static final String MATCHES = "matches"; private static final String CALL = "call"; - private static final List FUNCTION_CALLS = Arrays.asList(WAND, WEIGHTED_SET, DOT_PRODUCT, PREDICATE, RANK, WEAK_AND); + private static final List FUNCTION_CALLS = Arrays.asList(WAND, WEIGHTED_SET, DOT_PRODUCT, NEAREST_NEIGHBOR, PREDICATE, RANK, WEAK_AND); public SelectParser(ParserEnvironment environment) { indexFacts = environment.getIndexFacts(); @@ -259,6 +264,8 @@ public class SelectParser implements Parser { return buildWeightedSet(key, value); case DOT_PRODUCT: return 
buildDotProduct(key, value); + case NEAREST_NEIGHBOR: + return buildNearestNeighbor(key, value); case PREDICATE: return buildPredicate(key, value); case RANK: @@ -266,7 +273,7 @@ public class SelectParser implements Parser { case WEAK_AND: return buildWeakAnd(key, value); default: - throw newUnexpectedArgumentException(key, DOT_PRODUCT, RANK, WAND, WEAK_AND, WEIGHTED_SET, PREDICATE); + throw newUnexpectedArgumentException(key, DOT_PRODUCT, NEAREST_NEIGHBOR, RANK, WAND, WEAK_AND, WEIGHTED_SET, PREDICATE); } } @@ -403,6 +410,38 @@ public class SelectParser implements Parser { return orItem; } + private Item buildNearestNeighbor(String key, Inspector value) { + + HashMap children = childMap(value); + Preconditions.checkArgument(children.size() == 2, "Expected 2 arguments, got %s.", children.size()); + String field = children.get(0).asString(); + String property = children.get(1).asString(); /* second child, mirroring YqlParser's args.get(1) */ + NearestNeighborItem item = new NearestNeighborItem(field, property); + Inspector annotations = getAnnotations(value); + if (annotations != null){ + annotations.traverse((ObjectTraverser) (annotation_name, annotation_value) -> { + if (TARGET_HITS.equals(annotation_name)){ + item.setTargetNumHits((int)(annotation_value.asDouble())); + } + if (TARGET_NUM_HITS.equals(annotation_name)){ + item.setTargetNumHits((int)(annotation_value.asDouble())); + } + if (HNSW_EXPLORE_ADDITIONAL_HITS.equals(annotation_name)) { + int hnswExploreAdditionalHits = (int)(annotation_value.asDouble()); + item.setHnswExploreAdditionalHits(hnswExploreAdditionalHits); + } + if (APPROXIMATE.equals(annotation_name)) { + boolean allowApproximate = annotation_value.asBool(); + item.setAllowApproximate(allowApproximate); + } + if (LABEL.equals(annotation_name)) { + item.setLabel(annotation_value.asString()); + } + }); + } + return item; + } + private CompositeItem buildWeakAnd(String key, Inspector value) { WeakAndItem weakAnd = new WeakAndItem(); addItemsFromInspector(weakAnd, value); @@ -410,6 +449,9 @@ public 
class SelectParser implements Parser { if (annotations != null){ annotations.traverse((ObjectTraverser) (annotation_name, annotation_value) -> { + if (TARGET_HITS.equals(annotation_name)){ + weakAnd.setN((int)(annotation_value.asDouble())); + } if (TARGET_NUM_HITS.equals(annotation_name)){ weakAnd.setN((int)(annotation_value.asDouble())); } @@ -662,7 +704,10 @@ public class SelectParser implements Parser { HashMap children = childMap(value); Preconditions.checkArgument(children.size() == 2, "Expected 2 arguments, got %s.", children.size()); - Integer target_num_hits= getIntegerAnnotation(TARGET_NUM_HITS, annotations, DEFAULT_TARGET_NUM_HITS); + Integer target_num_hits= getIntegerAnnotation(TARGET_HITS, annotations, null); + if (target_num_hits == null) { + target_num_hits= getIntegerAnnotation(TARGET_NUM_HITS, annotations, DEFAULT_TARGET_NUM_HITS); + } WandItem out = new WandItem(children.get(0).asString(), target_num_hits); diff --git a/container-search/src/main/java/com/yahoo/search/searchers/ValidateNearestNeighborSearcher.java b/container-search/src/main/java/com/yahoo/search/searchers/ValidateNearestNeighborSearcher.java index 76b8c1ef8a2..aca2998cba3 100644 --- a/container-search/src/main/java/com/yahoo/search/searchers/ValidateNearestNeighborSearcher.java +++ b/container-search/src/main/java/com/yahoo/search/searchers/ValidateNearestNeighborSearcher.java @@ -97,7 +97,7 @@ public class ValidateNearestNeighborSearcher extends Searcher { /** Returns an error message if this is invalid, or null if it is valid */ private String validate(NearestNeighborItem item) { if (item.getTargetNumHits() < 1) - return item + " has invalid targetNumHits " + item.getTargetNumHits() + ": Must be >= 1"; + return item + " has invalid targetHits " + item.getTargetNumHits() + ": Must be >= 1"; String queryFeatureName = "query(" + item.getQueryTensorName() + ")"; Optional queryTensor = query.getRanking().getFeatures().getTensor(queryFeatureName); diff --git 
a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java index f4560806dd2..7d17fe4f09d 100644 --- a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java +++ b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java @@ -173,6 +173,7 @@ public class YqlParser implements Parser { static final String STEM = "stem"; static final String SUBSTRING = "substring"; static final String SUFFIX = "suffix"; + static final String TARGET_HITS = "targetHits"; static final String TARGET_NUM_HITS = "targetNumHits"; static final String THRESHOLD_BOOST_FACTOR = "thresholdBoostFactor"; static final String UNIQUE_ID = "id"; @@ -418,8 +419,12 @@ public class YqlParser implements Parser { String field = fetchFieldRead(args.get(0)); String property = fetchFieldRead(args.get(1)); NearestNeighborItem item = new NearestNeighborItem(field, property); - Integer targetNumHits = getAnnotation(ast, TARGET_NUM_HITS, + Integer targetNumHits = getAnnotation(ast, TARGET_HITS, Integer.class, null, "desired minimum hits to produce"); + if (targetNumHits == null) { + targetNumHits = getAnnotation(ast, TARGET_NUM_HITS, + Integer.class, null, "desired minimum hits to produce"); + } if (targetNumHits != null) { item.setTargetNumHits(targetNumHits); } @@ -504,9 +509,13 @@ public class YqlParser implements Parser { List> args = ast.getArgument(1); Preconditions.checkArgument(args.size() == 2, "Expected 2 arguments, got %s.", args.size()); - WandItem out = new WandItem(getIndex(args.get(0)), getAnnotation(ast, - TARGET_NUM_HITS, Integer.class, DEFAULT_TARGET_NUM_HITS, - "desired number of hits to accumulate in wand")); + Integer targetNumHits = getAnnotation(ast, TARGET_HITS, + Integer.class, null, "desired number of hits to accumulate in wand"); + if (targetNumHits == null) { + targetNumHits = getAnnotation(ast, TARGET_NUM_HITS, + Integer.class, DEFAULT_TARGET_NUM_HITS, "desired number of hits to 
accumulate in wand"); + } + WandItem out = new WandItem(getIndex(args.get(0)), targetNumHits); Double scoreThreshold = getAnnotation(ast, SCORE_THRESHOLD, Double.class, null, "min score for hit inclusion"); if (scoreThreshold != null) { @@ -1028,8 +1037,12 @@ public class YqlParser implements Parser { private CompositeItem buildWeakAnd(OperatorNode spec) { WeakAndItem weakAnd = new WeakAndItem(); - Integer targetNumHits = getAnnotation(spec, TARGET_NUM_HITS, + Integer targetNumHits = getAnnotation(spec, TARGET_HITS, Integer.class, null, "desired minimum hits to produce"); + if (targetNumHits == null) { + targetNumHits = getAnnotation(spec, TARGET_NUM_HITS, + Integer.class, null, "desired minimum hits to produce"); + } if (targetNumHits != null) { weakAnd.setN(targetNumHits); } diff --git a/container-search/src/test/java/com/yahoo/search/searchers/ValidateNearestNeighborTestCase.java b/container-search/src/test/java/com/yahoo/search/searchers/ValidateNearestNeighborTestCase.java index 2c849a9b52c..c49603737a6 100644 --- a/container-search/src/test/java/com/yahoo/search/searchers/ValidateNearestNeighborTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/searchers/ValidateNearestNeighborTestCase.java @@ -93,7 +93,7 @@ public class ValidateNearestNeighborTestCase { } private String makeQuery(String attributeTensor, String queryTensor) { - return "select * from sources * where [{\"targetNumHits\":1}]nearestNeighbor(" + attributeTensor + ", " + queryTensor + ");"; + return "select * from sources * where [{\"targetHits\":1}]nearestNeighbor(" + attributeTensor + ", " + queryTensor + ");"; } @Test @@ -139,7 +139,7 @@ public class ValidateNearestNeighborTestCase { r.append(",queryTensorName=").append(qt); r.append(",hnsw.exploreAdditionalHits=0"); r.append(",approximate=true"); - r.append(",targetNumHits=").append(th); + r.append(",targetHits=").append(th); r.append("} ").append(errmsg); return r.toString(); } @@ -149,7 +149,7 @@ public class 
ValidateNearestNeighborTestCase { String q = "select * from sources * where nearestNeighbor(dvector,qvector);"; Tensor t = makeTensor(tt_dense_dvector_3); Result r = doSearch(searcher, q, t); - assertErrMsg(desc("dvector", "qvector", 0, "has invalid targetNumHits 0: Must be >= 1"), r); + assertErrMsg(desc("dvector", "qvector", 0, "has invalid targetHits 0: Must be >= 1"), r); } @Test diff --git a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java index e43dbd4e266..2ace21daace 100644 --- a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java @@ -513,7 +513,7 @@ public class YqlParserTestCase { public void testWand() { assertParse("select foo from bar where wand(description, {\"a\":1, \"b\":2});", "WAND(10,0.0,1.0) description{[1]:\"a\",[2]:\"b\"}"); - assertParse("select foo from bar where [ {\"scoreThreshold\": 13.3, \"targetNumHits\": 7, " + + assertParse("select foo from bar where [ {\"scoreThreshold\": 13.3, \"targetHits\": 7, " + "\"thresholdBoostFactor\": 2.3} ]wand(description, {\"a\":1, \"b\":2});", "WAND(7,13.3,2.3) description{[1]:\"a\",[2]:\"b\"}"); } @@ -550,11 +550,11 @@ public class YqlParserTestCase { @Test public void testNearestNeighbor() { assertParse("select foo from bar where nearestNeighbor(semantic_embedding, my_vector);", - "NEAREST_NEIGHBOR {field=semantic_embedding,queryTensorName=my_vector,hnsw.exploreAdditionalHits=0,approximate=true,targetNumHits=0}"); - assertParse("select foo from bar where [{\"targetNumHits\": 37}]nearestNeighbor(semantic_embedding, my_vector);", - "NEAREST_NEIGHBOR {field=semantic_embedding,queryTensorName=my_vector,hnsw.exploreAdditionalHits=0,approximate=true,targetNumHits=37}"); - assertParse("select foo from bar where [{\"approximate\": false, \"hnsw.exploreAdditionalHits\": 8, \"targetNumHits\": 
3}]nearestNeighbor(semantic_embedding, my_vector);", - "NEAREST_NEIGHBOR {field=semantic_embedding,queryTensorName=my_vector,hnsw.exploreAdditionalHits=8,approximate=false,targetNumHits=3}"); + "NEAREST_NEIGHBOR {field=semantic_embedding,queryTensorName=my_vector,hnsw.exploreAdditionalHits=0,approximate=true,targetHits=0}"); + assertParse("select foo from bar where [{\"targetHits\": 37}]nearestNeighbor(semantic_embedding, my_vector);", + "NEAREST_NEIGHBOR {field=semantic_embedding,queryTensorName=my_vector,hnsw.exploreAdditionalHits=0,approximate=true,targetHits=37}"); + assertParse("select foo from bar where [{\"approximate\": false, \"hnsw.exploreAdditionalHits\": 8, \"targetHits\": 3}]nearestNeighbor(semantic_embedding, my_vector);", + "NEAREST_NEIGHBOR {field=semantic_embedding,queryTensorName=my_vector,hnsw.exploreAdditionalHits=8,approximate=false,targetHits=3}"); } @Test @@ -597,7 +597,7 @@ public class YqlParserTestCase { public void testWeakAnd() { assertParse("select foo from bar where weakAnd(a contains \"A\", b contains \"B\");", "WAND(100) a:A b:B"); - assertParse("select foo from bar where [{\"targetNumHits\": 37}]weakAnd(a contains \"A\", " + + assertParse("select foo from bar where [{\"targetHits\": 37}]weakAnd(a contains \"A\", " + "b contains \"B\");", "WAND(37) a:A b:B"); diff --git a/container-search/src/test/java/com/yahoo/select/SelectTestCase.java b/container-search/src/test/java/com/yahoo/select/SelectTestCase.java index 1715ed38964..d2896aca870 100644 --- a/container-search/src/test/java/com/yahoo/select/SelectTestCase.java +++ b/container-search/src/test/java/com/yahoo/select/SelectTestCase.java @@ -473,7 +473,7 @@ public class SelectTestCase { public void testWand() { assertParse("{ \"wand\": [\"description\", { \"a\": 1, \"b\": 2 }] }", "WAND(10,0.0,1.0) description{[1]:\"a\",[2]:\"b\"}"); - assertParse("{ \"wand\": { \"children\": [\"description\", { \"a\": 1, \"b\": 2 }], \"attributes\": { \"scoreThreshold\": 13.3, \"targetNumHits\": 
7, \"thresholdBoostFactor\": 2.3 } } }", + assertParse("{ \"wand\": { \"children\": [\"description\", { \"a\": 1, \"b\": 2 }], \"attributes\": { \"scoreThreshold\": 13.3, \"targetHits\": 7, \"thresholdBoostFactor\": 2.3 } } }", "WAND(7,13.3,2.3) description{[1]:\"a\",[2]:\"b\"}"); } @@ -525,7 +525,7 @@ public class SelectTestCase { public void testWeakAnd() { assertParse("{ \"weakAnd\": [{ \"contains\": [\"a\", \"A\"] }, { \"contains\": [\"b\", \"B\"] } ] }", "WAND(100) a:A b:B"); - assertParse("{ \"weakAnd\": { \"children\" : [{ \"contains\": [\"a\", \"A\"] }, { \"contains\": [\"b\", \"B\"] } ], \"attributes\" : {\"targetNumHits\": 37} }}", + assertParse("{ \"weakAnd\": { \"children\" : [{ \"contains\": [\"a\", \"A\"] }, { \"contains\": [\"b\", \"B\"] } ], \"attributes\" : {\"targetHits\": 37} }}", "WAND(37) a:A b:B"); QueryTree tree = parseWhere("{ \"weakAnd\": { \"children\" : [{ \"contains\": [\"a\", \"A\"] }, { \"contains\": [\"b\", \"B\"] } ], \"attributes\" : {\"scoreThreshold\": 41}}}"); -- cgit v1.2.3