diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2020-07-15 18:20:45 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-07-15 18:20:45 +0200 |
commit | 8dad7e25c8d1bd022880327a0c7f57e48efc302f (patch) | |
tree | a456fc74ed740d6c5c5fb6ed61f100c09c951c24 | |
parent | 0093a1340d19eb6aeb668eff9e9013767984ad8e (diff) | |
parent | 4b7e33430e02f1be1cda65cda6fa7e5736bf8fc0 (diff) |
Merge pull request #13899 from vespa-engine/arnej/add-geo-location-item
Arnej/add geo location item
82 files changed, 2274 insertions, 844 deletions
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index fcbe23aeb61..5faa5ec322d 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -527,6 +527,31 @@ ], "fields": [] }, + "com.yahoo.prelude.query.GeoLocationItem": { + "superClass": "com.yahoo.prelude.query.TermItem", + "interfaces": [], + "attributes": [ + "public" + ], + "methods": [ + "public void <init>(com.yahoo.prelude.Location)", + "public void <init>(com.yahoo.prelude.Location, java.lang.String)", + "public com.yahoo.prelude.Location getLocation()", + "public java.lang.String getRawWord()", + "public com.yahoo.prelude.query.Item$ItemType getItemType()", + "public java.lang.String getName()", + "public java.lang.String stringValue()", + "public void setValue(java.lang.String)", + "public int hashCode()", + "public boolean equals(java.lang.Object)", + "public java.lang.String getIndexedString()", + "protected void encodeThis(java.nio.ByteBuffer)", + "public int getNumWords()", + "public boolean isStemmed()", + "public boolean isWords()" + ], + "fields": [] + }, "com.yahoo.prelude.query.HasIndexItem": { "superClass": "java.lang.Object", "interfaces": [], @@ -697,7 +722,7 @@ "public static final enum com.yahoo.prelude.query.Item$ItemType REGEXP", "public static final enum com.yahoo.prelude.query.Item$ItemType WORD_ALTERNATIVES", "public static final enum com.yahoo.prelude.query.Item$ItemType NEAREST_NEIGHBOR", - "public static final enum com.yahoo.prelude.query.Item$ItemType LOCATION_TERM", + "public static final enum com.yahoo.prelude.query.Item$ItemType GEO_LOCATION_TERM", "public final int code" ] }, diff --git a/container-search/src/main/java/com/yahoo/prelude/Location.java b/container-search/src/main/java/com/yahoo/prelude/Location.java index 908bf835e3c..3d3eed3b3df 100644 --- a/container-search/src/main/java/com/yahoo/prelude/Location.java +++ b/container-search/src/main/java/com/yahoo/prelude/Location.java @@ -9,7 +9,7 @@ import 
java.util.StringTokenizer; /** * Location data for a geographical query. * - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Steinar Knutsen * @author arnej27959 */ public class Location { @@ -127,7 +127,7 @@ public class Location { throw new IllegalArgumentException("n/s location must be in range [-90,+90]"); } if (radius_in_degrees < 0) { - pr = 512 * 1024 * 1024; + pr = -1; } x = px; y = py; @@ -142,7 +142,7 @@ public class Location { throw new IllegalArgumentException("can only set geo circle once"); } if (radius_in_units < 0) { - throw new IllegalArgumentException("radius must be positive"); + radius_in_units = -1; } x = px; y = py; @@ -248,6 +248,13 @@ public class Location { } public String toString() { + return render(false); + } + public String backendString() { + return render(true); + } + + private String render(boolean forBackend) { StringBuilder ser = new StringBuilder(); if (attribute != null) { ser.append(attribute).append(':'); @@ -271,7 +278,7 @@ public class Location { if (dimensions == 2) { ser.append(",").append(y); } - ser.append(",").append(r). + ser.append(",").append(forBackend ? backendRadius() : r). append(",").append(tableId). append(",").append(s). append(",").append(replace); @@ -358,11 +365,16 @@ public class Location { /** * Obtain circle radius (in degrees). + * Note that "no radius" or "infinite radius" is represented as -1. * May only be called when isGeoCircle() returns true. **/ public double degRadius() { checkGeoCircle(); - return 0.000001 * r; + return (r < 0) ? -1.0 : (0.000001 * r); + } + + private int backendRadius() { + return (r < 0) ? (512 * 1024 * 1024) : r; } /** @@ -370,7 +382,7 @@ public class Location { * For internal use. 
*/ public int encode(ByteBuffer buffer) { - byte[] loc = Utf8.toBytes(toString()); + byte[] loc = Utf8.toBytes(backendString()); buffer.put(loc); return loc.length; } diff --git a/container-search/src/main/java/com/yahoo/prelude/query/GeoLocationItem.java b/container-search/src/main/java/com/yahoo/prelude/query/GeoLocationItem.java new file mode 100644 index 00000000000..8202c8fb279 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/query/GeoLocationItem.java @@ -0,0 +1,119 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.prelude.query; + +import com.google.common.annotations.Beta; +import com.yahoo.prelude.Location; +import java.nio.ByteBuffer; + +/** + * This represents a geo-location in the query tree. + * Used for closeness(fieldname) and distance(fieldname) rank features. + * @author arnej + */ +@Beta +public class GeoLocationItem extends TermItem { + + private Location location; + + /** + * Construct from a Location, which must be geo circle with an attribute set. + **/ + public GeoLocationItem(Location location) { + this(location, location.getAttribute()); + if (! location.hasAttribute()) { + throw new IllegalArgumentException("missing attribute on location: "+location); + } + } + + /** + * Construct from a Location and a field name. + * The Location must be a geo circle. + * If the Location has an attribute set, it must match the field name. + **/ + public GeoLocationItem(Location location, String fieldName) { + super(fieldName, false); + if (location.hasAttribute() && ! location.getAttribute().equals(fieldName)) { + throw new IllegalArgumentException("inconsistent attribute on location: "+location.getAttribute()+" versus fieldName: "+fieldName); + } + if (! 
location.isGeoCircle()) { + throw new IllegalArgumentException("GeoLocationItem only supports Geo Circles, got: "+location); + } + if (location.hasBoundingBox()) { + throw new IllegalArgumentException("GeoLocationItem does not support bounding box yet, got: "+location); + } + this.location = new Location(location.toString()); + this.location.setAttribute(null); // keep this in (superclass) indexName only + setNormalizable(false); + } + + public Location getLocation() { + return location; + } + + @Override + public String getRawWord() { + return stringValue(); + } + + @Override + public ItemType getItemType() { + return ItemType.GEO_LOCATION_TERM; + } + + @Override + public String getName() { + return "GEO_LOCATION"; + } + + @Override + public String stringValue() { + return location.toString(); + } + + @Override + public void setValue(String value) { + throw new UnsupportedOperationException("Cannot setValue("+value+") on "+getName()); + } + + @Override + public int hashCode() { + return java.util.Objects.hash(super.hashCode(), location); + } + + @Override + public boolean equals(Object object) { + if ( ! super.equals(object)) return false; + GeoLocationItem other = (GeoLocationItem) object; // Ensured by superclass + if ( ! location.equals(other.location)) return false; + return true; + } + + @Override + public String getIndexedString() { + return location.toString(); + } + + @Override + protected void encodeThis(ByteBuffer buffer) { + super.encodeThis(buffer); // takes care of index bytes + // TODO: use a better format for encoding the location on the wire. 
+ putString(location.backendString(), buffer); + } + + @Override + public int getNumWords() { + return 1; + } + + @Override + public boolean isStemmed() { + return true; + } + + @Override + public boolean isWords() { + return false; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/query/Item.java b/container-search/src/main/java/com/yahoo/prelude/query/Item.java index bd368864e9a..c4978b2a378 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/Item.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/Item.java @@ -61,7 +61,7 @@ public abstract class Item implements Cloneable { REGEXP(24), WORD_ALTERNATIVES(25), NEAREST_NEIGHBOR(26), - LOCATION_TERM(27); + GEO_LOCATION_TERM(27); public final int code; diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcFillInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcFillInvoker.java index 9b661368972..0e8759f740e 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcFillInvoker.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/RpcFillInvoker.java @@ -137,7 +137,7 @@ public class RpcFillInvoker extends FillInvoker { root.setString("ranking", rankProfile); } if (location != null) { - root.setString("location", location.toString()); + root.setString("location", location.backendString()); } Cursor gids = root.setArray("gids"); for (FastHit hit : hits) { diff --git a/container-search/src/main/java/com/yahoo/search/query/SelectParser.java b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java index 9910eb9532d..0d9acea7643 100644 --- a/container-search/src/main/java/com/yahoo/search/query/SelectParser.java +++ b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java @@ -3,9 +3,12 @@ package com.yahoo.search.query; import com.google.common.base.Preconditions; import com.yahoo.collections.LazyMap; +import com.yahoo.geo.DistanceParser; +import 
com.yahoo.geo.ParsedDegree; import com.yahoo.language.Language; import com.yahoo.language.process.Normalizer; import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.Location; import com.yahoo.prelude.query.AndItem; import com.yahoo.prelude.query.BoolItem; import com.yahoo.prelude.query.CompositeItem; @@ -15,6 +18,7 @@ import com.yahoo.prelude.query.ExactStringItem; import com.yahoo.prelude.query.IntItem; import com.yahoo.prelude.query.Item; import com.yahoo.prelude.query.Limit; +import com.yahoo.prelude.query.GeoLocationItem; import com.yahoo.prelude.query.NearItem; import com.yahoo.prelude.query.NearestNeighborItem; import com.yahoo.prelude.query.NotItem; @@ -47,6 +51,7 @@ import com.yahoo.slime.ArrayTraverser; import com.yahoo.slime.Inspector; import com.yahoo.slime.ObjectTraverser; import com.yahoo.slime.SlimeUtils; +import com.yahoo.slime.Type; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; @@ -61,6 +66,65 @@ import static com.yahoo.slime.Type.LONG; import static com.yahoo.slime.Type.OBJECT; import static com.yahoo.slime.Type.STRING; +import static com.yahoo.search.yql.YqlParser.ACCENT_DROP; +import static com.yahoo.search.yql.YqlParser.ALTERNATIVES; +import static com.yahoo.search.yql.YqlParser.AND_SEGMENTING; +import static com.yahoo.search.yql.YqlParser.ANNOTATIONS; +import static com.yahoo.search.yql.YqlParser.APPROXIMATE; +import static com.yahoo.search.yql.YqlParser.ASCENDING_HITS_ORDER; +import static com.yahoo.search.yql.YqlParser.BOUNDS; +import static com.yahoo.search.yql.YqlParser.BOUNDS_LEFT_OPEN; +import static com.yahoo.search.yql.YqlParser.BOUNDS_OPEN; +import static com.yahoo.search.yql.YqlParser.BOUNDS_RIGHT_OPEN; +import static com.yahoo.search.yql.YqlParser.CONNECTION_ID; +import static com.yahoo.search.yql.YqlParser.CONNECTION_WEIGHT; +import static com.yahoo.search.yql.YqlParser.CONNECTIVITY; +import static com.yahoo.search.yql.YqlParser.DEFAULT_TARGET_NUM_HITS; +import static 
com.yahoo.search.yql.YqlParser.DESCENDING_HITS_ORDER; +import static com.yahoo.search.yql.YqlParser.DISTANCE; +import static com.yahoo.search.yql.YqlParser.DOT_PRODUCT; +import static com.yahoo.search.yql.YqlParser.END_ANCHOR; +import static com.yahoo.search.yql.YqlParser.EQUIV; +import static com.yahoo.search.yql.YqlParser.FILTER; +import static com.yahoo.search.yql.YqlParser.GEO_LOCATION; +import static com.yahoo.search.yql.YqlParser.HIT_LIMIT; +import static com.yahoo.search.yql.YqlParser.HNSW_EXPLORE_ADDITIONAL_HITS; +import static com.yahoo.search.yql.YqlParser.IMPLICIT_TRANSFORMS; +import static com.yahoo.search.yql.YqlParser.LABEL; +import static com.yahoo.search.yql.YqlParser.NEAR; +import static com.yahoo.search.yql.YqlParser.NEAREST_NEIGHBOR; +import static com.yahoo.search.yql.YqlParser.NFKC; +import static com.yahoo.search.yql.YqlParser.NORMALIZE_CASE; +import static com.yahoo.search.yql.YqlParser.ONEAR; +import static com.yahoo.search.yql.YqlParser.ORIGIN; +import static com.yahoo.search.yql.YqlParser.ORIGIN_LENGTH; +import static com.yahoo.search.yql.YqlParser.ORIGIN_OFFSET; +import static com.yahoo.search.yql.YqlParser.ORIGIN_ORIGINAL; +import static com.yahoo.search.yql.YqlParser.PHRASE; +import static com.yahoo.search.yql.YqlParser.PREDICATE; +import static com.yahoo.search.yql.YqlParser.PREFIX; +import static com.yahoo.search.yql.YqlParser.RANGE; +import static com.yahoo.search.yql.YqlParser.RANK; +import static com.yahoo.search.yql.YqlParser.RANKED; +import static com.yahoo.search.yql.YqlParser.SAME_ELEMENT; +import static com.yahoo.search.yql.YqlParser.SCORE_THRESHOLD; +import static com.yahoo.search.yql.YqlParser.SIGNIFICANCE; +import static com.yahoo.search.yql.YqlParser.START_ANCHOR; +import static com.yahoo.search.yql.YqlParser.STEM; +import static com.yahoo.search.yql.YqlParser.SUBSTRING; +import static com.yahoo.search.yql.YqlParser.SUFFIX; +import static com.yahoo.search.yql.YqlParser.TARGET_HITS; +import static 
com.yahoo.search.yql.YqlParser.TARGET_NUM_HITS; +import static com.yahoo.search.yql.YqlParser.THRESHOLD_BOOST_FACTOR; +import static com.yahoo.search.yql.YqlParser.UNIQUE_ID; +import static com.yahoo.search.yql.YqlParser.URI; +import static com.yahoo.search.yql.YqlParser.USE_POSITION_DATA; +import static com.yahoo.search.yql.YqlParser.USER_INPUT_LANGUAGE; +import static com.yahoo.search.yql.YqlParser.WAND; +import static com.yahoo.search.yql.YqlParser.WEAK_AND; +import static com.yahoo.search.yql.YqlParser.WEIGHT; +import static com.yahoo.search.yql.YqlParser.WEIGHTED_SET; + /** * The Select query language. * @@ -70,6 +134,14 @@ import static com.yahoo.slime.Type.STRING; */ public class SelectParser implements Parser { + private static final String AND = "and"; + private static final String AND_NOT = "and_not"; + private static final String CALL = "call"; + private static final String CONTAINS = "contains"; + private static final String EQ = "equals"; + private static final String MATCHES = "matches"; + private static final String OR = "or"; + Parsable query; private final IndexFacts indexFacts; private final Map<Integer, TaggableItem> identifiedItems = LazyMap.newHashMap(); @@ -77,65 +149,7 @@ public class SelectParser implements Parser { private final Normalizer normalizer; private IndexFacts.Session indexFactsSession; - // YQL parameters and functions - private static final String DESCENDING_HITS_ORDER = "descending"; - private static final String ASCENDING_HITS_ORDER = "ascending"; - private static final Integer DEFAULT_TARGET_NUM_HITS = 10; - private static final String ORIGIN_LENGTH = "length"; - private static final String ORIGIN_OFFSET = "offset"; - private static final String ORIGIN = "origin"; - private static final String ORIGIN_ORIGINAL = "original"; - private static final String CONNECTION_ID = "id"; - private static final String CONNECTION_WEIGHT = "weight"; - private static final String CONNECTIVITY = "connectivity"; - private static final String 
ANNOTATIONS = "annotations"; - private static final String NFKC = "nfkc"; - private static final String USER_INPUT_LANGUAGE = "language"; - private static final String ACCENT_DROP = "accentDrop"; - private static final String ALTERNATIVES = "alternatives"; - private static final String AND_SEGMENTING = "andSegmenting"; - private static final String APPROXIMATE = "approximate"; - private static final String DISTANCE = "distance"; - private static final String DOT_PRODUCT = "dotProduct"; - private static final String EQUIV = "equiv"; - private static final String FILTER = "filter"; - private static final String HIT_LIMIT = "hitLimit"; - private static final String HNSW_EXPLORE_ADDITIONAL_HITS = "hnsw.exploreAdditionalHits"; - private static final String IMPLICIT_TRANSFORMS = "implicitTransforms"; - private static final String LABEL = "label"; - private static final String NEAR = "near"; - private static final String NEAREST_NEIGHBOR = "nearestNeighbor"; - private static final String NORMALIZE_CASE = "normalizeCase"; - private static final String ONEAR = "onear"; - private static final String PHRASE = "phrase"; - private static final String PREDICATE = "predicate"; - private static final String PREFIX = "prefix"; - private static final String RANKED = "ranked"; - private static final String RANK = "rank"; - private static final String SAME_ELEMENT = "sameElement"; - private static final String SCORE_THRESHOLD = "scoreThreshold"; - private static final String SIGNIFICANCE = "significance"; - private static final String STEM = "stem"; - private static final String SUBSTRING = "substring"; - private static final String SUFFIX = "suffix"; - private static final String TARGET_HITS = "targetHits"; - private static final String TARGET_NUM_HITS = "targetNumHits"; - private static final String THRESHOLD_BOOST_FACTOR = "thresholdBoostFactor"; - private static final String UNIQUE_ID = "id"; - private static final String USE_POSITION_DATA = "usePositionData"; - private static 
final String WAND = "wand"; - private static final String WEAK_AND = "weakAnd"; - private static final String WEIGHTED_SET = "weightedSet"; - private static final String WEIGHT = "weight"; - private static final String AND = "and"; - private static final String AND_NOT = "and_not"; - private static final String OR = "or"; - private static final String EQ = "equals"; - private static final String RANGE = "range"; - private static final String CONTAINS = "contains"; - private static final String MATCHES = "matches"; - private static final String CALL = "call"; - private static final List<String> FUNCTION_CALLS = Arrays.asList(WAND, WEIGHTED_SET, DOT_PRODUCT, NEAREST_NEIGHBOR, PREDICATE, RANK, WEAK_AND); + private static final List<String> FUNCTION_CALLS = Arrays.asList(WAND, WEIGHTED_SET, DOT_PRODUCT, GEO_LOCATION, NEAREST_NEIGHBOR, PREDICATE, RANK, WEAK_AND); public SelectParser(ParserEnvironment environment) { indexFacts = environment.getIndexFacts(); @@ -153,7 +167,7 @@ public class SelectParser implements Parser { } private QueryTree buildTree() { - Inspector inspector = SlimeUtils.jsonToSlime(this.query.getSelect().getWhereString().getBytes()).get(); + Inspector inspector = SlimeUtils.jsonToSlime(this.query.getSelect().getWhereString()).get(); if (inspector.field("error_message").valid()) { throw new QueryException("Illegal query: " + inspector.field("error_message").asString() + " at: '" + new String(inspector.field("offending_input").asData(), StandardCharsets.UTF_8) + "'"); @@ -213,7 +227,7 @@ public class SelectParser implements Parser { /** Translates a list of grouping requests on JSON form to a list in the grouping language form */ private List<String> toGroupingRequests(String groupingJson) { - Inspector inspector = SlimeUtils.jsonToSlime(groupingJson.getBytes()).get(); + Inspector inspector = SlimeUtils.jsonToSlime(groupingJson).get(); if (inspector.field("error_message").valid()) { throw new QueryException("Illegal query: " + 
inspector.field("error_message").asString() + " at: '" + new String(inspector.field("offending_input").asData(), StandardCharsets.UTF_8) + "'"); @@ -264,6 +278,8 @@ public class SelectParser implements Parser { return buildWeightedSet(key, value); case DOT_PRODUCT: return buildDotProduct(key, value); + case GEO_LOCATION: + return buildGeoLocation(key, value); case NEAREST_NEIGHBOR: return buildNearestNeighbor(key, value); case PREDICATE: @@ -410,6 +426,47 @@ public class SelectParser implements Parser { return orItem; } + private Item buildGeoLocation(String key, Inspector value) { + HashMap<Integer, Inspector> children = childMap(value); + Preconditions.checkArgument(children.size() == 4, "Expected 4 arguments, got %s.", children.size()); + String field = children.get(0).asString(); + var arg1 = children.get(1); + var arg2 = children.get(2); + var arg3 = children.get(3); + var loc = new Location(); + if (arg3.type() != Type.STRING) { + throw new IllegalArgumentException("Invalid geoLocation radius type "+arg3.type()+" for "+arg3); + } + double radius = DistanceParser.parse(arg3.asString()); + if (arg1.type() == Type.STRING && arg2.type() == Type.STRING) { + var c1input = children.get(1).asString(); + var c2input = children.get(2).asString(); + var coord_1 = ParsedDegree.fromString(c1input, true, false); + var coord_2 = ParsedDegree.fromString(c2input, false, true); + if (coord_1.isLatitude && coord_2.isLongitude) { + loc.setGeoCircle(coord_1.degrees, coord_2.degrees, radius); + } else if (coord_2.isLatitude && coord_1.isLongitude) { + loc.setGeoCircle(coord_2.degrees, coord_1.degrees, radius); + } else { + throw new IllegalArgumentException("Invalid geoLocation coordinates '"+c1input+"' and '"+c2input+"'"); + } + } else if (arg1.type() == Type.DOUBLE && arg2.type() == Type.DOUBLE) { + loc.setGeoCircle(arg1.asDouble(), arg2.asDouble(), radius); + } else { + throw new IllegalArgumentException("Invalid geoLocation coordinate types "+arg1.type()+" and "+arg2.type()); 
+ } + var item = new GeoLocationItem(loc, field); + Inspector annotations = getAnnotations(value); + if (annotations != null){ + annotations.traverse((ObjectTraverser) (annotation_name, annotation_value) -> { + if (LABEL.equals(annotation_name)) { + item.setLabel(annotation_value.asString()); + } + }); + } + return item; + } + private Item buildNearestNeighbor(String key, Inspector value) { HashMap<Integer, Inspector> children = childMap(value); diff --git a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java index dd52b9e19b8..22328fb026e 100644 --- a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java +++ b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java @@ -16,6 +16,7 @@ import static com.yahoo.search.yql.YqlParser.DOT_PRODUCT; import static com.yahoo.search.yql.YqlParser.END_ANCHOR; import static com.yahoo.search.yql.YqlParser.EQUIV; import static com.yahoo.search.yql.YqlParser.FILTER; +import static com.yahoo.search.yql.YqlParser.GEO_LOCATION; import static com.yahoo.search.yql.YqlParser.HIT_LIMIT; import static com.yahoo.search.yql.YqlParser.IMPLICIT_TRANSFORMS; import static com.yahoo.search.yql.YqlParser.LABEL; @@ -72,6 +73,7 @@ import com.yahoo.prelude.query.ExactStringItem; import com.yahoo.prelude.query.IndexedItem; import com.yahoo.prelude.query.IntItem; import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.GeoLocationItem; import com.yahoo.prelude.query.MarkerWordItem; import com.yahoo.prelude.query.NearItem; import com.yahoo.prelude.query.NearestNeighborItem; @@ -689,6 +691,26 @@ public class VespaSerializer { } + private static class GeoLocationSerializer extends Serializer<GeoLocationItem> { + @Override + void onExit(StringBuilder destination, GeoLocationItem item) { } + @Override + boolean serialize(StringBuilder destination, GeoLocationItem item) { + String annotations = leafAnnotations(item); 
+ if (annotations.length() > 0) { + destination.append("([{").append(annotations).append("}]"); + } + destination.append(GEO_LOCATION).append('('); + destination.append(item.getIndexName()).append(", "); + var loc = item.getLocation(); + destination.append(loc.degNS()).append(", "); + destination.append(loc.degEW()).append(", "); + destination.append('"').append(loc.degRadius()).append(" deg").append('"'); + destination.append(')'); + return false; + } + } + private static class NearestNeighborSerializer extends Serializer<NearestNeighborItem> { @Override @@ -1163,6 +1185,7 @@ public class VespaSerializer { dispatchBuilder.put(EquivItem.class, new EquivSerializer()); dispatchBuilder.put(ExactStringItem.class, new WordSerializer()); dispatchBuilder.put(IntItem.class, new NumberSerializer()); + dispatchBuilder.put(GeoLocationItem.class, new GeoLocationSerializer()); dispatchBuilder.put(BoolItem.class, new BoolSerializer()); dispatchBuilder.put(MarkerWordItem.class, new WordSerializer()); // gotcha dispatchBuilder.put(NearItem.class, new NearSerializer()); diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java index 7d17fe4f09d..6a464a1503b 100644 --- a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java +++ b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java @@ -19,11 +19,14 @@ import com.google.common.annotations.Beta; import com.google.common.base.Preconditions; import com.yahoo.collections.LazyMap; import com.yahoo.collections.LazySet; +import com.yahoo.geo.DistanceParser; +import com.yahoo.geo.ParsedDegree; import com.yahoo.language.Language; import com.yahoo.language.detect.Detector; import com.yahoo.language.process.Normalizer; import com.yahoo.language.process.Segmenter; import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.Location; import com.yahoo.prelude.query.AndItem; import com.yahoo.prelude.query.AndSegmentItem; import 
com.yahoo.prelude.query.BoolItem; @@ -34,6 +37,7 @@ import com.yahoo.prelude.query.ExactStringItem; import com.yahoo.prelude.query.IntItem; import com.yahoo.prelude.query.Item; import com.yahoo.prelude.query.Limit; +import com.yahoo.prelude.query.GeoLocationItem; import com.yahoo.prelude.query.NearItem; import com.yahoo.prelude.query.NearestNeighborItem; import com.yahoo.prelude.query.NotItem; @@ -94,8 +98,8 @@ import com.yahoo.search.query.parser.ParserFactory; */ public class YqlParser implements Parser { - private static final String DESCENDING_HITS_ORDER = "descending"; - private static final String ASCENDING_HITS_ORDER = "ascending"; + public static final String DESCENDING_HITS_ORDER = "descending"; + public static final String ASCENDING_HITS_ORDER = "ascending"; private enum SegmentWhen { NEVER, POSSIBLY, ALWAYS; @@ -107,12 +111,12 @@ public class YqlParser implements Parser { private static final Integer DEFAULT_HITS = 10; private static final Integer DEFAULT_OFFSET = 0; - private static final Integer DEFAULT_TARGET_NUM_HITS = 10; + public static final Integer DEFAULT_TARGET_NUM_HITS = 10; private static final String ACCENT_DROP_DESCRIPTION = "setting for whether to remove accents if field implies it"; - private static final String ANNOTATIONS = "annotations"; + public static final String ANNOTATIONS = "annotations"; private static final String FILTER_DESCRIPTION = "term filter setting"; private static final String IMPLICIT_TRANSFORMS_DESCRIPTION = "setting for whether built-in query transformers should touch the term"; - private static final String NFKC = "nfkc"; + public static final String NFKC = "nfkc"; private static final String NORMALIZE_CASE_DESCRIPTION = "setting for whether to do case normalization if field implies it"; private static final String ORIGIN_DESCRIPTION = "string origin for a term"; private static final String RANKED_DESCRIPTION = "setting for whether to use term for ranking"; @@ -121,7 +125,7 @@ public class YqlParser implements 
Parser { private static final String USER_INPUT_ALLOW_EMPTY = "allowEmpty"; private static final String USER_INPUT_DEFAULT_INDEX = "defaultIndex"; private static final String USER_INPUT_GRAMMAR = "grammar"; - private static final String USER_INPUT_LANGUAGE = "language"; + public static final String USER_INPUT_LANGUAGE = "language"; private static final String USER_INPUT_RAW = "raw"; private static final String USER_INPUT_SEGMENT = "segment"; private static final String USER_INPUT = "userInput"; @@ -134,55 +138,56 @@ public class YqlParser implements Parser { public static final String SORTING_LOCALE = "locale"; public static final String SORTING_STRENGTH = "strength"; - static final String ACCENT_DROP = "accentDrop"; - static final String ALTERNATIVES = "alternatives"; - static final String AND_SEGMENTING = "andSegmenting"; - static final String APPROXIMATE = "approximate"; - static final String BOUNDS = "bounds"; - static final String BOUNDS_LEFT_OPEN = "leftOpen"; - static final String BOUNDS_OPEN = "open"; - static final String BOUNDS_RIGHT_OPEN = "rightOpen"; - static final String CONNECTION_ID = "id"; - static final String CONNECTION_WEIGHT = "weight"; - static final String CONNECTIVITY = "connectivity"; - static final String DISTANCE = "distance"; - static final String DOT_PRODUCT = "dotProduct"; - static final String EQUIV = "equiv"; - static final String FILTER = "filter"; - static final String HIT_LIMIT = "hitLimit"; - static final String HNSW_EXPLORE_ADDITIONAL_HITS = "hnsw.exploreAdditionalHits"; - static final String IMPLICIT_TRANSFORMS = "implicitTransforms"; - static final String LABEL = "label"; - static final String NEAR = "near"; - static final String NEAREST_NEIGHBOR = "nearestNeighbor"; - static final String NORMALIZE_CASE = "normalizeCase"; - static final String ONEAR = "onear"; - static final String ORIGIN_LENGTH = "length"; - static final String ORIGIN_OFFSET = "offset"; - static final String ORIGIN = "origin"; - static final String 
ORIGIN_ORIGINAL = "original"; - static final String PHRASE = "phrase"; - static final String PREDICATE = "predicate"; - static final String PREFIX = "prefix"; - static final String RANGE = "range"; - static final String RANKED = "ranked"; - static final String RANK = "rank"; - static final String SAME_ELEMENT = "sameElement"; - static final String SCORE_THRESHOLD = "scoreThreshold"; - static final String SIGNIFICANCE = "significance"; - static final String STEM = "stem"; - static final String SUBSTRING = "substring"; - static final String SUFFIX = "suffix"; - static final String TARGET_HITS = "targetHits"; - static final String TARGET_NUM_HITS = "targetNumHits"; - static final String THRESHOLD_BOOST_FACTOR = "thresholdBoostFactor"; - static final String UNIQUE_ID = "id"; - static final String USE_POSITION_DATA = "usePositionData"; - static final String WAND = "wand"; - static final String WEAK_AND = "weakAnd"; - static final String WEIGHTED_SET = "weightedSet"; - static final String WEIGHT = "weight"; - static final String URI = "uri"; + public static final String ACCENT_DROP = "accentDrop"; + public static final String ALTERNATIVES = "alternatives"; + public static final String AND_SEGMENTING = "andSegmenting"; + public static final String APPROXIMATE = "approximate"; + public static final String BOUNDS = "bounds"; + public static final String BOUNDS_LEFT_OPEN = "leftOpen"; + public static final String BOUNDS_OPEN = "open"; + public static final String BOUNDS_RIGHT_OPEN = "rightOpen"; + public static final String CONNECTION_ID = "id"; + public static final String CONNECTION_WEIGHT = "weight"; + public static final String CONNECTIVITY = "connectivity"; + public static final String DISTANCE = "distance"; + public static final String DOT_PRODUCT = "dotProduct"; + public static final String EQUIV = "equiv"; + public static final String FILTER = "filter"; + public static final String GEO_LOCATION = "geoLocation"; + public static final String HIT_LIMIT = "hitLimit"; + 
public static final String HNSW_EXPLORE_ADDITIONAL_HITS = "hnsw.exploreAdditionalHits"; + public static final String IMPLICIT_TRANSFORMS = "implicitTransforms"; + public static final String LABEL = "label"; + public static final String NEAR = "near"; + public static final String NEAREST_NEIGHBOR = "nearestNeighbor"; + public static final String NORMALIZE_CASE = "normalizeCase"; + public static final String ONEAR = "onear"; + public static final String ORIGIN_LENGTH = "length"; + public static final String ORIGIN_OFFSET = "offset"; + public static final String ORIGIN = "origin"; + public static final String ORIGIN_ORIGINAL = "original"; + public static final String PHRASE = "phrase"; + public static final String PREDICATE = "predicate"; + public static final String PREFIX = "prefix"; + public static final String RANGE = "range"; + public static final String RANKED = "ranked"; + public static final String RANK = "rank"; + public static final String SAME_ELEMENT = "sameElement"; + public static final String SCORE_THRESHOLD = "scoreThreshold"; + public static final String SIGNIFICANCE = "significance"; + public static final String STEM = "stem"; + public static final String SUBSTRING = "substring"; + public static final String SUFFIX = "suffix"; + public static final String TARGET_HITS = "targetHits"; + public static final String TARGET_NUM_HITS = "targetNumHits"; + public static final String THRESHOLD_BOOST_FACTOR = "thresholdBoostFactor"; + public static final String UNIQUE_ID = "id"; + public static final String USE_POSITION_DATA = "usePositionData"; + public static final String WAND = "wand"; + public static final String WEAK_AND = "weakAnd"; + public static final String WEIGHTED_SET = "weightedSet"; + public static final String WEIGHT = "weight"; + public static final String URI = "uri"; private final IndexFacts indexFacts; private final List<ConnectedItem> connectedItems = new ArrayList<>(); @@ -372,6 +377,8 @@ public class YqlParser implements Parser { return 
buildWeightedSet(ast); case DOT_PRODUCT: return buildDotProduct(ast); + case GEO_LOCATION: + return buildGeoLocation(ast); case NEAREST_NEIGHBOR: return buildNearestNeighbor(ast); case PREDICATE: @@ -413,6 +420,29 @@ public class YqlParser implements Parser { return fillWeightedSet(ast, args.get(1), new DotProductItem(getIndex(args.get(0)))); } + private Item buildGeoLocation(OperatorNode<ExpressionOperator> ast) { + List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); + Preconditions.checkArgument(args.size() == 4, "Expected 4 arguments, got %s.", args.size()); + String field = fetchFieldRead(args.get(0)); + var coord_1 = ParsedDegree.fromString(fetchFieldRead(args.get(1)), true, false); + var coord_2 = ParsedDegree.fromString(fetchFieldRead(args.get(2)), false, true); + double radius = DistanceParser.parse(fetchFieldRead(args.get(3))); + var loc = new Location(); + if (coord_1.isLatitude && coord_2.isLongitude) { + loc.setGeoCircle(coord_1.degrees, coord_2.degrees, radius); + } else if (coord_2.isLatitude && coord_1.isLongitude) { + loc.setGeoCircle(coord_2.degrees, coord_1.degrees, radius); + } else { + throw new IllegalArgumentException("Invalid geoLocation coordinates '"+coord_1+"' and '"+coord_2+"'"); + } + var item = new GeoLocationItem(loc, field); + String label = getAnnotation(ast, LABEL, String.class, null, "item label"); + if (label != null) { + item.setLabel(label); + } + return item; + } + private Item buildNearestNeighbor(OperatorNode<ExpressionOperator> ast) { List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1); Preconditions.checkArgument(args.size() == 2, "Expected 2 arguments, got %s.", args.size()); @@ -438,7 +468,7 @@ public class YqlParser implements Parser { item.setAllowApproximate(allowApproximate); String label = getAnnotation(ast, LABEL, String.class, null, "item label"); if (label != null) { - item.setLabel(label); + item.setLabel(label); } return item; } @@ -902,6 +932,8 @@ public class YqlParser implements 
Parser { private static String fetchFieldRead(OperatorNode<ExpressionOperator> ast) { switch (ast.getOperator()) { + case LITERAL: + return ast.getArgument(0).toString(); case READ_FIELD: return ast.getArgument(1); case PROPREF: diff --git a/container-search/src/test/java/com/yahoo/prelude/searcher/test/PosSearcherTestCase.java b/container-search/src/test/java/com/yahoo/prelude/searcher/test/PosSearcherTestCase.java index aa3fa53119e..aa48e8494f2 100644 --- a/container-search/src/test/java/com/yahoo/prelude/searcher/test/PosSearcherTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/searcher/test/PosSearcherTestCase.java @@ -123,7 +123,8 @@ public class PosSearcherTestCase { q.properties().set("pos.ll", "N0;E0"); q.properties().set("pos.radius", "-1"); doSearch(searcher, q, 0, 10); - assertEquals("(2,0,0,536870912,0,1,0,4294967295)", q.getRanking().getLocation().toString()); + assertEquals("(2,0,0,-1,0,1,0,4294967295)", q.getRanking().getLocation().toString()); + assertEquals("(2,0,0,536870912,0,1,0,4294967295)", q.getRanking().getLocation().backendString()); } /** diff --git a/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java index d770b08d31a..f8e930fa19d 100644 --- a/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java @@ -119,6 +119,14 @@ public class VespaSerializerTestCase { } @Test + public void testGeoLocation() { + parseAndConfirm("geoLocation(workplace, 63.418417, 10.433033, \"0.5 deg\")"); + parseAndConfirm("geoLocation(headquarters, 37.41638, -122.024683, \"180.0 deg\")"); + parseAndConfirm("geoLocation(home, -17.0, 42.0, \"0.0 deg\")"); + parseAndConfirm("geoLocation(workplace, -12.0, -34.0, \"-1.0 deg\")"); + } + + @Test public void testNear() { parseAndConfirm("title contains near(\"a\", \"b\")"); 
parseAndConfirm("title contains ([{\"distance\": 50}]near(\"a\", \"b\"))"); diff --git a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java index a151244525a..62a9e27cd96 100644 --- a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java @@ -548,6 +548,24 @@ public class YqlParserTestCase { } @Test + public void testGeoLocation() { + assertParse("select foo from bar where geoLocation(workplace, 63.418417, 10.433033, \"0.5 deg\");", + "GEO_LOCATION workplace:(2,10433033,63418417,500000,0,1,0,1921876103)"); + assertParse("select foo from bar where geoLocation(headquarters, \"37.416383\", \"-122.024683\", \"100 miles\");", + "GEO_LOCATION headquarters:(2,-122024683,37416383,1450561,0,1,0,3411238761)"); + assertParse("select foo from bar where geoLocation(home, \"E10.433033\", \"N63.418417\", \"5km\");", + "GEO_LOCATION home:(2,10433033,63418417,45066,0,1,0,1921876103)"); + + assertParseFail("select foo from bar where geoLocation(qux, 1, 2);", + new IllegalArgumentException("Expected 4 arguments, got 3.")); + assertParseFail("select foo from bar where geoLocation(qux, 2.0, \"N5.0\", \"0.5 deg\");", + new IllegalArgumentException( + "Invalid geoLocation coordinates 'Latitude: 2.0 degrees' and 'Latitude: 5.0 degrees'")); + assertParse("select foo from bar where geoLocation(workplace, -12, -34, \"-77 d\");", + "GEO_LOCATION workplace:(2,-34000000,-12000000,-1,0,1,0,4201111954)"); + } + + @Test public void testNearestNeighbor() { assertParse("select foo from bar where nearestNeighbor(semantic_embedding, my_vector);", "NEAREST_NEIGHBOR {field=semantic_embedding,queryTensorName=my_vector,hnsw.exploreAdditionalHits=0,approximate=true,targetHits=0}"); diff --git a/container-search/src/test/java/com/yahoo/select/SelectTestCase.java 
b/container-search/src/test/java/com/yahoo/select/SelectTestCase.java index 4691ef42e55..f297fd69f24 100644 --- a/container-search/src/test/java/com/yahoo/select/SelectTestCase.java +++ b/container-search/src/test/java/com/yahoo/select/SelectTestCase.java @@ -522,6 +522,18 @@ public class SelectTestCase { } @Test + public void testGeoLocation() { + assertParse("{ \"geoLocation\": [ \"workplace\", 63.418417, 10.433033, \"0.5 deg\" ] }", + "GEO_LOCATION workplace:(2,10433033,63418417,500000,0,1,0,1921876103)"); + assertParse("{ \"geoLocation\": [ \"headquarters\", \"37.416383\", \"-122.024683\", \"100 miles\" ] }", + "GEO_LOCATION headquarters:(2,-122024683,37416383,1450561,0,1,0,3411238761)"); + assertParse("{ \"geoLocation\": [ \"home\", \"E10.433033\", \"N63.418417\", \"5km\" ] }", + "GEO_LOCATION home:(2,10433033,63418417,45066,0,1,0,1921876103)"); + assertParse("{ \"geoLocation\": [ \"workplace\", -12.0, -34.0, \"-77 deg\" ] }", + "GEO_LOCATION workplace:(2,-34000000,-12000000,-1,0,1,0,4201111954)"); + } + + @Test public void testNearestNeighbor() { assertParse("{ \"nearestNeighbor\": [ \"f1field\", \"q2prop\" ] }", "NEAREST_NEIGHBOR {field=f1field,queryTensorName=q2prop,hnsw.exploreAdditionalHits=0,approximate=true,targetHits=0}"); diff --git a/searchcore/src/tests/proton/matching/query_test.cpp b/searchcore/src/tests/proton/matching/query_test.cpp index 24e1e886351..6fbd43eabbe 100644 --- a/searchcore/src/tests/proton/matching/query_test.cpp +++ b/searchcore/src/tests/proton/matching/query_test.cpp @@ -730,7 +730,8 @@ void checkQueryAddsLocation(Test &test, const string &loc_string) { SearchIterator::UP search = query.createSearch(*md); test.ASSERT_TRUE(search.get()); if (!test.EXPECT_NOT_EQUAL(string::npos, search->asString().find(loc_string))) { - fprintf(stderr, "search (missing loc_string): %s", search->asString().c_str()); + fprintf(stderr, "search (missing loc_string '%s'): %s", + loc_string.c_str(), search->asString().c_str()); } } diff --git 
a/searchcore/src/tests/proton/matching/termdataextractor_test.cpp b/searchcore/src/tests/proton/matching/termdataextractor_test.cpp index 2570e64dbe2..36c34e38a04 100644 --- a/searchcore/src/tests/proton/matching/termdataextractor_test.cpp +++ b/searchcore/src/tests/proton/matching/termdataextractor_test.cpp @@ -83,7 +83,7 @@ Node::UP getQuery(const ViewResolver &resolver) query_builder.addStringTerm("bar", field, id[3], Weight(0)); } - query_builder.addLocationTerm(Location(Point(10, 10), 3, 0), + query_builder.addLocationTerm(Location(Point{10, 10}, 3, 0), field, id[7], Weight(0)); Node::UP node = query_builder.build(); diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/docsumcontext.cpp b/searchcore/src/vespa/searchcore/proton/docsummary/docsumcontext.cpp index 0869fc175a7..0dbffe2402a 100644 --- a/searchcore/src/vespa/searchcore/proton/docsummary/docsumcontext.cpp +++ b/searchcore/src/vespa/searchcore/proton/docsummary/docsumcontext.cpp @@ -181,38 +181,6 @@ DocsumContext::FillRankFeatures(search::docsummary::GetDocsumsState * state, sea state->_rankFeatures = _matcher->getRankFeatures(_request, _searchCtx, _attrCtx, _sessionMgr); } -namespace { -Location *getLocation(const string &loc_str, search::IAttributeManager &attrMgr) -{ - LOG(debug, "Filling document locations from location string: %s", loc_str.c_str()); - - Location *loc = new Location; - string location; - string::size_type pos = loc_str.find(':'); - if (pos != string::npos) { - string view = loc_str.substr(0, pos); - AttributeGuard::UP vec = attrMgr.getAttribute(view); - if (!vec->valid()) { - view = PositionDataType::getZCurveFieldName(view); - vec = attrMgr.getAttribute(view); - } - loc->setVecGuard(std::move(vec)); - location = loc_str.substr(pos + 1); - } else { - LOG(warning, "Location string lacks attribute vector specification. 
loc='%s'", loc_str.c_str()); - location = loc_str; - } - loc->parse(location); - return loc; -} -} // namespace - -void -DocsumContext::ParseLocation(search::docsummary::GetDocsumsState *state) -{ - state->_parsedLocation.reset(getLocation(_request.location, _attrMgr)); -} - std::unique_ptr<MatchingElements> DocsumContext::fill_matching_elements(const MatchingElementsFields &fields) { diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/docsumcontext.h b/searchcore/src/vespa/searchcore/proton/docsummary/docsumcontext.h index 1624048828f..d1b656915d9 100644 --- a/searchcore/src/vespa/searchcore/proton/docsummary/docsumcontext.h +++ b/searchcore/src/vespa/searchcore/proton/docsummary/docsumcontext.h @@ -52,7 +52,6 @@ public: // Implements GetDocsumsStateCallback void FillSummaryFeatures(search::docsummary::GetDocsumsState * state, search::docsummary::IDocsumEnvironment * env) override; void FillRankFeatures(search::docsummary::GetDocsumsState * state, search::docsummary::IDocsumEnvironment * env) override; - void ParseLocation(search::docsummary::GetDocsumsState * state) override; std::unique_ptr<search::MatchingElements> fill_matching_elements(const search::MatchingElementsFields &fields) override; }; diff --git a/searchcore/src/vespa/searchcore/proton/matching/CMakeLists.txt b/searchcore/src/vespa/searchcore/proton/matching/CMakeLists.txt index 558914805d1..ffbab597118 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/CMakeLists.txt +++ b/searchcore/src/vespa/searchcore/proton/matching/CMakeLists.txt @@ -27,6 +27,7 @@ vespa_add_library(searchcore_matching STATIC querynodes.cpp ranking_constants.cpp requestcontext.cpp + resolveviewvisitor.cpp result_processor.cpp same_element_builder.cpp sameelementmodifier.cpp diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp index 62a59ab7680..8edf85657dc 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp +++ 
b/searchcore/src/vespa/searchcore/proton/matching/query.cpp @@ -8,10 +8,9 @@ #include "sameelementmodifier.h" #include "unpacking_iterators_optimizer.h" #include <vespa/document/datatype/positiondatatype.h> -#include <vespa/searchlib/common/location.h> +#include <vespa/searchlib/common/geo_location_spec.h> +#include <vespa/searchlib/common/geo_location_parser.h> #include <vespa/searchlib/parsequery/stackdumpiterator.h> -#include <vespa/searchlib/query/tree/point.h> -#include <vespa/searchlib/query/tree/rectangle.h> #include <vespa/searchlib/queryeval/intermediate_blueprints.h> #include <vespa/log/log.h> @@ -20,11 +19,13 @@ LOG_SETUP(".proton.matching.query"); using document::PositionDataType; using search::SimpleQueryStackDumpIterator; +using search::common::GeoLocation; +using search::common::GeoLocationParser; +using search::common::GeoLocationSpec; using search::fef::IIndexEnvironment; using search::fef::ITermData; using search::fef::MatchData; using search::fef::MatchDataLayout; -using search::fef::Location; using search::query::Node; using search::query::QueryTreeCreator; using search::query::Weight; @@ -58,37 +59,79 @@ inject(Node::UP query, Node::UP to_inject) { return query; } -void -addLocationNode(const string &location_str, Node::UP &query_tree, Location &fef_location) { - if (location_str.empty()) { - return; - } - string::size_type pos = location_str.find(':'); - if (pos == string::npos) { - LOG(warning, "Location string lacks attribute vector specification. 
loc='%s'", location_str.c_str()); - return; +std::vector<ProtonLocationTerm *> +find_location_terms(Node *tree) { + std::vector<ProtonLocationTerm *> retval; + std::vector<Node *> nodes; + nodes.push_back(tree); + for (size_t i = 0; i < nodes.size(); ++i) { + if (auto loc = dynamic_cast<ProtonLocationTerm *>(nodes[i])) { + retval.push_back(loc); + } + if (auto parent = dynamic_cast<const search::query::Intermediate *>(nodes[i])) { + for (Node * child : parent->getChildren()) { + nodes.push_back(child); + } + } } - const string view = PositionDataType::getZCurveFieldName(location_str.substr(0, pos)); - const string loc = location_str.substr(pos + 1); + return retval; +} - search::common::Location locationSpec; - if (!locationSpec.parse(loc)) { - LOG(warning, "Location parse error (location: '%s'): %s", location_str.c_str(), locationSpec.getParseError()); - return; +GeoLocationSpec parse_location_string(string str) { + GeoLocationSpec empty; + if (str.empty()) { + return empty; } + GeoLocationParser parser; + if (parser.parseOldFormatWithField(str)) { + auto attr_name = PositionDataType::getZCurveFieldName(parser.getFieldName()); + return GeoLocationSpec{attr_name, parser.getGeoLocation()}; + } else { + LOG(warning, "Location parse error (location: '%s'): %s", str.c_str(), parser.getParseError()); + } + return empty; +} + +GeoLocationSpec process_location_term(ProtonLocationTerm &pterm) { + auto old_view = pterm.getView(); + auto new_view = PositionDataType::getZCurveFieldName(old_view); + pterm.setView(new_view); + const GeoLocation &loc = pterm.getTerm(); + return GeoLocationSpec{new_view, loc}; +} + +void exchange_location_nodes(const string &location_str, + Node::UP &query_tree, + std::vector<search::fef::Location> &fef_locations) +{ + std::vector<GeoLocationSpec> locationSpecs; - int32_t id = -1; - Weight weight(100); - - if (locationSpec.getRankOnDistance()) { - query_tree = inject(std::move(query_tree), std::make_unique<ProtonLocationTerm>(loc, view, id, 
weight)); - fef_location.setAttribute(view); - fef_location.setXPosition(locationSpec.getX()); - fef_location.setYPosition(locationSpec.getY()); - fef_location.setXAspect(locationSpec.getXAspect()); - fef_location.setValid(true); - } else if (locationSpec.getPruneOnDistance()) { - query_tree = inject(std::move(query_tree), std::make_unique<ProtonLocationTerm>(loc, view, id, weight)); + auto parsed = parse_location_string(location_str); + if (parsed.location.valid()) { + locationSpecs.push_back(parsed); + } + for (ProtonLocationTerm * pterm : find_location_terms(query_tree.get())) { + auto spec = process_location_term(*pterm); + if (spec.location.valid()) { + locationSpecs.push_back(spec); + } + } + for (const GeoLocationSpec &spec : locationSpecs) { + if (spec.location.has_point) { + search::fef::Location fef_loc; + fef_loc.setAttribute(spec.field_name); + fef_loc.setXPosition(spec.location.point.x); + fef_loc.setYPosition(spec.location.point.y); + fef_loc.setXAspect(spec.location.x_aspect.multiplier); + fef_loc.setValid(true); + fef_locations.push_back(fef_loc); + } + } + if (parsed.location.can_limit()) { + int32_t id = -1; + Weight weight(100); + query_tree = inject(std::move(query_tree), + std::make_unique<ProtonLocationTerm>(parsed.location, parsed.field_name, id, weight)); } } @@ -127,7 +170,7 @@ Query::buildTree(vespalib::stringref stack, const string &location, if (_query_tree) { SameElementModifier prefixSameElementSubIndexes; _query_tree->accept(prefixSameElementSubIndexes); - addLocationNode(location, _query_tree, _location); + exchange_location_nodes(location, _query_tree, _locations); _query_tree = UnpackingIteratorsOptimizer::optimize(std::move(_query_tree), bool(_whiteListBlueprint), split_unpacking_iterators, delay_unpacking_iterators); ResolveViewVisitor resolve_visitor(resolver, indexEnv); @@ -146,10 +189,12 @@ Query::extractTerms(vector<const ITermData *> &terms) } void -Query::extractLocations(vector<const Location *> &locations) 
+Query::extractLocations(vector<const search::fef::Location *> &locations) { locations.clear(); - locations.push_back(&_location); + for (const auto & loc : _locations) { + locations.push_back(&loc); + } } void diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h index 60f40e24d1e..5a484d2ce1f 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.h +++ b/searchcore/src/vespa/searchcore/proton/matching/query.h @@ -18,11 +18,11 @@ class ISearchContext; class Query { private: - using Blueprint=search::queryeval::Blueprint; + using Blueprint = search::queryeval::Blueprint; search::query::Node::UP _query_tree; Blueprint::UP _blueprint; - search::fef::Location _location; Blueprint::UP _whiteListBlueprint; + std::vector<search::fef::Location> _locations; public: Query(); diff --git a/searchcore/src/vespa/searchcore/proton/matching/queryenvironment.cpp b/searchcore/src/vespa/searchcore/proton/matching/queryenvironment.cpp index fe0f6aaff91..667b48d2acd 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/queryenvironment.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/queryenvironment.cpp @@ -17,7 +17,7 @@ QueryEnvironment::QueryEnvironment(const IIndexEnvironment &indexEnv, : _indexEnv(indexEnv), _attrContext(attrContext), _properties(properties), - _locations(1), + _locations(), _terms(), _field_length_inspector(field_length_inspector) { @@ -44,12 +44,6 @@ QueryEnvironment::getTerm(uint32_t idx) const return _terms[idx]; } -const search::fef::Location & -QueryEnvironment::getLocation() const -{ - return *_locations[0]; -} - const IAttributeContext & QueryEnvironment::getAttributeContext() const { diff --git a/searchcore/src/vespa/searchcore/proton/matching/queryenvironment.h b/searchcore/src/vespa/searchcore/proton/matching/queryenvironment.h index 575694ae079..426123599bd 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/queryenvironment.h +++ 
b/searchcore/src/vespa/searchcore/proton/matching/queryenvironment.h @@ -70,7 +70,9 @@ public: const search::fef::ITermData *getTerm(uint32_t idx) const override; // inherited from search::fef::IQueryEnvironment - const search::fef::Location & getLocation() const override; + std::vector<const search::fef::Location *> getAllLocations() const override { + return _locations; + } // inherited from search::fef::IQueryEnvironment const search::attribute::IAttributeContext & getAttributeContext() const override; diff --git a/searchcore/src/vespa/searchcore/proton/matching/resolveviewvisitor.cpp b/searchcore/src/vespa/searchcore/proton/matching/resolveviewvisitor.cpp new file mode 100644 index 00000000000..b12b48465d9 --- /dev/null +++ b/searchcore/src/vespa/searchcore/proton/matching/resolveviewvisitor.cpp @@ -0,0 +1,26 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "resolveviewvisitor.h" +#include <vespa/document/datatype/positiondatatype.h> +#include <vespa/log/log.h> + +LOG_SETUP(".proton.matching.resolveviewvisitor"); + +namespace proton::matching { + +void +ResolveViewVisitor::visit(ProtonLocationTerm &n) { + // if injected by query.cpp, this should work: + n.resolve(_resolver, _indexEnv); + if (n.numFields() == 0) { + // if received from QRS, this is needed: + auto oldView = n.getView(); + auto newView = document::PositionDataType::getZCurveFieldName(oldView); + n.setView(newView); + n.resolve(_resolver, _indexEnv); + LOG(debug, "ProtonLocationTerm found %zu field after view change %s -> %s", + n.numFields(), oldView.c_str(), newView.c_str()); + } +} + +} // namespace diff --git a/searchcore/src/vespa/searchcore/proton/matching/resolveviewvisitor.h b/searchcore/src/vespa/searchcore/proton/matching/resolveviewvisitor.h index 4a12e6adda9..f8c1a007c28 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/resolveviewvisitor.h +++ 
b/searchcore/src/vespa/searchcore/proton/matching/resolveviewvisitor.h @@ -22,6 +22,8 @@ public: template <class TermNode> void visitTerm(TermNode &n) { n.resolve(_resolver, _indexEnv); } + void visit(ProtonLocationTerm &n) override; + void visit(ProtonNodeTypes::Equiv &n) override { visitChildren(n); n.resolveFromChildren(n.getChildren()); diff --git a/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp b/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp index a64704a08e9..cf1506a9118 100644 --- a/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp +++ b/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp @@ -17,7 +17,6 @@ #include <vespa/searchlib/attribute/singlestringattribute.h> #include <vespa/searchlib/index/dummyfileheadercontext.h> #include <vespa/searchlib/query/tree/location.h> -#include <vespa/searchlib/query/tree/point.h> #include <vespa/searchlib/query/tree/simplequery.h> #include <vespa/searchlib/queryeval/document_weight_search_iterator.h> #include <vespa/searchlib/test/searchiteratorverifier.h> diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp index 2eafeab20bd..87aea2e3e8c 100644 --- a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp +++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp @@ -339,29 +339,43 @@ TEST("requireThatPrefixTermsWork") { TEST("requireThatLocationTermsWork") { // 0xcc is z-curve for (10, 10). 
MyAttributeManager attribute_manager = makeAttributeManager(int64_t(0xcc)); - - SimpleLocationTerm node(Location(Point(10, 10), 3, 0), field, 0, Weight(0)); - EXPECT_TRUE(search(node, attribute_manager)); - node = SimpleLocationTerm(Location(Point(100, 100), 3, 0), field, 0, Weight(0)); - EXPECT_TRUE(!search(node, attribute_manager)); - node = SimpleLocationTerm(Location(Point(13, 13), 4, 0), field, 0, Weight(0)); - EXPECT_TRUE(!search(node, attribute_manager)); - node = SimpleLocationTerm(Location(Point(10, 13), 3, 0), field, 0, Weight(0)); - EXPECT_TRUE(search(node, attribute_manager)); + { + SimpleLocationTerm node(Location(Point{10, 10}, 3, 0), field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); + } + { + SimpleLocationTerm node(Location(Point{100, 100}, 3, 0), field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + } + { + SimpleLocationTerm node(Location(Point{13, 13}, 4, 0), field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + } + { + SimpleLocationTerm node(Location(Point{10, 13}, 3, 0), field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); + } } TEST("requireThatOptimizedLocationTermsWork") { // 0xcc is z-curve for (10, 10). 
MyAttributeManager attribute_manager = makeFastSearchLongAttributeManager(int64_t(0xcc)); - - SimpleLocationTerm node(Location(Point(10, 10), 3, 0), field, 0, Weight(0)); - EXPECT_TRUE(search(node, attribute_manager, true)); - node = SimpleLocationTerm(Location(Point(100, 100), 3, 0), field, 0, Weight(0)); - EXPECT_TRUE(!search(node, attribute_manager, true)); - node = SimpleLocationTerm(Location(Point(13, 13), 4, 0), field, 0, Weight(0)); - EXPECT_TRUE(!search(node, attribute_manager, true)); - node = SimpleLocationTerm(Location(Point(10, 13), 3, 0), field, 0, Weight(0)); - EXPECT_TRUE(search(node, attribute_manager, true)); + { + SimpleLocationTerm node(Location(Point{10, 10}, 3, 0), field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager, true)); + } + { + SimpleLocationTerm node(Location(Point{100, 100}, 3, 0), field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager, true)); + } + { + SimpleLocationTerm node(Location(Point{13, 13}, 4, 0), field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager, true)); + } + { + SimpleLocationTerm node(Location(Point{10, 13}, 3, 0), field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager, true)); + } } TEST("require that optimized location search works with wrapped bounding box (no hits)") { diff --git a/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp b/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp index 5e633dcc97d..3098232b443 100644 --- a/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp +++ b/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp @@ -254,15 +254,22 @@ TEST(AttributeBlueprintTest, require_that_location_terms_work) { // 0xcc is z-curve for (10, 10). 
auto attribute_manager = makeAttributeManager(int64_t(0xcc)); - - SimpleLocationTerm node(Location(Point(10, 10), 3, 0), field, 0, Weight(0)); - EXPECT_TRUE(do_search(node, attribute_manager, false)); - node = SimpleLocationTerm(Location(Point(100, 100), 3, 0), field, 0, Weight(0)); - EXPECT_TRUE(!do_search(node, attribute_manager, false)); - node = SimpleLocationTerm(Location(Point(13, 13), 4, 0), field, 0, Weight(0)); - EXPECT_TRUE(!do_search(node, attribute_manager, false)); - node = SimpleLocationTerm(Location(Point(10, 13), 3, 0), field, 0, Weight(0)); - EXPECT_TRUE(do_search(node, attribute_manager, false)); + { + SimpleLocationTerm node(Location(Point{10, 10}, 3, 0), field, 0, Weight(0)); + EXPECT_TRUE(do_search(node, attribute_manager, false)); + } + { + SimpleLocationTerm node(Location(Point{100, 100}, 3, 0), field, 0, Weight(0)); + EXPECT_TRUE(!do_search(node, attribute_manager, false)); + } + { + SimpleLocationTerm node(Location(Point{13, 13}, 4, 0), field, 0, Weight(0)); + EXPECT_TRUE(!do_search(node, attribute_manager, false)); + } + { + SimpleLocationTerm node(Location(Point{10, 13}, 3, 0), field, 0, Weight(0)); + EXPECT_TRUE(do_search(node, attribute_manager, false)); + } } TEST(AttributeBlueprintTest, require_that_fast_search_location_terms_work) @@ -270,14 +277,14 @@ TEST(AttributeBlueprintTest, require_that_fast_search_location_terms_work) // 0xcc is z-curve for (10, 10). 
auto attribute_manager = makeFastSearchLongAttribute(int64_t(0xcc)); - SimpleLocationTerm node(Location(Point(10, 10), 3, 0), field, 0, Weight(0)); + SimpleLocationTerm node(Location(Point{10, 10}, 3, 0), field, 0, Weight(0)); #if 0 EXPECT_TRUE(search(node, attribute_manager)); - node = SimpleLocationTerm(Location(Point(100, 100), 3, 0),field, 0, Weight(0)); + node = SimpleLocationTerm(Location(Point{100, 100}, 3, 0),field, 0, Weight(0)); EXPECT_TRUE(!search(node, attribute_manager)); - node = SimpleLocationTerm(Location(Point(13, 13), 4, 0),field, 0, Weight(0)); + node = SimpleLocationTerm(Location(Point{13, 13}, 4, 0),field, 0, Weight(0)); EXPECT_TRUE(!search(node, attribute_manager)); - node = SimpleLocationTerm(Location(Point(10, 13), 3, 0),field, 0, Weight(0)); + node = SimpleLocationTerm(Location(Point{10, 13}, 3, 0),field, 0, Weight(0)); EXPECT_TRUE(search(node, attribute_manager)); #endif } diff --git a/searchlib/src/tests/common/location/CMakeLists.txt b/searchlib/src/tests/common/location/CMakeLists.txt index 64a894096d5..ea0d96529e1 100644 --- a/searchlib/src/tests/common/location/CMakeLists.txt +++ b/searchlib/src/tests/common/location/CMakeLists.txt @@ -1,8 +1,9 @@ # Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-vespa_add_executable(searchlib_location_test_app TEST +vespa_add_executable(searchlib_geo_location_test_app TEST SOURCES - location_test.cpp + geo_location_test.cpp DEPENDS searchlib + GTest::GTest ) -vespa_add_test(NAME searchlib_location_test_app COMMAND searchlib_location_test_app) +vespa_add_test(NAME searchlib_geo_location_test_app COMMAND searchlib_geo_location_test_app) diff --git a/searchlib/src/tests/common/location/geo_location_test.cpp b/searchlib/src/tests/common/location/geo_location_test.cpp new file mode 100644 index 00000000000..8093ea61697 --- /dev/null +++ b/searchlib/src/tests/common/location/geo_location_test.cpp @@ -0,0 +1,136 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <stdio.h> +#include <vespa/searchlib/common/geo_location.h> +#include <vespa/searchlib/common/geo_location_spec.h> +#include <vespa/searchlib/common/geo_location_parser.h> +#include <vespa/vespalib/gtest/gtest.h> + +using search::common::GeoLocation; +using search::common::GeoLocationParser; + +bool is_parseable(const char *str) { + GeoLocationParser parser; + return parser.parseOldFormat(str); +} + +GeoLocation parse(const char *str) { + GeoLocationParser parser; + EXPECT_TRUE(parser.parseOldFormat(str)); + return parser.getGeoLocation(); +} + +TEST(GeoLocationTest, malformed_bounding_boxes_are_not_parseable) { + EXPECT_TRUE(is_parseable("[2,10,20,30,40]")); + EXPECT_FALSE(is_parseable("[2,10,20,30,40][2,10,20,30,40]")); + EXPECT_FALSE(is_parseable("[1,10,20,30,40]")); + EXPECT_FALSE(is_parseable("[3,10,20,30,40]")); + EXPECT_FALSE(is_parseable("[2, 10, 20, 30, 40]")); + EXPECT_FALSE(is_parseable("[2,10,20,30,40")); + EXPECT_FALSE(is_parseable("[2,10,20,30]")); + EXPECT_FALSE(is_parseable("[10,20,30,40]")); +} + +TEST(GeoLocationTest, malformed_circles_are_not_parseable) { + EXPECT_TRUE(is_parseable("(2,10,20,5,0,0,0)")); + EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0)(2,10,20,5,0,0,0)")); + 
EXPECT_FALSE(is_parseable("(1,10,20,5,0,0,0)")); + EXPECT_FALSE(is_parseable("(3,10,20,5,0,0,0)")); + EXPECT_FALSE(is_parseable("(2, 10, 20, 5, 0, 0, 0)")); + EXPECT_FALSE(is_parseable("(2,10,20,5)")); + EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0")); + EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0,1000")); + EXPECT_FALSE(is_parseable("(10,20,5)")); +} + +TEST(GeoLocationTest, bounding_boxes_can_be_parsed) { + auto loc = parse("[2,10,20,30,40]"); + EXPECT_EQ(false, loc.has_point); + EXPECT_EQ(true, loc.bounding_box.active()); + EXPECT_EQ(0u, loc.x_aspect.multiplier); + EXPECT_EQ(0, loc.point.x); + EXPECT_EQ(0, loc.point.y); + EXPECT_EQ(std::numeric_limits<uint32_t>::max(), loc.radius); + EXPECT_EQ(10, loc.bounding_box.x.low); + EXPECT_EQ(20, loc.bounding_box.y.low); + EXPECT_EQ(30, loc.bounding_box.x.high); + EXPECT_EQ(40, loc.bounding_box.y.high); +} + +TEST(GeoLocationTest, circles_can_be_parsed) { + auto loc = parse("(2,10,20,5,0,0,0)"); + EXPECT_EQ(true, loc.has_point); + EXPECT_EQ(true, loc.bounding_box.active()); + EXPECT_EQ(0u, loc.x_aspect.multiplier); + EXPECT_EQ(10, loc.point.x); + EXPECT_EQ(20, loc.point.y); + EXPECT_EQ(5u, loc.radius); + EXPECT_EQ(5, loc.bounding_box.x.low); + EXPECT_EQ(15, loc.bounding_box.y.low); + EXPECT_EQ(15, loc.bounding_box.x.high); + EXPECT_EQ(25, loc.bounding_box.y.high); +} + +TEST(GeoLocationTest, circles_can_have_aspect_ratio) { + auto loc = parse("(2,10,20,5,0,0,0,2147483648)"); + EXPECT_EQ(true, loc.has_point); + EXPECT_EQ(true, loc.bounding_box.active()); + EXPECT_EQ(2147483648u, loc.x_aspect.multiplier); + EXPECT_EQ(10, loc.point.x); + EXPECT_EQ(20, loc.point.y); + EXPECT_EQ(5u, loc.radius); + EXPECT_EQ(-1, loc.bounding_box.x.low); + EXPECT_EQ(15, loc.bounding_box.y.low); + EXPECT_EQ(21, loc.bounding_box.x.high); + EXPECT_EQ(25, loc.bounding_box.y.high); +} + +TEST(GeoLocationTest, bounding_box_can_be_specified_after_circle) { + auto loc = parse("(2,10,20,5,0,0,0)[2,10,20,30,40]"); + EXPECT_EQ(true, loc.has_point); + 
EXPECT_EQ(true, loc.bounding_box.active()); + EXPECT_EQ(0u, loc.x_aspect.multiplier); + EXPECT_EQ(10, loc.point.x); + EXPECT_EQ(20, loc.point.y); + EXPECT_EQ(5u, loc.radius); + EXPECT_EQ(10, loc.bounding_box.x.low); + EXPECT_EQ(20, loc.bounding_box.y.low); + EXPECT_EQ(15, loc.bounding_box.x.high); + EXPECT_EQ(25, loc.bounding_box.y.high); +} + +TEST(GeoLocationTest, circles_can_be_specified_after_bounding_box) { + auto loc = parse("[2,10,20,30,40](2,10,20,5,0,0,0)"); + EXPECT_EQ(true, loc.has_point); + EXPECT_EQ(true, loc.bounding_box.active()); + EXPECT_EQ(0u, loc.x_aspect.multiplier); + EXPECT_EQ(10, loc.point.x); + EXPECT_EQ(20, loc.point.y); + EXPECT_EQ(5u, loc.radius); + EXPECT_EQ(10, loc.bounding_box.x.low); + EXPECT_EQ(20, loc.bounding_box.y.low); + EXPECT_EQ(15, loc.bounding_box.x.high); + EXPECT_EQ(25, loc.bounding_box.y.high); +} + +TEST(GeoLocationTest, santa_search_gives_non_wrapped_bounding_box) { + auto loc = parse("(2,122163600,89998536,290112,4,2000,0,109704)"); + EXPECT_GE(loc.bounding_box.x.high, loc.bounding_box.x.low); + EXPECT_GE(loc.bounding_box.y.high, loc.bounding_box.y.low); +} + +TEST(GeoLocationTest, near_boundary_search_gives_non_wrapped_bounding_box) { + auto loc1 = parse("(2,2000000000,2000000000,3000000000,0,1,0)"); + EXPECT_GE(loc1.bounding_box.x.high, loc1.bounding_box.x.low); + EXPECT_GE(loc1.bounding_box.y.high, loc1.bounding_box.y.low); + EXPECT_EQ(std::numeric_limits<int32_t>::max(), loc1.bounding_box.x.high); // upper x bound must stay at int32 max + EXPECT_EQ(std::numeric_limits<int32_t>::max(), loc1.bounding_box.y.high); // upper y bound must stay at int32 max + + auto loc2 = parse("(2,-2000000000,-2000000000,3000000000,0,1,0)"); + EXPECT_GE(loc2.bounding_box.x.high, loc2.bounding_box.x.low); + EXPECT_GE(loc2.bounding_box.y.high, loc2.bounding_box.y.low); + EXPECT_EQ(std::numeric_limits<int32_t>::min(), loc2.bounding_box.x.low); + EXPECT_EQ(std::numeric_limits<int32_t>::min(), loc2.bounding_box.y.low); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/common/location/location_test.cpp 
b/searchlib/src/tests/common/location/location_test.cpp deleted file mode 100644 index d781e5b7275..00000000000 --- a/searchlib/src/tests/common/location/location_test.cpp +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/vespalib/testkit/test_kit.h> -#include <vespa/searchlib/common/location.h> -#include <vespa/searchlib/attribute/attributeguard.h> - - -using search::common::Location; - -bool is_parseable(const char *str) { - Location loc; - return loc.parse(str); -} - -Location parse(const char *str) { - Location loc; - if (!EXPECT_TRUE(loc.parse(str))) { - fprintf(stderr, " parse error: %s\n", loc.getParseError()); - } - return loc; -} - -TEST("require that malformed bounding boxes are not parseable") { - EXPECT_TRUE(is_parseable("[2,10,20,30,40]")); - EXPECT_FALSE(is_parseable("[2,10,20,30,40][2,10,20,30,40]")); - EXPECT_FALSE(is_parseable("[1,10,20,30,40]")); - EXPECT_FALSE(is_parseable("[3,10,20,30,40]")); - EXPECT_FALSE(is_parseable("[2, 10, 20, 30, 40]")); - EXPECT_FALSE(is_parseable("[2,10,20,30,40")); - EXPECT_FALSE(is_parseable("[2,10,20,30]")); - EXPECT_FALSE(is_parseable("[10,20,30,40]")); -} - -TEST("require that malformed circles are not parseable") { - EXPECT_TRUE(is_parseable("(2,10,20,5,0,0,0)")); - EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0)(2,10,20,5,0,0,0)")); - EXPECT_FALSE(is_parseable("(1,10,20,5,0,0,0)")); - EXPECT_FALSE(is_parseable("(3,10,20,5,0,0,0)")); - EXPECT_FALSE(is_parseable("(2, 10, 20, 5, 0, 0, 0)")); - EXPECT_FALSE(is_parseable("(2,10,20,5)")); - EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0")); - EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0,1000")); - EXPECT_FALSE(is_parseable("(10,20,5)")); -} - -TEST("require that bounding boxes can be parsed") { - Location loc = parse("[2,10,20,30,40]"); - EXPECT_EQUAL(false, loc.getRankOnDistance()); - EXPECT_EQUAL(true, loc.getPruneOnDistance()); - EXPECT_EQUAL(0u, 
loc.getXAspect()); - EXPECT_EQUAL(0, loc.getX()); - EXPECT_EQUAL(0, loc.getY()); - EXPECT_EQUAL(std::numeric_limits<uint32_t>::max(), loc.getRadius()); - EXPECT_EQUAL(10, loc.getMinX()); - EXPECT_EQUAL(20, loc.getMinY()); - EXPECT_EQUAL(30, loc.getMaxX()); - EXPECT_EQUAL(40, loc.getMaxY()); -} - -TEST("require that circles can be parsed") { - Location loc = parse("(2,10,20,5,0,0,0)"); - EXPECT_EQUAL(true, loc.getRankOnDistance()); - EXPECT_EQUAL(true, loc.getPruneOnDistance()); - EXPECT_EQUAL(0u, loc.getXAspect()); - EXPECT_EQUAL(10, loc.getX()); - EXPECT_EQUAL(20, loc.getY()); - EXPECT_EQUAL(5u, loc.getRadius()); - EXPECT_EQUAL(5, loc.getMinX()); - EXPECT_EQUAL(15, loc.getMinY()); - EXPECT_EQUAL(15, loc.getMaxX()); - EXPECT_EQUAL(25, loc.getMaxY()); -} - -TEST("require that circles can have aspect ratio") { - Location loc = parse("(2,10,20,5,0,0,0,2147483648)"); - EXPECT_EQUAL(true, loc.getRankOnDistance()); - EXPECT_EQUAL(true, loc.getPruneOnDistance()); - EXPECT_EQUAL(2147483648u, loc.getXAspect()); - EXPECT_EQUAL(10, loc.getX()); - EXPECT_EQUAL(20, loc.getY()); - EXPECT_EQUAL(5u, loc.getRadius()); - EXPECT_EQUAL(-1, loc.getMinX()); - EXPECT_EQUAL(15, loc.getMinY()); - EXPECT_EQUAL(21, loc.getMaxX()); - EXPECT_EQUAL(25, loc.getMaxY()); -} - -TEST("require that bounding box can be specified after circle") { - Location loc = parse("(2,10,20,5,0,0,0)[2,10,20,30,40]"); - EXPECT_EQUAL(true, loc.getRankOnDistance()); - EXPECT_EQUAL(true, loc.getPruneOnDistance()); - EXPECT_EQUAL(0u, loc.getXAspect()); - EXPECT_EQUAL(10, loc.getX()); - EXPECT_EQUAL(20, loc.getY()); - EXPECT_EQUAL(5u, loc.getRadius()); - EXPECT_EQUAL(10, loc.getMinX()); - EXPECT_EQUAL(20, loc.getMinY()); - EXPECT_EQUAL(15, loc.getMaxX()); - EXPECT_EQUAL(25, loc.getMaxY()); -} - -TEST("require that circles can be specified after bounding box") { - Location loc = parse("[2,10,20,30,40](2,10,20,5,0,0,0)"); - EXPECT_EQUAL(true, loc.getRankOnDistance()); - EXPECT_EQUAL(true, loc.getPruneOnDistance()); - 
EXPECT_EQUAL(0u, loc.getXAspect()); - EXPECT_EQUAL(10, loc.getX()); - EXPECT_EQUAL(20, loc.getY()); - EXPECT_EQUAL(5u, loc.getRadius()); - EXPECT_EQUAL(10, loc.getMinX()); - EXPECT_EQUAL(20, loc.getMinY()); - EXPECT_EQUAL(15, loc.getMaxX()); - EXPECT_EQUAL(25, loc.getMaxY()); -} - -TEST("require that santa search gives non-wrapped bounding box") { - Location loc = parse("(2,122163600,89998536,290112,4,2000,0,109704)"); - EXPECT_GREATER_EQUAL(loc.getMaxX(), loc.getMinX()); - EXPECT_GREATER_EQUAL(loc.getMaxY(), loc.getMinY()); -} - -TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/query/query_visitor_test.cpp b/searchlib/src/tests/query/query_visitor_test.cpp index edbc29be784..8441dc2227f 100644 --- a/searchlib/src/tests/query/query_visitor_test.cpp +++ b/searchlib/src/tests/query/query_visitor_test.cpp @@ -90,7 +90,7 @@ void Test::requireThatAllNodesCanBeVisited() { checkVisit<WandTerm>(new SimpleWandTerm("field", 0, Weight(42), 57, 67, 77.7)); checkVisit<Rank>(new SimpleRank); checkVisit<NumberTerm>(new SimpleNumberTerm("0.42", "field", 0, Weight(0))); - const Location location(Point(10, 10), 20, 0); + const Location location(Point{10, 10}, 20, 0); checkVisit<LocationTerm>(new SimpleLocationTerm(location, "field", 0, Weight(0))); checkVisit<PrefixTerm>(new SimplePrefixTerm("t", "field", 0, Weight(0))); checkVisit<RangeTerm>(new SimpleRangeTerm(Range(0, 1), "field", 0, Weight(0))); diff --git a/searchlib/src/tests/query/querybuilder_test.cpp b/searchlib/src/tests/query/querybuilder_test.cpp index d093bc4242e..269600d26d4 100644 --- a/searchlib/src/tests/query/querybuilder_test.cpp +++ b/searchlib/src/tests/query/querybuilder_test.cpp @@ -32,7 +32,7 @@ const size_t distance = 4; const string int1 = "42"; const string float1 = "3.14"; const Range range(32, 64); -const Point position(100, 100); +const Point position{100, 100}; const int max_distance = 20; const uint32_t x_aspect = 0; const Location location(position, max_distance, x_aspect); diff --git 
a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 4a34a07a773..cb587d77133 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -248,11 +248,14 @@ public: LocationPostFilterBlueprint(const FieldSpec &field, const IAttributeVector &attribute, const Location &loc) : ComplexLeafBlueprint(field), _attribute(attribute), - _location() + _location(loc) { - _location.setVec(attribute); - _location.parse(loc.getLocationString()); - uint32_t estHits = _attribute.getNumDocs(); + uint32_t estHits = 0; + if (loc.valid()) { + _location.setVec(attribute); + estHits = _attribute.getNumDocs(); + } + LOG(debug, "location %s in attribute with numdocs %u", loc.getOldFormatString().c_str(), estHits); HitEstimate estimate(estHits, estHits == 0); setEstimate(estimate); } @@ -272,14 +275,16 @@ Blueprint::UP make_location_blueprint(const FieldSpec &field, const IAttributeVector &attribute, const Location &loc) { auto post_filter = std::make_unique<LocationPostFilterBlueprint>(field, attribute, loc); const common::Location &location = post_filter->location(); - if (location.getMinX() > location.getMaxX() || - location.getMinY() > location.getMaxY()) + if (location.bounding_box.x.low > location.bounding_box.x.high || + location.bounding_box.y.low > location.bounding_box.y.high) { return std::make_unique<queryeval::EmptyBlueprint>(field); } ZCurve::RangeVector rangeVector = ZCurve::find_ranges( - location.getMinX(), location.getMinY(), - location.getMaxX(), location.getMaxY()); + location.bounding_box.x.low, + location.bounding_box.y.low, + location.bounding_box.x.high, + location.bounding_box.y.high); auto pre_filter = std::make_unique<LocationPreFilterBlueprint>(field, attribute, rangeVector); if (!pre_filter->should_use()) { return post_filter; diff --git 
a/searchlib/src/vespa/searchlib/common/CMakeLists.txt b/searchlib/src/vespa/searchlib/common/CMakeLists.txt index 5d30260a169..7e1cfd8fec5 100644 --- a/searchlib/src/vespa/searchlib/common/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/common/CMakeLists.txt @@ -12,6 +12,9 @@ vespa_add_library(searchlib_common OBJECT featureset.cpp fileheadercontext.cpp gatecallback.cpp + geo_location.cpp + geo_location_spec.cpp + geo_location_parser.cpp growablebitvector.cpp indexmetainfo.cpp location.cpp diff --git a/searchlib/src/vespa/searchlib/common/documentlocations.cpp b/searchlib/src/vespa/searchlib/common/documentlocations.cpp index b03176f0ad2..b8f05581b41 100644 --- a/searchlib/src/vespa/searchlib/common/documentlocations.cpp +++ b/searchlib/src/vespa/searchlib/common/documentlocations.cpp @@ -21,5 +21,9 @@ DocumentLocations::setVecGuard(std::unique_ptr<search::AttributeGuard> guard) { setVec(*_vec_guard.get()->get()); } +DocumentLocations::DocumentLocations(DocumentLocations &&) = default; +DocumentLocations & DocumentLocations::operator = (DocumentLocations &&) = default; + + } // namespace common } // namespace search diff --git a/searchlib/src/vespa/searchlib/common/documentlocations.h b/searchlib/src/vespa/searchlib/common/documentlocations.h index 1dab68ca11f..51d5be76e65 100644 --- a/searchlib/src/vespa/searchlib/common/documentlocations.h +++ b/searchlib/src/vespa/searchlib/common/documentlocations.h @@ -25,8 +25,8 @@ private: const search::attribute::IAttributeVector *_vec; public: - DocumentLocations(DocumentLocations &&) = default; - DocumentLocations & operator = (DocumentLocations &&) = default; + DocumentLocations(DocumentLocations &&); + DocumentLocations & operator = (DocumentLocations &&); DocumentLocations(); virtual ~DocumentLocations(); diff --git a/searchlib/src/vespa/searchlib/common/geo_location.cpp b/searchlib/src/vespa/searchlib/common/geo_location.cpp new file mode 100644 index 00000000000..6dd7b83ae37 --- /dev/null +++ 
b/searchlib/src/vespa/searchlib/common/geo_location.cpp @@ -0,0 +1,184 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "geo_location.h" + +using vespalib::geo::ZCurve; + +namespace search::common { + +namespace { + +ZCurve::BoundingBox to_z(GeoLocation::Box box) { + return ZCurve::BoundingBox(box.x.low, box.x.high, + box.y.low, box.y.high); +} + +GeoLocation::Box +adjust_bounding_box(GeoLocation::Box orig, GeoLocation::Point point, uint32_t radius, GeoLocation::Aspect x_aspect) +{ + if (radius == GeoLocation::radius_inf) { + // only happens if GeoLocation is explicitly constructed with "infinite" radius + return orig; + } + uint32_t maxdx = radius; + if (x_aspect.active()) { + // x_aspect is a 32-bit fixed-point number in range [0,1] + // so this implements maxdx = ceil(radius/x_aspect) + uint64_t maxdx2 = ((static_cast<uint64_t>(radius) << 32) + 0xffffffffu) / x_aspect.multiplier; + if (maxdx2 >= 0xffffffffu) { + maxdx = 0xffffffffu; + } else { + maxdx = static_cast<uint32_t>(maxdx2); + } + } + // implied limits from radius and point: + int64_t implied_max_x = int64_t(point.x) + int64_t(maxdx); + int64_t implied_min_x = int64_t(point.x) - int64_t(maxdx); + + int64_t implied_max_y = int64_t(point.y) + int64_t(radius); + int64_t implied_min_y = int64_t(point.y) - int64_t(radius); + + int32_t max_x = orig.x.high; + int32_t min_x = orig.x.low; + + int32_t max_y = orig.y.high; + int32_t min_y = orig.y.low; + + if (implied_max_x < max_x) max_x = implied_max_x; + if (implied_min_x > min_x) min_x = implied_min_x; + + if (implied_max_y < max_y) max_y = implied_max_y; + if (implied_min_y > min_y) min_y = implied_min_y; + + return GeoLocation::Box{GeoLocation::Range{min_x, max_x}, + GeoLocation::Range{min_y, max_y}}; +} + +} // namespace <unnamed> + +GeoLocation::GeoLocation() + : has_point(false), + point{0, 0}, + radius(radius_inf), + x_aspect(), + bounding_box(no_box), + 
_sq_radius(sq_radius_inf), + _z_bounding_box(0,0,0,0) +{} + +GeoLocation::GeoLocation(Point p) + : has_point(true), + point(p), + radius(radius_inf), + x_aspect(), + bounding_box(no_box), + _sq_radius(sq_radius_inf), + _z_bounding_box(0,0,0,0) +{} + +GeoLocation::GeoLocation(Point p, Aspect xa) + : has_point(true), + point(p), + radius(radius_inf), + x_aspect(xa), + bounding_box(no_box), + _sq_radius(sq_radius_inf), + _z_bounding_box(0,0,0,0) +{} + +GeoLocation::GeoLocation(Point p, uint32_t r) + : has_point(true), + point(p), + radius(r), + x_aspect(), + bounding_box(adjust_bounding_box(no_box, p, r, Aspect())), + _sq_radius(uint64_t(r) * uint64_t(r)), + _z_bounding_box(to_z(bounding_box)) +{} + +GeoLocation::GeoLocation(Point p, uint32_t r, Aspect xa) + : has_point(true), + point(p), + radius(r), + x_aspect(xa), + bounding_box(adjust_bounding_box(no_box, p, r, xa)), + _sq_radius(uint64_t(r) * uint64_t(r)), + _z_bounding_box(to_z(bounding_box)) +{} + +GeoLocation::GeoLocation(Box b) + : has_point(false), + point{0, 0}, + radius(radius_inf), + x_aspect(), + bounding_box(b), + _sq_radius(sq_radius_inf), + _z_bounding_box(to_z(bounding_box)) +{} + +GeoLocation::GeoLocation(Box b, Point p) + : has_point(true), + point(p), + radius(radius_inf), + x_aspect(), + bounding_box(b), + _sq_radius(sq_radius_inf), + _z_bounding_box(to_z(bounding_box)) +{} + +GeoLocation::GeoLocation(Box b, Point p, Aspect xa) + : has_point(true), + point(p), + radius(radius_inf), + x_aspect(xa), + bounding_box(b), + _sq_radius(sq_radius_inf), + _z_bounding_box(to_z(bounding_box)) +{} + +GeoLocation::GeoLocation(Box b, Point p, uint32_t r) + : has_point(true), + point(p), + radius(r), + x_aspect(), + bounding_box(adjust_bounding_box(b, p, r, Aspect())), + _sq_radius(uint64_t(r) * uint64_t(r)), + _z_bounding_box(to_z(bounding_box)) +{} + +GeoLocation::GeoLocation(Box b, Point p, uint32_t r, Aspect xa) + : has_point(true), + point(p), + radius(r), + x_aspect(xa), + 
bounding_box(adjust_bounding_box(b, p, r, xa)), + _sq_radius(uint64_t(r) * uint64_t(r)), + _z_bounding_box(to_z(bounding_box)) +{} + +uint64_t GeoLocation::sq_distance_to(Point p) const { + if (has_point) { + uint64_t dx = (p.x > point.x) ? (int64_t(p.x) - int64_t(point.x)) : (int64_t(point.x) - int64_t(p.x)); // widen before subtracting: an int32 difference can overflow + if (x_aspect.active()) { + // x_aspect is a 32-bit fixed-point number in range [0,1] + // this implements dx = (dx * x_aspect) + dx = (dx * x_aspect.multiplier) >> 32; + } + uint64_t dy = (p.y > point.y) ? (int64_t(p.y) - int64_t(point.y)) : (int64_t(point.y) - int64_t(p.y)); // same widening as for dx + return dx*dx + dy*dy; + } + return 0; +} + +bool GeoLocation::inside_limit(Point p) const { + if (p.x < bounding_box.x.low) return false; + if (p.x > bounding_box.x.high) return false; + + if (p.y < bounding_box.y.low) return false; + if (p.y > bounding_box.y.high) return false; + + uint64_t sq_dist = sq_distance_to(p); + return sq_dist <= _sq_radius; +} + +} // namespace search::common diff --git a/searchlib/src/vespa/searchlib/common/geo_location.h b/searchlib/src/vespa/searchlib/common/geo_location.h new file mode 100644 index 00000000000..5d04a09142a --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/geo_location.h @@ -0,0 +1,90 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <string> +#include <cstdint> +#include <limits> +#include <vespa/vespalib/geo/zcurve.h> + +namespace search::common { + +/** + * An immutable struct for a (geo) location. + * Contains a point with optional radius, a bounding box, or both. 
+ **/ +struct GeoLocation +{ + // contained structs and helper constants: + static constexpr int32_t range_low = std::numeric_limits<int32_t>::min(); + static constexpr int32_t range_high = std::numeric_limits<int32_t>::max(); + static constexpr uint32_t radius_inf = std::numeric_limits<uint32_t>::max(); + struct Point { + const int32_t x; + const int32_t y; + Point() = delete; + }; + struct Aspect { + uint32_t multiplier; + Aspect() : multiplier(0) {} + Aspect(uint32_t multiplier_in) : multiplier(multiplier_in) {} + bool active() const { return multiplier != 0; } + }; + struct Range { + const int32_t low; + const int32_t high; + bool active() const { + return (low != range_low) || (high != range_high); + } + }; + static constexpr Range no_range = {range_low, range_high}; + struct Box { + const Range x; + const Range y; + bool active() const { return x.active() || y.active(); } + }; + static constexpr Box no_box = {no_range, no_range}; + + // actual content of struct: + const bool has_point; + Point point; + uint32_t radius; + Aspect x_aspect; + Box bounding_box; + GeoLocation(); + + // constructors: + GeoLocation(Point p); + GeoLocation(Point p, Aspect xa); + GeoLocation(Point p, uint32_t r); + GeoLocation(Point p, uint32_t r, Aspect xa); + GeoLocation(Box b); + GeoLocation(Box b, Point p); + GeoLocation(Box b, Point p, Aspect xa); + GeoLocation(Box b, Point p, uint32_t r); + GeoLocation(Box b, Point p, uint32_t r, Aspect xa); + + // helper methods: + bool has_radius() const { return radius != radius_inf; } + bool valid() const { return has_point || bounding_box.active(); } + bool can_limit() const { return bounding_box.active(); } + + uint64_t sq_distance_to(Point p) const; + bool inside_limit(Point p) const; + + bool inside_limit(int64_t zcurve_encoded_xy) const { + if (_z_bounding_box.getzFailBoundingBoxTest(zcurve_encoded_xy)) return false; + int32_t x = 0; + int32_t y = 0; + vespalib::geo::ZCurve::decode(zcurve_encoded_xy, &x, &y); + return 
inside_limit(Point{x, y}); + } + +private: + // constants for implementation of helper methods: + static constexpr uint64_t sq_radius_inf = std::numeric_limits<uint64_t>::max(); + const uint64_t _sq_radius; + const vespalib::geo::ZCurve::BoundingBox _z_bounding_box; +}; + +} // namespace diff --git a/searchlib/src/vespa/searchlib/common/geo_location_parser.cpp b/searchlib/src/vespa/searchlib/common/geo_location_parser.cpp new file mode 100644 index 00000000000..05c53348699 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/geo_location_parser.cpp @@ -0,0 +1,209 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "geo_location_parser.h" +#include <limits> +#include <vespa/vespalib/stllike/asciistream.h> + +namespace { + +int getInt(const char * &p) { + int val; + bool isminus; + val = 0; + isminus = false; + if (*p == '-') { + isminus = true; + p++; + } + while (*p >= '0' && *p <= '9') { + val *= 10; + val += (*p++ - '0'); + } + return isminus ? 
- val : val; +} + +} // namespace <unnamed> + +namespace search::common { + +GeoLocationParser::GeoLocationParser() + : _valid(false), + _has_point(false), + _has_bounding_box(false), + _field_name(), + _x(0), + _y(0), + _x_aspect(0u), + _radius(std::numeric_limits<uint32_t>::max()), + _min_x(std::numeric_limits<int32_t>::min()), + _max_x(std::numeric_limits<int32_t>::max()), + _min_y(std::numeric_limits<int32_t>::min()), + _max_y(std::numeric_limits<int32_t>::max()), + _parseError(NULL) +{} + +bool +GeoLocationParser::correctDimensionalitySkip(const char * &p) { + if (*p == '2') { + p++; + if (*p != ',') { + _parseError = "Missing comma after 2D dimensionality"; + return false; + } + p++; + return true; + } + _parseError = "Bad dimensionality spec, not 2D"; + return false; +} + +bool +GeoLocationParser::parseOldFormatWithField(const std::string &str) +{ + auto sep = str.find(':'); + if (sep == std::string::npos) { + _parseError = "Location string lacks field specification."; + return false; + } + _field_name = str.substr(0, sep); + std::string only_loc = str.substr(sep + 1); + return parseOldFormat(only_loc); +} + +bool +GeoLocationParser::parseOldFormat(const std::string &locStr) +{ + bool foundBoundingBox = false; + bool foundLoc = false; + const char *p = locStr.c_str(); + while (*p != '\0') { + if (*p == '[') { + p++; + if (foundBoundingBox) { + _parseError = "Duplicate bounding box"; + return false; + } + foundBoundingBox = true; + if (!correctDimensionalitySkip(p)) { + return false; + } + _min_x = getInt(p); + if (*p != ',') { + _parseError = "Missing ',' after minx"; + return false; + } + p++; + _min_y = getInt(p); + if (*p != ',') { + _parseError = "Missing ',' after miny"; + return false; + } + p++; + _max_x = getInt(p); + if (*p != ',') { + _parseError = "Missing ',' after maxx"; + return false; + } + p++; + _max_y = getInt(p); + if (*p != ']') { + _parseError = "Missing ']' after maxy"; + return false; + } + p++; + } else if (*p == '(') { + p++; + if 
(foundLoc) { + _parseError = "Duplicate location"; + return false; + } + foundLoc = true; + if (!correctDimensionalitySkip(p)) { + return false; + } + _x = getInt(p); + if (*p != ',') { + _parseError = "Missing ',' after x position"; + return false; + } + p++; + _y = getInt(p); + if (*p != ',') { + _parseError = "Missing ',' after y position"; + return false; + } + p++; + _radius = getInt(p); + if (*p != ',') { + _parseError = "Missing ',' after radius"; + return false; + } + p++; + /* _tableID = */ (void) getInt(p); + if (*p != ',') { + _parseError = "Missing ',' after tableID"; + return false; + } + p++; + /* _rankMultiplier = */ (void) getInt(p); + if (*p != ',') { + _parseError = "Missing ',' after rank multiplier"; + return false; + } + p++; + /* _rankOnlyOnDistance = */ (void) getInt(p); + if (*p == ',') { + p++; + _x_aspect = getInt(p); + if (*p != ')') { + _parseError = "Missing ')' after xAspect"; + return false; + } + } else { + if (*p != ')') { + _parseError = "Missing ')' after rankOnlyOnDistance flag"; + return false; + } + } + p++; + } else if (*p == ' ') { + p++; + } else { + _parseError = "Unexpected char in location spec"; + return false; + } + } + _has_point = foundLoc; + _has_bounding_box = foundBoundingBox; + _valid = (_has_point || _has_bounding_box); + return _valid; +} + +GeoLocation +GeoLocationParser::getGeoLocation() const +{ + GeoLocation::Aspect aspect(_x_aspect); + if (_has_bounding_box) { + GeoLocation::Range x_range{_min_x, _max_x}; + GeoLocation::Range y_range{_min_y, _max_y}; + GeoLocation::Box bounding_box{x_range, y_range}; + if (_has_point) { + GeoLocation::Point point{_x, _y}; + if (_radius == GeoLocation::radius_inf) { + return GeoLocation(bounding_box, point, aspect); + } + return GeoLocation(bounding_box, point, _radius, aspect); + } + return GeoLocation(bounding_box); + } + if (_has_point) { + GeoLocation::Point point{_x, _y}; + if (_radius == GeoLocation::radius_inf) { + return GeoLocation(point, aspect); + } + return 
GeoLocation(point, _radius, aspect); + } + return GeoLocation(); +} + +} // namespace diff --git a/searchlib/src/vespa/searchlib/common/geo_location_parser.h b/searchlib/src/vespa/searchlib/common/geo_location_parser.h new file mode 100644 index 00000000000..8936a620d21 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/geo_location_parser.h @@ -0,0 +1,47 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <string> +#include <cstdint> +#include "geo_location.h" +#include "geo_location_spec.h" + +namespace search::common { + +/** + * Parser for a geo-location string representation. + **/ +class GeoLocationParser +{ +public: + GeoLocationParser(); + + bool parseOldFormat(const std::string &locStr); + bool parseOldFormatWithField(const std::string &str); + + std::string getFieldName() const { return _field_name; } + GeoLocation getGeoLocation() const; + + const char * getParseError() const { return _parseError; } +private: + bool _valid; + bool _has_point; + bool _has_bounding_box; + + std::string _field_name; + + int32_t _x; /* Query X position */ + int32_t _y; /* Query Y position */ + uint32_t _x_aspect; /* X distance multiplier fraction */ + uint32_t _radius; /* Radius for euclidean distance */ + int32_t _min_x; /* Min X coordinate */ + int32_t _max_x; /* Max X coordinate */ + int32_t _min_y; /* Min Y coordinate */ + int32_t _max_y; /* Max Y coordinate */ + + const char *_parseError; + bool correctDimensionalitySkip(const char * &p); +}; + +} // namespace diff --git a/searchlib/src/vespa/searchlib/common/geo_location_spec.cpp b/searchlib/src/vespa/searchlib/common/geo_location_spec.cpp new file mode 100644 index 00000000000..271946e2df6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/geo_location_spec.cpp @@ -0,0 +1,3 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "geo_location_spec.h" diff --git a/searchlib/src/vespa/searchlib/common/geo_location_spec.h b/searchlib/src/vespa/searchlib/common/geo_location_spec.h new file mode 100644 index 00000000000..42c2b8e6c8c --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/geo_location_spec.h @@ -0,0 +1,21 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <string> +#include <cstdint> +#include "geo_location.h" + +namespace search::common { + +/** + * Immutable specification of a geo-location query item. + **/ +struct GeoLocationSpec +{ +public: + const std::string field_name; + const GeoLocation location; +}; + +} // namespace diff --git a/searchlib/src/vespa/searchlib/common/location.cpp b/searchlib/src/vespa/searchlib/common/location.cpp index 6927d9ab6cb..171dcecaa33 100644 --- a/searchlib/src/vespa/searchlib/common/location.cpp +++ b/searchlib/src/vespa/searchlib/common/location.cpp @@ -5,198 +5,6 @@ namespace search::common { -Location::Location() : - _zBoundingBox(0,0,0,0), - _x(0), - _y(0), - _xAspect(0u), - _radius(std::numeric_limits<uint32_t>::max()), - _minx(std::numeric_limits<int32_t>::min()), - _maxx(std::numeric_limits<int32_t>::max()), - _miny(std::numeric_limits<int32_t>::min()), - _maxy(std::numeric_limits<int32_t>::max()), - _rankOnDistance(false), - _pruneOnDistance(false), - _parseError(NULL) -{ -} +Location::Location(const GeoLocation &from) : GeoLocation(from) {} - -bool -Location::getDimensionality(const char **pp) -{ - if (**pp == '2') { - (*pp)++; - if (**pp != ',') { - _parseError = "Missing comma after 2D dimensionality"; - return false; - } - (*pp)++; - return true; - } - _parseError = "Bad dimensionality spec, not 2D"; - return false; -} - - -int -Location::getInt(const char **pp) -{ - const char *p = *pp; - int val; - bool isminus; - - val = 0; - isminus = false; - if (*p == '-') { - isminus = true; - p++; - } - while (*p >= '0' && *p <= '9') 
- val = val * 10 + *p++ - '0'; - *pp = p; - return isminus ? - val : val; -} - -bool Location::parse(const vespalib::string &locStr) -{ - bool hadCutoff = false; - bool hadLoc = false; - const char *p = locStr.c_str(); - while (*p != '\0') { - if (*p == '[') { - p++; - if (hadCutoff) { - _parseError = "Duplicate square cutoff"; - return false; - } - hadCutoff = true; - if (!getDimensionality(&p)) - return false; - _minx = getInt(&p); - if (*p != ',') { - _parseError = "Missing ',' after minx"; - return false; - } - p++; - _miny = getInt(&p); - if (*p != ',') { - _parseError = "Missing ',' after miny"; - return false; - } - p++; - _maxx = getInt(&p); - if (*p != ',') { - _parseError = "Missing ',' after maxx"; - return false; - } - p++; - _maxy = getInt(&p); - if (*p != ']') { - _parseError = "Missing ']' after maxy"; - return false; - } - p++; - } else if (*p == '(') { - p++; - if (hadLoc) { - _parseError = "Duplicate location"; - return false; - } - hadLoc = true; - if (!getDimensionality(&p)) - return false; - _x = getInt(&p); - if (*p != ',') { - _parseError = "Missing ',' after x position"; - return false; - } - p++; - _y = getInt(&p); - if (*p != ',') { - _parseError = "Missing ',' after y position"; - return false; - } - p++; - _radius = getInt(&p); - if (*p != ',') { - _parseError = "Missing ',' after radius"; - return false; - } - p++; - /* _tableID = */ (void) getInt(&p); - if (*p != ',') { - _parseError = "Missing ',' after tableID"; - return false; - } - p++; - /* _rankMultiplier = */ (void) getInt(&p); - if (*p != ',') { - _parseError = "Missing ',' after rank multiplier"; - return false; - } - p++; - /* _rankOnlyOnDistance = */ (void) (getInt(&p) != 0); - if (*p == ',') { - p++; - _xAspect = getInt(&p); - if (*p != ')') { - _parseError = "Missing ')' after xAspect"; - return false; - } - } else { - if (*p != ')') { - _parseError = "Missing ')' after rankOnlyOnDistance flag"; - return false; - } - } - p++; - } else if (*p == ' ') - p++; - else { - 
_parseError = "Unexpected char in location spec"; - return false; - } - } - - if (hadLoc) { - _rankOnDistance = true; - uint32_t maxdx = _radius; - if (_xAspect != 0) { - uint64_t maxdx2 = ((static_cast<uint64_t>(_radius) << 32) + 0xffffffffu) / - _xAspect; - if (maxdx2 >= 0xffffffffu) - maxdx = 0xffffffffu; - else - maxdx = static_cast<uint32_t>(maxdx2); - } - if (static_cast<int32_t>(_x - maxdx) > _minx && - static_cast<int64_t>(_x) - static_cast<int64_t>(maxdx) > - static_cast<int64_t>(_minx)) - _minx = _x - maxdx; - if (static_cast<int32_t>(_x + maxdx) < _maxx && - static_cast<int64_t>(_x) + static_cast<int64_t>(maxdx) < - static_cast<int64_t>(_maxx)) - _maxx = _x + maxdx; - if (static_cast<int32_t>(_y - _radius) > _miny && - static_cast<int64_t>(_y) - static_cast<int64_t>(_radius) > - static_cast<int64_t>(_miny)) - _miny = _y - _radius; - if (static_cast<int32_t>(_y + _radius) < _maxy && - static_cast<int64_t>(_y) + static_cast<int64_t>(_radius) < - static_cast<int64_t>(_maxy)) - _maxy = _y + _radius; - } - if (_minx != std::numeric_limits<int32_t>::min() || - _maxx != std::numeric_limits<int32_t>::max() || - _miny != std::numeric_limits<int32_t>::min() || - _maxy != std::numeric_limits<int32_t>::max()) - { - _pruneOnDistance = true; - } - _zBoundingBox = vespalib::geo::ZCurve::BoundingBox(_minx, _maxx, _miny, _maxy); - - return true; -} - -} +} // namespace diff --git a/searchlib/src/vespa/searchlib/common/location.h b/searchlib/src/vespa/searchlib/common/location.h index a00bb83648a..197f92326cd 100644 --- a/searchlib/src/vespa/searchlib/common/location.h +++ b/searchlib/src/vespa/searchlib/common/location.h @@ -3,51 +3,19 @@ #pragma once #include "documentlocations.h" -#include <vespa/vespalib/geo/zcurve.h> - -#include <vespa/vespalib/stllike/string.h> +#include "geo_location.h" namespace search::common { -class Location : public DocumentLocations +class Location : public DocumentLocations, + public GeoLocation { -private: - static int getInt(const char 
**pp); - bool getDimensionality(const char **pp); - public: - Location(); - bool getRankOnDistance() const { return _rankOnDistance; } - bool getPruneOnDistance() const { return _pruneOnDistance; } - uint32_t getXAspect() const { return _xAspect; } - int32_t getX() const { return _x; } - int32_t getY() const { return _y; } - uint32_t getRadius() const { return _radius; } - const char * getParseError() const { return _parseError; } - int32_t getMinX() const { return _minx; } - int32_t getMinY() const { return _miny; } - int32_t getMaxX() const { return _maxx; } - int32_t getMaxY() const { return _maxy; } - bool getzFailBoundingBoxTest(int64_t docxy) const { - return _zBoundingBox.getzFailBoundingBoxTest(docxy); - } - - bool parse(const vespalib::string &locStr); - -private: - vespalib::geo::ZCurve::BoundingBox _zBoundingBox; - int32_t _x; /* Query X position */ - int32_t _y; /* Query Y position */ - uint32_t _xAspect; /* X distance multiplier fraction */ - uint32_t _radius; /* Radius for euclidean distance */ - int32_t _minx; /* Min X coordinate */ - int32_t _maxx; /* Max X coordinate */ - int32_t _miny; /* Min Y coordinate */ - int32_t _maxy; /* Max Y coordinate */ - - bool _rankOnDistance; - bool _pruneOnDistance; - const char *_parseError; + Location(const GeoLocation& from); + ~Location() {} + Location(Location &&) = default; + bool getRankOnDistance() const { return has_point; } + bool getPruneOnDistance() const { return can_limit(); } }; } diff --git a/searchlib/src/vespa/searchlib/common/locationiterators.cpp b/searchlib/src/vespa/searchlib/common/locationiterators.cpp index 16e465bcd05..d90ed3b41f3 100644 --- a/searchlib/src/vespa/searchlib/common/locationiterators.cpp +++ b/searchlib/src/vespa/searchlib/common/locationiterators.cpp @@ -4,6 +4,9 @@ #include <vespa/searchlib/bitcompression/compression.h> #include <vespa/searchlib/attribute/attributevector.h> +#include <vespa/log/log.h> +LOG_SETUP(".searchlib.common.locationiterators"); + using namespace 
search::common; class FastS_2DZLocationIterator : public search::queryeval::SearchIterator @@ -11,7 +14,6 @@ class FastS_2DZLocationIterator : public search::queryeval::SearchIterator private: const unsigned int _numDocs; const bool _strict; - const uint64_t _radius2; const Location & _location; std::vector<search::AttributeVector::largeint_t> _pos; @@ -31,7 +33,6 @@ FastS_2DZLocationIterator(unsigned int numDocs, : SearchIterator(), _numDocs(numDocs), _strict(strict), - _radius2(static_cast<uint64_t>(location.getRadius()) * location.getRadius()), _location(location), _pos() { @@ -45,6 +46,8 @@ FastS_2DZLocationIterator::~FastS_2DZLocationIterator() = default; void FastS_2DZLocationIterator::doSeek(uint32_t docId) { + LOG(debug, "FastS_2DZLocationIterator: seek(%u) with numDocs=%u endId=%u", + docId, _numDocs, getEndId()); if (__builtin_expect(docId >= _numDocs, false)) { setAtEnd(); return; @@ -62,24 +65,9 @@ FastS_2DZLocationIterator::doSeek(uint32_t docId) } for (uint32_t i = 0; i < numValues; i++) { int64_t docxy(pos[i]); - if ( ! location.getzFailBoundingBoxTest(docxy)) { - int32_t docx = 0; - int32_t docy = 0; - vespalib::geo::ZCurve::decode(docxy, &docx, &docy); - uint32_t dx = (location.getX() > docx) - ? location.getX() - docx - : docx - location.getX(); - if (location.getXAspect() != 0) - dx = ((uint64_t) dx * location.getXAspect()) >> 32; - - uint32_t dy = (location.getY() > docy) - ? 
location.getY() - docy - : docy - location.getY(); - uint64_t dist2 = (uint64_t) dx * dx + (uint64_t) dy * dy; - if (dist2 <= _radius2) { - setDocId(docId); - return; - } + if (location.inside_limit(docxy)) { + setDocId(docId); + return; } } diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.cpp b/searchlib/src/vespa/searchlib/features/distancefeature.cpp index 7a624e64d67..bd2c7becc81 100644 --- a/searchlib/src/vespa/searchlib/features/distancefeature.cpp +++ b/searchlib/src/vespa/searchlib/features/distancefeature.cpp @@ -82,7 +82,8 @@ ConvertRawscoreToDistance::execute(uint32_t docId) feature_t DistanceExecutor::calculateDistance(uint32_t docId) { - if (_location.isValid() && _pos != nullptr) { + if ((! _locations.empty()) && (_pos != nullptr)) { + LOG(debug, "calculate 2D Z-distance from %zu locations", _locations.size()); return calculate2DZDistance(docId); } return DEFAULT_DISTANCE; @@ -97,35 +98,33 @@ DistanceExecutor::calculate2DZDistance(uint32_t docId) uint64_t sqabsdist = std::numeric_limits<uint64_t>::max(); int32_t docx = 0; int32_t docy = 0; - for (uint32_t i = 0; i < numValues; ++i) { - vespalib::geo::ZCurve::decode(_intBuf[i], &docx, &docy); - uint32_t dx; - uint32_t dy; - if (_location.getXPosition() > docx) { - dx = _location.getXPosition() - docx; - } else { - dx = docx - _location.getXPosition(); - } - if (_location.getXAspect() != 0) { - dx = ((uint64_t) dx * _location.getXAspect()) >> 32; - } - if (_location.getYPosition() > docy) { - dy = _location.getYPosition() - docy; - } else { - dy = docy - _location.getYPosition(); - } - uint64_t sqdist = (uint64_t) dx * dx + (uint64_t) dy * dy; - if (sqdist < sqabsdist) { - sqabsdist = sqdist; + for (auto loc : _locations) { + assert(loc); + assert(loc->isValid()); + int32_t loc_x = loc->getXPosition(); + int32_t loc_y = loc->getYPosition(); + uint64_t loc_a = loc->getXAspect(); + LOG(debug, "location: x=%u, y=%u, aspect=%zu", loc_x, loc_y, loc_a); + for (uint32_t i = 0; i < numValues; 
++i) { + vespalib::geo::ZCurve::decode(_intBuf[i], &docx, &docy); + uint32_t dx = (loc_x > docx) ? (loc_x - docx) : (docx - loc_x); + if (loc_a != 0) { + dx = (uint64_t(dx) * loc_a) >> 32; + } + uint32_t dy = (loc_y > docy) ? (loc_y - docy) : (docy - loc_y); + uint64_t sqdist = (uint64_t) dx * dx + (uint64_t) dy * dy; + if (sqdist < sqabsdist) { + sqabsdist = sqdist; + } } } return static_cast<feature_t>(std::sqrt(static_cast<feature_t>(sqabsdist))); } -DistanceExecutor::DistanceExecutor(const Location & location, +DistanceExecutor::DistanceExecutor(std::vector<const Location *> locations, const search::attribute::IAttributeVector * pos) : FeatureExecutor(), - _location(location), + _locations(locations), _pos(pos), _intBuf() { @@ -231,6 +230,7 @@ DistanceBlueprint::setup(const IIndexEnvironment & env, return setup_geopos(env, z); } if (allow_bad_field) { + // TODO remove on Vespa 8 // backwards compatibility fallback: return setup_geopos(env, arg); } @@ -251,11 +251,30 @@ DistanceBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash if (_use_item_label) { return stash.create<ConvertRawscoreToDistance>(env, _arg_string); } + // expect geo pos: const search::attribute::IAttributeVector * pos = nullptr; - const Location & location = env.getLocation(); - LOG(debug, "DistanceBlueprint::createExecutor location.valid='%s', attribute='%s'", - location.isValid() ? 
"true" : "false", _arg_string.c_str()); - if (_use_geo_pos && location.isValid()) { + std::vector<const search::fef::Location *> matching_locs; + std::vector<const search::fef::Location *> other_locs; + + for (auto loc_ptr : env.getAllLocations()) { + if (_use_geo_pos && loc_ptr && loc_ptr->isValid()) { + if (loc_ptr->getAttribute() == _arg_string) { + LOG(debug, "found loc from query env matching '%s'", _arg_string.c_str()); + matching_locs.push_back(loc_ptr); + } else { + LOG(debug, "found loc(%s) from query env not matching arg(%s)", + loc_ptr->getAttribute().c_str(), _arg_string.c_str()); + other_locs.push_back(loc_ptr); + } + } + } + if (matching_locs.empty() && other_locs.empty()) { + LOG(debug, "createExecutor: no valid locations"); + return stash.create<DistanceExecutor>(matching_locs, nullptr); + } + LOG(debug, "createExecutor: valid location, attribute='%s'", _arg_string.c_str()); + + if (_use_geo_pos) { pos = env.getAttributeContext().getAttribute(_arg_string); if (pos != nullptr) { if (!pos->isIntegerType()) { @@ -271,8 +290,8 @@ DistanceBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash LOG(warning, "The position attribute '%s' was not found. Will use default distance.", _arg_string.c_str()); } } - - return stash.create<DistanceExecutor>(location, pos); + LOG(debug, "use '%s' locations with pos=%p", matching_locs.empty() ? "other" : "matching", pos); + return stash.create<DistanceExecutor>(matching_locs.empty() ? 
other_locs : matching_locs, pos); } } diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.h b/searchlib/src/vespa/searchlib/features/distancefeature.h index 3a8edd5ee94..024b9f37f31 100644 --- a/searchlib/src/vespa/searchlib/features/distancefeature.h +++ b/searchlib/src/vespa/searchlib/features/distancefeature.h @@ -12,7 +12,7 @@ namespace search::features { */ class DistanceExecutor : public fef::FeatureExecutor { private: - const fef::Location & _location; + std::vector<const fef::Location *> _locations; const attribute::IAttributeVector * _pos; attribute::IntegerContent _intBuf; @@ -23,10 +23,11 @@ public: /** * Constructs an executor for the distance feature. * - * @param location the location object associated with the query environment. + * @param locations location objects associated with the query environment. * @param pos the attribute to use for positions (expects zcurve encoding). */ - DistanceExecutor(const fef::Location & location, const attribute::IAttributeVector * pos); + DistanceExecutor(std::vector<const fef::Location *> locations, + const attribute::IAttributeVector * pos); void execute(uint32_t docId) override; static const feature_t DEFAULT_DISTANCE; diff --git a/searchlib/src/vespa/searchlib/fef/iqueryenvironment.h b/searchlib/src/vespa/searchlib/fef/iqueryenvironment.h index 041e9ec67bc..811e7fd4616 100644 --- a/searchlib/src/vespa/searchlib/fef/iqueryenvironment.h +++ b/searchlib/src/vespa/searchlib/fef/iqueryenvironment.h @@ -58,9 +58,9 @@ public: /** * Obtain the location information associated with this query environment. * - * @return location object. + * @return pointers to location objects. **/ - virtual const Location & getLocation() const = 0; + virtual std::vector<const Location *> getAllLocations() const = 0; /** * Returns the attribute context for this query. 
diff --git a/searchlib/src/vespa/searchlib/fef/location.h b/searchlib/src/vespa/searchlib/fef/location.h index 5be7d1ce822..3bc693e11b4 100644 --- a/searchlib/src/vespa/searchlib/fef/location.h +++ b/searchlib/src/vespa/searchlib/fef/location.h @@ -39,7 +39,7 @@ public: } /** - * Returns the name of the attribute to use for x positions. + * Returns the name of the attribute to use for positions. * * @return the attribute name. **/ diff --git a/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.h b/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.h index b2a3d416f5a..3a8cde99c06 100644 --- a/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.h +++ b/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.h @@ -73,7 +73,9 @@ public: } const Properties & getProperties() const override { return _queryEnv.getProperties(); } - const Location & getLocation() const override { return _queryEnv.getLocation(); } + std::vector<const Location *> getAllLocations() const override { + return _queryEnv.getAllLocations(); + } const attribute::IAttributeContext & getAttributeContext() const override { return _queryEnv.getAttributeContext(); } double get_average_field_length(const vespalib::string &field_name) const override { return _queryEnv.get_average_field_length(field_name); } const IIndexEnvironment & getIndexEnvironment() const override { return _queryEnv.getIndexEnvironment(); } diff --git a/searchlib/src/vespa/searchlib/fef/test/queryenvironment.h b/searchlib/src/vespa/searchlib/fef/test/queryenvironment.h index 40898281794..4b9bec1ee68 100644 --- a/searchlib/src/vespa/searchlib/fef/test/queryenvironment.h +++ b/searchlib/src/vespa/searchlib/fef/test/queryenvironment.h @@ -38,7 +38,11 @@ public: const Properties &getProperties() const override { return _properties; } uint32_t getNumTerms() const override { return _terms.size(); } const ITermData *getTerm(uint32_t idx) const override { return idx < _terms.size() ? 
&_terms[idx] : NULL; } - const Location & getLocation() const override { return _location; } + std::vector<const Location *> getAllLocations() const override { + std::vector<const Location *> retval; + retval.push_back(&_location); + return retval; + } const search::attribute::IAttributeContext &getAttributeContext() const override { return *_attrCtx; } double get_average_field_length(const vespalib::string& field_name) const override { auto itr = _avg_field_lengths.find(field_name); diff --git a/searchlib/src/vespa/searchlib/parsequery/parse.h b/searchlib/src/vespa/searchlib/parsequery/parse.h index b4dd9826b84..68e259b92e8 100644 --- a/searchlib/src/vespa/searchlib/parsequery/parse.h +++ b/searchlib/src/vespa/searchlib/parsequery/parse.h @@ -53,7 +53,7 @@ public: ITEM_REGEXP = 24, ITEM_WORD_ALTERNATIVES = 25, ITEM_NEAREST_NEIGHBOR = 26, - ITEM_LOCATION_TERM = 27, + ITEM_GEO_LOCATION_TERM = 27, ITEM_MAX = 28, // Indicates how long tables must be. ITEM_UNDEF = 31, }; diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp index 17cbd6dce1b..1820fb0e969 100644 --- a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp +++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp @@ -207,7 +207,7 @@ SimpleQueryStackDumpIterator::next() } break; case ParseItem::ITEM_NUMTERM: - case ParseItem::ITEM_LOCATION_TERM: + case ParseItem::ITEM_GEO_LOCATION_TERM: case ParseItem::ITEM_TERM: case ParseItem::ITEM_PREFIXTERM: case ParseItem::ITEM_SUBSTRINGTERM: diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp index f1599e820ef..66466b030d0 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp @@ -63,8 +63,13 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor } } break; + case 
ParseItem::ITEM_GEO_LOCATION_TERM: + // TODO implement this: + // vespalib::string field = queryRep.getIndexName(); + // vespalib::stringref location_term = queryRep.getTerm(); + // qn = std::make_unique<LocationQueryNode> ...something .... + // break; case ParseItem::ITEM_NUMTERM: - case ParseItem::ITEM_LOCATION_TERM: case ParseItem::ITEM_TERM: case ParseItem::ITEM_PREFIXTERM: case ParseItem::ITEM_REGEXP: diff --git a/searchlib/src/vespa/searchlib/query/tree/location.cpp b/searchlib/src/vespa/searchlib/query/tree/location.cpp index 216c5ec5ad0..6e678f9e682 100644 --- a/searchlib/src/vespa/searchlib/query/tree/location.cpp +++ b/searchlib/src/vespa/searchlib/query/tree/location.cpp @@ -6,58 +6,71 @@ #include <vespa/vespalib/stllike/asciistream.h> using vespalib::asciistream; +using search::common::GeoLocation; namespace search::query { -Location::Location(const Point &point, uint32_t max_dist, uint32_t x_aspect) { - asciistream loc; - loc << "(2" // dimensionality - << "," << point.x - << "," << point.y - << "," << max_dist - << "," << "0" // table id. - << "," << "1" // rank multiplier. - << "," << "0" // rank only on distance. - << "," << x_aspect // x aspect. - << ")"; - _location_string = loc.str(); +static GeoLocation::Box convert(const Rectangle &rect) { + GeoLocation::Range x_range{rect.left, rect.right}; + GeoLocation::Range y_range{rect.top, rect.bottom}; + return GeoLocation::Box{x_range, y_range}; } +Location::Location(const Point &p, uint32_t max_dist, uint32_t aspect) + : Parent(p, max_dist, GeoLocation::Aspect(aspect)) +{} + Location::Location(const Rectangle &rect, - const Point &point, uint32_t max_dist, uint32_t x_aspect) -{ - asciistream loc; - loc << "(2" // dimensionality - << "," << point.x - << "," << point.y - << "," << max_dist - << "," << "0" // table id. - << "," << "1" // rank multiplier. - << "," << "0" // rank only on distance. - << "," << x_aspect // x aspect. 
- << ")"; - loc << "[2," << rect.left - << "," << rect.top - << "," << rect.right - << "," << rect.bottom - << "]" ; - _location_string = loc.str(); + const Point &p, uint32_t max_dist, uint32_t aspect) + : Parent(convert(rect), p, max_dist, GeoLocation::Aspect(aspect)) +{} -} +Location::Location(const Rectangle &rect) + : Parent(convert(rect)) +{} -Location::Location(const Rectangle &rect) { - asciistream loc; - loc << "[2," << rect.left - << "," << rect.top - << "," << rect.right - << "," << rect.bottom - << "]" ; - _location_string = loc.str(); +bool +Location::operator==(const Location &other) const +{ + auto me = getOldFormatString(); + auto it = other.getOldFormatString(); + if (me == it) { + return true; + } else { + // dump 'me' and 'it' here if unit tests fail + return false; + } +} + +std::string +Location::getOldFormatString() const +{ + // we need to produce what search::common::GeoLocationParser can parse + vespalib::asciistream buf; + if (has_point) { + buf << "(2" // dimensionality + << "," << point.x + << "," << point.y + << "," << radius + << "," << "0" // table id. + << "," << "1" // rank multiplier. + << "," << "0" // rank only on distance. 
+ << "," << x_aspect.multiplier // aspect multiplier + << ")"; + } + if (bounding_box.active()) { + buf << "[2," << bounding_box.x.low + << "," << bounding_box.y.low + << "," << bounding_box.x.high + << "," << bounding_box.y.high + << "]" ; + } + return buf.str(); } vespalib::asciistream &operator<<(vespalib::asciistream &out, const Location &loc) { - return out << loc.getLocationString(); + return out << loc.getOldFormatString(); } } diff --git a/searchlib/src/vespa/searchlib/query/tree/location.h b/searchlib/src/vespa/searchlib/query/tree/location.h index e1826c7184a..6b8090f45e1 100644 --- a/searchlib/src/vespa/searchlib/query/tree/location.h +++ b/searchlib/src/vespa/searchlib/query/tree/location.h @@ -2,29 +2,26 @@ #pragma once -#include <vespa/vespalib/stllike/string.h> +#include <string> +#include <vespa/searchlib/common/geo_location_spec.h> +#include "point.h" +#include "rectangle.h" namespace vespalib { class asciistream; } namespace search::query { -struct Point; -struct Rectangle; - -class Location { - vespalib::string _location_string; +class Location : public search::common::GeoLocation { + using Parent = search::common::GeoLocation; public: - Location() : _location_string() {} + Location() {} + Location(const Parent &spec) : Parent(spec) {} + ~Location() {} Location(const Point &p, uint32_t dist, uint32_t x_asp); Location(const Rectangle &rect); Location(const Rectangle &rect, const Point &p, uint32_t dist, uint32_t x_asp); - Location(const vespalib::string &s) : _location_string(s) {} - bool operator==(const Location &other) const { - return _location_string == other._location_string; - } - const vespalib::string &getLocationString() const { - return _location_string; - } + bool operator==(const Location &other) const; + std::string getOldFormatString() const; }; vespalib::asciistream &operator<<(vespalib::asciistream &out, const Location &loc); diff --git a/searchlib/src/vespa/searchlib/query/tree/point.h 
b/searchlib/src/vespa/searchlib/query/tree/point.h index 89d0bc1db44..48700681158 100644 --- a/searchlib/src/vespa/searchlib/query/tree/point.h +++ b/searchlib/src/vespa/searchlib/query/tree/point.h @@ -3,18 +3,10 @@ #pragma once #include <cstdint> +#include <vespa/searchlib/common/geo_location.h> namespace search::query { -struct Point { - int64_t x; - int64_t y; - Point() : x(0), y(0) {} - Point(int64_t x_in, int64_t y_in) : x(x_in), y(y_in) {} -}; - -inline bool operator==(const Point &p1, const Point &p2) { - return p1.x == p2.x && p1.y == p2.y; -} +using Point = search::common::GeoLocation::Point; } diff --git a/searchlib/src/vespa/searchlib/query/tree/rectangle.h b/searchlib/src/vespa/searchlib/query/tree/rectangle.h index 97be9ddeb32..358e994aacd 100644 --- a/searchlib/src/vespa/searchlib/query/tree/rectangle.h +++ b/searchlib/src/vespa/searchlib/query/tree/rectangle.h @@ -5,20 +5,14 @@ namespace search::query { struct Rectangle { - int64_t left; - int64_t top; - int64_t right; - int64_t bottom; + int32_t left; + int32_t top; + int32_t right; + int32_t bottom; Rectangle() : left(0), top(0), right(0), bottom(0) {} - Rectangle(int64_t l, int64_t t, int64_t r, int64_t b) + Rectangle(int32_t l, int32_t t, int32_t r, int32_t b) : left(l), top(t), right(r), bottom(b) {} }; -inline bool operator==(const Rectangle &r1, const Rectangle &r2) { - return r1.left == r2.left && r1.right == r2.right - && r1.top == r2.top && r1.bottom == r2.bottom; } - -} - diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp index f33520d8b0e..82302e4ab48 100644 --- a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp +++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp @@ -228,7 +228,7 @@ class QueryNodeConverter : public QueryVisitor { } void visit(LocationTerm &node) override { - createTerm(node, ParseItem::ITEM_LOCATION_TERM); + createTerm(node, 
ParseItem::ITEM_GEO_LOCATION_TERM); } void visit(PrefixTerm &node) override { diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h index 898db9785f6..a7e00d41555 100644 --- a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h +++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h @@ -6,6 +6,7 @@ #include "querybuilder.h" #include "term.h" #include <vespa/searchlib/parsequery/stackdumpiterator.h> +#include <vespa/searchlib/common/geo_location_parser.h> #include <vespa/vespalib/objects/hexdump.h> namespace search::query { @@ -141,17 +142,15 @@ private: t = &builder.addStringTerm(term, view, id, weight); } else if (type == ParseItem::ITEM_SUFFIXTERM) { t = &builder.addSuffixTerm(term, view, id, weight); - } else if (type == ParseItem::ITEM_LOCATION_TERM) { - Location loc(term); + } else if (type == ParseItem::ITEM_GEO_LOCATION_TERM) { + search::common::GeoLocationParser parser; + parser.parseOldFormat(term); + Location loc(parser.getGeoLocation()); t = &builder.addLocationTerm(loc, view, id, weight); } else if (type == ParseItem::ITEM_NUMTERM) { if (term[0] == '[' || term[0] == '<' || term[0] == '>') { Range range(term); t = &builder.addRangeTerm(range, view, id, weight); - } else if (term[0] == '(') { - // TODO: handled above, should remove this block - Location loc(term); - t = &builder.addLocationTerm(loc, view, id, weight); } else { t = &builder.addNumberTerm(term, view, id, weight); } diff --git a/searchsummary/src/tests/docsummary/matched_elements_filter/matched_elements_filter_test.cpp b/searchsummary/src/tests/docsummary/matched_elements_filter/matched_elements_filter_test.cpp index 0ac2f09e1b0..55c363a12c1 100644 --- a/searchsummary/src/tests/docsummary/matched_elements_filter/matched_elements_filter_test.cpp +++ b/searchsummary/src/tests/docsummary/matched_elements_filter/matched_elements_filter_test.cpp @@ -167,7 +167,6 @@ public: 
~StateCallback() {} void FillSummaryFeatures(GetDocsumsState*, IDocsumEnvironment*) override {} void FillRankFeatures(GetDocsumsState*, IDocsumEnvironment*) override {} - void ParseLocation(GetDocsumsState*) override {} std::unique_ptr<MatchingElements> fill_matching_elements(const MatchingElementsFields&) override { auto result = std::make_unique<MatchingElements>(); result->add_matching_elements(doc_id, _field_name, _matching_elements); diff --git a/searchsummary/src/tests/docsummary/positionsdfw_test.cpp b/searchsummary/src/tests/docsummary/positionsdfw_test.cpp index 6fd0c39f06f..683bab49353 100644 --- a/searchsummary/src/tests/docsummary/positionsdfw_test.cpp +++ b/searchsummary/src/tests/docsummary/positionsdfw_test.cpp @@ -108,7 +108,6 @@ public: struct MyGetDocsumsStateCallback : GetDocsumsStateCallback { virtual void FillSummaryFeatures(GetDocsumsState *, IDocsumEnvironment *) override {} virtual void FillRankFeatures(GetDocsumsState *, IDocsumEnvironment *) override {} - virtual void ParseLocation(GetDocsumsState *) override {} std::unique_ptr<MatchingElements> fill_matching_elements(const MatchingElementsFields &) override { abort(); } }; diff --git a/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp b/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp index 6fceef37f09..69249056c17 100644 --- a/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp +++ b/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp @@ -77,7 +77,6 @@ struct DocsumFixture : IDocsumStore, GetDocsumsStateCallback { uint32_t getSummaryClassId() const override { return 0; } void FillSummaryFeatures(GetDocsumsState *, IDocsumEnvironment *) override { } void FillRankFeatures(GetDocsumsState *, IDocsumEnvironment *) override { } - void ParseLocation(GetDocsumsState *) override { } std::unique_ptr<MatchingElements> fill_matching_elements(const search::MatchingElementsFields &) override { abort(); } }; diff 
--git a/searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp b/searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp index 87317234a27..cd695984e03 100644 --- a/searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp +++ b/searchsummary/src/tests/extractkeywords/extractkeywordstest.cpp @@ -179,7 +179,7 @@ ExtractKeywordsTest::RunTest(int testno, bool verify) case 1: { // check that skipping these works also: - stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_LOCATION_TERM, "no")); + stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_GEO_LOCATION_TERM, "no")); stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_NEAREST_NEIGHBOR, "no")); // multi term query stack.Push(new search::SimpleQueryStackItem(search::ParseItem::ITEM_TERM, "foobar")); diff --git a/searchsummary/src/tests/extractkeywords/simplequerystackitem.cpp b/searchsummary/src/tests/extractkeywords/simplequerystackitem.cpp index 5a4b6d76b8f..f12822949f9 100644 --- a/searchsummary/src/tests/extractkeywords/simplequerystackitem.cpp +++ b/searchsummary/src/tests/extractkeywords/simplequerystackitem.cpp @@ -24,7 +24,7 @@ void assert_term_type(ParseItem::ItemType type) { assert(type == ParseItem::ITEM_TERM || type == ParseItem::ITEM_NUMTERM || type == ParseItem::ITEM_NEAREST_NEIGHBOR || - type == ParseItem::ITEM_LOCATION_TERM || + type == ParseItem::ITEM_GEO_LOCATION_TERM || type == ParseItem::ITEM_PREFIXTERM || type == ParseItem::ITEM_SUBSTRINGTERM || type == ParseItem::ITEM_SUFFIXTERM || @@ -152,7 +152,7 @@ SimpleQueryStackItem::AppendBuffer(RawBuf *buf) const break; case ITEM_TERM: case ITEM_NUMTERM: - case ITEM_LOCATION_TERM: + case ITEM_GEO_LOCATION_TERM: case ITEM_PREFIXTERM: case ITEM_SUBSTRINGTERM: case ITEM_EXACTSTRINGTERM: diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.cpp index ebbf97e9f55..42afadb386a 100644 --- 
a/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.cpp @@ -2,11 +2,19 @@ #include "docsumstate.h" #include <vespa/juniper/rpinterface.h> +#include <vespa/document/datatype/positiondatatype.h> #include <vespa/searchcommon/attribute/iattributecontext.h> -#include <vespa/searchlib/common/location.h> +#include <vespa/searchlib/common/geo_location.h> +#include <vespa/searchlib/common/geo_location_parser.h> +#include <vespa/searchlib/common/geo_location_spec.h> #include <vespa/searchlib/common/matching_elements.h> +#include <vespa/searchlib/parsequery/parse.h> +#include <vespa/searchlib/parsequery/stackdumpiterator.h> #include "docsum_field_writer_state.h" +#include <vespa/log/log.h> +LOG_SETUP(".searchsummary.docsummary.docsumstate"); + namespace search::docsummary { GetDocsumsState::GetDocsumsState(GetDocsumsStateCallback &callback) @@ -22,7 +30,7 @@ GetDocsumsState::GetDocsumsState(GetDocsumsStateCallback &callback) _attributes(), _fieldWriterStates(), _jsonStringer(), - _parsedLocation(), + _parsedLocations(), _summaryFeatures(NULL), _summaryFeaturesCached(false), _rankFeatures(NULL), @@ -58,4 +66,43 @@ GetDocsumsState::get_matching_elements(const MatchingElementsFields &matching_el return *_matching_elements; } +void +GetDocsumsState::parse_locations() +{ + using document::PositionDataType; + assert(_parsedLocations.empty()); // only allowed to call this once + if (! _args.getLocation().empty()) { + search::common::GeoLocationParser parser; + if (parser.parseOldFormatWithField(_args.getLocation())) { + auto view = parser.getFieldName(); + auto attr_name = PositionDataType::getZCurveFieldName(view); + search::common::GeoLocationSpec spec{attr_name, parser.getGeoLocation()}; + _parsedLocations.push_back(spec); + } else { + LOG(warning, "could not parse location string '%s' from request", + _args.getLocation().c_str()); + } + } + auto stackdump = _args.getStackDump(); + if (! 
stackdump.empty()) { + search::SimpleQueryStackDumpIterator iterator(stackdump); + while (iterator.next()) { + if (iterator.getType() == search::ParseItem::ITEM_GEO_LOCATION_TERM) { + vespalib::string view = iterator.getIndexName(); + vespalib::string term = iterator.getTerm(); + search::common::GeoLocationParser parser; + if (parser.parseOldFormat(term)) { + auto attr_name = PositionDataType::getZCurveFieldName(view); + search::common::GeoLocationSpec spec{attr_name, parser.getGeoLocation()}; + _parsedLocations.push_back(spec); + } else { + LOG(warning, "could not parse location string '%s' from stack dump", + term.c_str()); + } + } + } + } +} + + } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.h b/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.h index 57cae341682..46f8e52dd37 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumstate.h @@ -5,6 +5,7 @@ #include <vespa/searchlib/util/rawbuf.h> #include <vespa/searchsummary/docsummary/getdocsumargs.h> #include <vespa/searchlib/common/featureset.h> +#include <vespa/searchlib/common/geo_location_spec.h> #include <vespa/vespalib/util/jsonwriter.h> namespace juniper { @@ -34,7 +35,6 @@ class GetDocsumsStateCallback public: virtual void FillSummaryFeatures(GetDocsumsState * state, IDocsumEnvironment * env) = 0; virtual void FillRankFeatures(GetDocsumsState * state, IDocsumEnvironment * env) = 0; - virtual void ParseLocation(GetDocsumsState * state) = 0; virtual std::unique_ptr<MatchingElements> fill_matching_elements(const MatchingElementsFields &matching_elems_fields) = 0; virtual ~GetDocsumsStateCallback(void) { } GetDocsumsStateCallback(const GetDocsumsStateCallback &) = delete; @@ -80,7 +80,8 @@ public: vespalib::JSONStringer _jsonStringer; // used by AbsDistanceDFW - std::unique_ptr<search::common::Location> _parsedLocation; + std::vector<search::common::GeoLocationSpec> _parsedLocations; + 
void parse_locations(); // used by SummaryFeaturesDFW FeatureSet::SP _summaryFeatures; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp index 37239fe9da6..8cc577355cf 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp @@ -270,7 +270,7 @@ JuniperQueryAdapter::Traverse(juniper::IQueryVisitor *v) const case search::ParseItem::ITEM_PREDICATE_QUERY: case search::ParseItem::ITEM_SAME_ELEMENT: case search::ParseItem::ITEM_NEAREST_NEIGHBOR: - case search::ParseItem::ITEM_LOCATION_TERM: + case search::ParseItem::ITEM_GEO_LOCATION_TERM: if (!v->VisitOther(&item, iterator.getArity())) { rc = SkipItem(&iterator); } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp index 4e1544ee5d7..0af92adf2d2 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.cpp @@ -8,6 +8,7 @@ GetDocsumArgs::GetDocsumArgs() : _ranking(), _resultClassName(), _dumpFeatures(false), + _locations_possible(true), _stackItems(0), _stackDump(), _location(), @@ -27,6 +28,7 @@ GetDocsumArgs::initFromDocsumRequest(const search::engine::DocsumRequest &req) _stackItems = req.stackItems; _stackDump = req.stackDump; _location = req.location; + _locations_possible = true; _timeout = req.getTimeLeft(); _propertiesMap = req.propertiesMap; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h index c17f44baec9..0231b004674 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/getdocsumargs.h @@ -17,6 +17,7 @@ private: vespalib::string _ranking; 
vespalib::string _resultClassName; bool _dumpFeatures; + bool _locations_possible; uint32_t _stackItems; std::vector<char> _stackDump; vespalib::string _location; @@ -31,15 +32,14 @@ public: void SetRankProfile(const vespalib::string &ranking) { _ranking = ranking; } void setResultClassName(vespalib::stringref name) { _resultClassName = name; } void SetStackDump(uint32_t stackItems, uint32_t stackDumpLen, const char *stackDump); - void setLocation(vespalib::stringref location) { - _location = location; - } - + void locations_possible(bool value) { _locations_possible = value; } + bool locations_possible() const { return _locations_possible; } + const vespalib::string &getLocation() const { return _location; } + void setLocation(const vespalib::string & location) { _location = location; } void setTimeout(vespalib::duration timeout) { _timeout = timeout; } vespalib::duration getTimeout() const { return _timeout; } const vespalib::string & getResultClassName() const { return _resultClassName; } - const vespalib::string & getLocation() const { return _location; } const vespalib::stringref getStackDump() const { return vespalib::stringref(&_stackDump[0], _stackDump.size()); } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.cpp index ecdde13b919..4fc2b1f4221 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.cpp @@ -22,44 +22,57 @@ using search::attribute::BasicType; using search::attribute::IntegerContent; using search::common::Location; +LocationAttrDFW::AllLocations +LocationAttrDFW::getAllLocations(GetDocsumsState *state) +{ + AllLocations retval; + if (! 
state->_args.locations_possible()) { + return retval; + } + if (state->_parsedLocations.empty()) { + state->parse_locations(); + } + for (const auto & loc : state->_parsedLocations) { + if (loc.location.valid()) { + LOG(debug, "found location(field %s) for DFW(field %s)\n", + loc.field_name.c_str(), getAttributeName().c_str()); + if (getAttributeName() == loc.field_name) { + retval.matching.push_back(&loc.location); + } else { + retval.other.push_back(&loc.location); + } + } + } + if (retval.empty()) { + // avoid doing things twice + state->_args.locations_possible(false); + } + return retval; +} + AbsDistanceDFW::AbsDistanceDFW(const vespalib::string & attrName) : - AttrDFW(attrName) + LocationAttrDFW(attrName) { } uint64_t -AbsDistanceDFW::findMinDistance(uint32_t docid, GetDocsumsState *state) +AbsDistanceDFW::findMinDistance(uint32_t docid, GetDocsumsState *state, + const std::vector<const GeoLoc *> &locations) { - search::common::Location &location = *state->_parsedLocation; - const auto& attribute = get_attribute(*state); - uint64_t absdist = std::numeric_limits<int64_t>::max(); - int32_t docx = 0; - int32_t docy = 0; - IntegerContent pos; - pos.fill(attribute, docid); - uint32_t numValues = pos.size(); - for (uint32_t i = 0; i < numValues; i++) { - int64_t docxy(pos[i]); - vespalib::geo::ZCurve::decode(docxy, &docx, &docy); - uint32_t dx; - if (location.getX() > docx) { - dx = location.getX() - docx; - } else { - dx = docx - location.getX(); - } - if (location.getXAspect() != 0) { - dx = ((uint64_t) dx * location.getXAspect()) >> 32; - } - uint32_t dy; - if (location.getY() > docy) { - dy = location.getY() - docy; - } else { - dy = docy - location.getY(); - } - uint64_t dist2 = dx * (uint64_t) dx + - dy * (uint64_t) dy; - if (dist2 < absdist) { - absdist = dist2; + const auto& attribute = get_attribute(*state); + for (auto location : locations) { + int32_t docx = 0; + int32_t docy = 0; + IntegerContent pos; + pos.fill(attribute, docid); + uint32_t numValues 
= pos.size(); + for (uint32_t i = 0; i < numValues; i++) { + int64_t docxy(pos[i]); + vespalib::geo::ZCurve::decode(docxy, &docx, &docy); + uint64_t dist2 = location->sq_distance_to(GeoLoc::Point{docx, docy}); + if (dist2 < absdist) { + absdist = dist2; + } } } return (uint64_t) std::sqrt((double) absdist); @@ -68,22 +81,11 @@ AbsDistanceDFW::findMinDistance(uint32_t docid, GetDocsumsState *state) void AbsDistanceDFW::insertField(uint32_t docid, GetDocsumsState *state, ResType type, vespalib::slime::Inserter &target) { - bool forceEmpty = true; - - const vespalib::string &locationStr = state->_args.getLocation(); - if (locationStr.size() > 0) { - if (!state->_parsedLocation) { - state->_callback.ParseLocation(state); - } - assert(state->_parsedLocation); - if (state->_parsedLocation->getParseError() == nullptr) { - forceEmpty = false; - } + const auto & all_locations = getAllLocations(state); + if (all_locations.empty()) { + return; } - if (forceEmpty) return; - - uint64_t absdist = findMinDistance(docid, state); - + uint64_t absdist = findMinDistance(docid, state, all_locations.best()); if (type == RES_INT) { target.insertLong(absdist); } else { diff --git a/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.h b/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.h index 999da6f1860..c135737e44c 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.h @@ -3,13 +3,40 @@ #pragma once #include "attributedfw.h" +#include <vespa/searchlib/common/geo_location_spec.h> namespace search::docsummary { -class AbsDistanceDFW : public AttrDFW +class LocationAttrDFW : public AttrDFW +{ +public: + using GeoLoc = search::common::GeoLocation; + + LocationAttrDFW(const vespalib::string & attrName) + : AttrDFW(attrName) + {} + + struct AllLocations { + std::vector<const GeoLoc *> matching; + std::vector<const GeoLoc *> other; + + ~AllLocations() {} + + bool empty() const { 
+ return matching.empty() && other.empty(); + } + const std::vector<const GeoLoc *> & best() const { + return matching.empty() ? other : matching; + } + }; + AllLocations getAllLocations(GetDocsumsState *state); +}; + +class AbsDistanceDFW : public LocationAttrDFW { private: - uint64_t findMinDistance(uint32_t docid, GetDocsumsState *state); + uint64_t findMinDistance(uint32_t docid, GetDocsumsState *state, + const std::vector<const GeoLoc *> &locations); public: AbsDistanceDFW(const vespalib::string & attrName); diff --git a/searchsummary/src/vespa/searchsummary/test/mock_state_callback.h b/searchsummary/src/vespa/searchsummary/test/mock_state_callback.h index b3ee405c856..f8b51ca14d0 100644 --- a/searchsummary/src/vespa/searchsummary/test/mock_state_callback.h +++ b/searchsummary/src/vespa/searchsummary/test/mock_state_callback.h @@ -18,7 +18,6 @@ public: ~MockStateCallback() override { } void FillSummaryFeatures(GetDocsumsState*, IDocsumEnvironment*) override { } void FillRankFeatures(GetDocsumsState*, IDocsumEnvironment*) override { } - void ParseLocation(GetDocsumsState*) override { } std::unique_ptr<MatchingElements> fill_matching_elements(const search::MatchingElementsFields&) override { return std::make_unique<MatchingElements>(_matching_elems); } diff --git a/streamingvisitors/src/vespa/searchvisitor/queryenvironment.cpp b/streamingvisitors/src/vespa/searchvisitor/queryenvironment.cpp index 37809b207ad..6096a2faea4 100644 --- a/streamingvisitors/src/vespa/searchvisitor/queryenvironment.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/queryenvironment.cpp @@ -1,7 +1,9 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "queryenvironment.h" -#include <vespa/searchlib/common/location.h> +#include <vespa/searchlib/common/geo_location.h> +#include <vespa/searchlib/common/geo_location_spec.h> +#include <vespa/searchlib/common/geo_location_parser.h> #include <vespa/log/log.h> LOG_SETUP(".searchvisitor.queryenvironment"); @@ -21,26 +23,20 @@ parseLocation(const string & location_str) if (location_str.empty()) { return fefLocation; } - string::size_type pos = location_str.find(':'); - if (pos == string::npos) { - LOG(warning, "Location string lacks attribute vector specification. loc='%s'. Location ignored.", - location_str.c_str()); - return fefLocation; - } - string attr = location_str.substr(0, pos); - const string location = location_str.substr(pos + 1); - - search::common::Location locationSpec; - if (!locationSpec.parse(location)) { + search::common::GeoLocationParser locationParser; + if (!locationParser.parseOldFormatWithField(location_str)) { LOG(warning, "Location parse error (location: '%s'): %s. 
Location ignored.", - location.c_str(), locationSpec.getParseError()); + location_str.c_str(), locationParser.getParseError()); return fefLocation; } - fefLocation.setAttribute(attr); - fefLocation.setXPosition(locationSpec.getX()); - fefLocation.setYPosition(locationSpec.getY()); - fefLocation.setXAspect(locationSpec.getXAspect()); - fefLocation.setValid(true); + auto location = locationParser.getGeoLocation(); + if (location.has_point) { + fefLocation.setAttribute(locationParser.getFieldName()); + fefLocation.setXPosition(location.point.x); + fefLocation.setYPosition(location.point.y); + fefLocation.setXAspect(location.x_aspect.multiplier); + fefLocation.setValid(true); + } return fefLocation; } diff --git a/streamingvisitors/src/vespa/searchvisitor/queryenvironment.h b/streamingvisitors/src/vespa/searchvisitor/queryenvironment.h index f580cec8870..e3da5a44167 100644 --- a/streamingvisitors/src/vespa/searchvisitor/queryenvironment.h +++ b/streamingvisitors/src/vespa/searchvisitor/queryenvironment.h @@ -49,7 +49,13 @@ public: } // inherit documentation - virtual const search::fef::Location & getLocation() const override { return _location; } + std::vector<const search::fef::Location *> getAllLocations() const override { + std::vector<const search::fef::Location *> retval; + if (_location.isValid()) { + retval.push_back(&_location); + } + return retval; + } // inherit documentation virtual const search::attribute::IAttributeContext & getAttributeContext() const override { return *_attrCtx; } diff --git a/vespajlib/src/main/java/com/yahoo/geo/DegreesParser.java b/vespajlib/src/main/java/com/yahoo/geo/DegreesParser.java index 9f3d3b837f8..58164801c7c 100644 --- a/vespajlib/src/main/java/com/yahoo/geo/DegreesParser.java +++ b/vespajlib/src/main/java/com/yahoo/geo/DegreesParser.java @@ -2,7 +2,7 @@ package com.yahoo.geo; /** - * utility for parsing geographical coordinates + * Utility for parsing geographical coordinates * * @author arnej27959 **/ diff --git 
a/vespajlib/src/main/java/com/yahoo/geo/DistanceParser.java b/vespajlib/src/main/java/com/yahoo/geo/DistanceParser.java new file mode 100644 index 00000000000..1ae68afa4ac --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/geo/DistanceParser.java @@ -0,0 +1,79 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.geo; + +import com.google.common.annotations.Beta; + +/** + * Utility for parsing a geographical distance with unit. + **/ +@Beta +public class DistanceParser { + // according to wikipedia: + // Earth's equatorial radius = 6378137 meter - not used + // meters per mile = 1609.344 + // 180 degrees equals one half diameter equals PI*r + // Earth's polar radius = 6356752 meter + + public final static double m2deg = 180.0 / (Math.PI * 6356752.0); + public final static double km2deg = 1000.000 * 180.0 / (Math.PI * 6356752.0); + public final static double mi2deg = 1609.344 * 180.0 / (Math.PI * 6356752.0); + + private final double degrees; + + public double getDegrees() { return degrees; } + + /** + * Parse a distance in some kind of units, converting to geographical degrees. + * Note that the number and the unit should be separated by a single space only, + * or not separated at all. + * Supported units are "m", "km", "miles", and "deg", + * the last one meaning degrees with no conversion. + * For brevity "mi" = "miles" and "d" = "deg". 
+ **/ + static public double parse(String distance) { + var parser = new DistanceParser(distance, false); + return parser.degrees; + } + + DistanceParser(String distance, boolean assumeMicroDegrees) { + if (distance.endsWith(" km")) { + double km = Double.valueOf(distance.substring(0, distance.length()-3)); + degrees = km * km2deg; + } else if (distance.endsWith(" m")) { + double meters = Double.valueOf(distance.substring(0, distance.length()-2)); + degrees = meters * m2deg; + } else if (distance.endsWith(" miles")) { + double miles = Double.valueOf(distance.substring(0, distance.length()-6)); + degrees = miles * mi2deg; + } else if (distance.endsWith(" mi")) { + double miles = Double.valueOf(distance.substring(0, distance.length()-3)); + degrees = miles * mi2deg; + } else if (distance.endsWith(" deg")) { + degrees = Double.valueOf(distance.substring(0, distance.length()-4)); + } else if (distance.endsWith(" d")) { + degrees = Double.valueOf(distance.substring(0, distance.length()-2)); + } else if (distance.endsWith("km")) { + double km = Double.valueOf(distance.substring(0, distance.length()-2)); + degrees = km * km2deg; + } else if (distance.endsWith("m")) { + double meters = Double.valueOf(distance.substring(0, distance.length()-1)); + degrees = meters * m2deg; + } else if (distance.endsWith("miles")) { + double miles = Double.valueOf(distance.substring(0, distance.length()-5)); + degrees = miles * mi2deg; + } else if (distance.endsWith("mi")) { + double miles = Double.valueOf(distance.substring(0, distance.length()-2)); + degrees = miles * mi2deg; + } else if (distance.endsWith("deg")) { + degrees = Double.valueOf(distance.substring(0, distance.length()-3)); + } else if (distance.endsWith("d")) { + degrees = Double.valueOf(distance.substring(0, distance.length()-1)); + } else if (assumeMicroDegrees) { + degrees = Integer.parseInt(distance) * 0.000001; + } else { + throw new IllegalArgumentException("missing unit for distance: "+distance); + } + } + +} diff 
/**
 * Utility for parsing one geographical coordinate
 *
 * The result is published through the public fields: exactly one of
 * {@link #foundLatitude} and {@link #foundLongitude} is true after a
 * successful parse, and the matching value field holds the signed degrees.
 *
 * @author arnej27959
 **/
class OneDegreeParser {
    /** Value returned by getNextChar() once the whole input has been consumed. */
    private static final char END_OF_INPUT = '\0';

    /**
     * the parsed latitude (degrees north if positive)
     **/
    public double latitude = 0;
    public boolean foundLatitude = false;

    /**
     * the parsed longitude (degrees east if positive)
     **/
    public double longitude = 0;
    public boolean foundLongitude = false;

    public static boolean isDigit(char ch) {
        return (ch >= '0' && ch <= '9');
    }
    public static boolean isCompassDirection(char ch) {
        return (ch == 'N' || ch == 'S' || ch == 'E' || ch == 'W');
    }

    private final String parseString;
    private final int len;
    private int pos = 0;

    @Override
    public String toString() {
        if (foundLatitude) {
            return parseString + " -> latitude(" + latitude + ")";
        } else {
            return parseString + " -> longitude(" + longitude + ")";
        }
    }

    /**
     * Returns the next input character, or END_OF_INPUT exactly once when the
     * input is exhausted; reading beyond that is a logic error.
     */
    private char getNextChar() throws IllegalArgumentException {
        if (pos == len) {
            pos++;
            return END_OF_INPUT;
        } else if (pos > len) {
            throw new IllegalArgumentException("position after end of string when parsing <"+parseString+">");
        } else {
            return parseString.charAt(pos++);
        }
    }

    /**
     * Parse the given string.
     *
     * The string must contain either a latitude or a longitude.
     * A latitude should contain "N" or "S" and a number signifying
     * degrees north or south, or a signed number.
     * A longitude should contain "E" or "W" and a number
     * signifying degrees east or west, or a signed number.
     * <br>
     * Fractional degrees are recommended as the main input format,
     * but degrees plus fractional minutes may be used for testing.
     * You can use the degree sign (U+00B0 as seen in unicode at
     * http://www.unicode.org/charts/PDF/U0080.pdf) to separate
     * degrees from minutes, put the direction (NSEW) between as a
     * separator, or use a small letter 'o' as a replacement for the
     * degrees sign.
     * <br>
     * Some valid input formats: <br>
     * "37.416383" and "-122.024683" &rarr; Sunnyvale <br>
     * "N37.416383" and "W122.024683" &rarr; Sunnyvale <br>
     * "37N24.983" and "122W01.481" &rarr; same <br>
     * "N37\u00B024.983" and "W122\u00B001.481" &rarr; same <br>
     * "63.418417" and "10.433033" &rarr; Trondheim <br>
     * "N63.418417" and "E10.433033" &rarr; same <br>
     * "N63o25.105" and "E10o25.982" &rarr; same <br>
     * "E10o25.982" and "N63o25.105" &rarr; same <br>
     * "N63.418417" and "E10.433033" &rarr; same <br>
     * "63N25.105" and "10E25.982" &rarr; same <br>
     * @param assumeNorthSouth Latitude assumed, otherwise longitude
     * @param toParse Latitude or longitude string to parse
     * @throws IllegalArgumentException if the string is not a valid coordinate
     *         or the value is out of range
     *
     **/
    public OneDegreeParser(boolean assumeNorthSouth, String toParse) throws IllegalArgumentException {
        this.parseString = toParse;
        this.len = parseString.length();
        // The parser uses the NUL character as its end-of-input marker, so a
        // NUL inside the input would silently cut the string short; reject it.
        if (parseString.indexOf(END_OF_INPUT) >= 0) {
            throw new IllegalArgumentException("embedded NUL character when parsing <"+parseString+">");
        }
        consumeString(assumeNorthSouth);
    }

    /**
     * Runs the parse. Each loop iteration consumes at most one number, one
     * separator and one compass direction, and must advance the position.
     */
    private void consumeString(boolean assumeNorthSouth) throws IllegalArgumentException {
        char ch = getNextChar();

        double degrees = 0.0;
        double minutes = 0.0;
        double seconds = 0.0;
        boolean degSet = false;
        boolean minSet = false;
        boolean secSet = false;
        boolean dirSet = false;
        boolean foundDot = false;
        boolean foundDigits = false;

        boolean findingLatitude = false;
        boolean findingLongitude = false;

        double sign = +1.0;

        int lastpos = -1;

        // sign must be first character in string if present:
        if (ch == '+') {
            // unary plus is a nop
            ch = getNextChar();
        } else if (ch == '-') {
            sign = -1.0;
            ch = getNextChar();
        }
        do {
            // did we find a valid char?
            boolean valid = false;
            if (pos == lastpos) {
                throw new IllegalArgumentException("internal logic error at <"+parseString+"> pos:"+pos);
            } else {
                lastpos = pos;
            }

            // first, see if we can find some number
            double accum = 0.0;

            if (isDigit(ch) || ch == '.') {
                valid = true;
                if (foundDigits) {
                    throw new IllegalArgumentException("found digits after not consuming previous digits when parsing <"+parseString+">");
                }
                double divider = 1.0;
                foundDot = false;
                while (isDigit(ch)) {
                    foundDigits = true;
                    accum *= 10;
                    accum += (ch - '0');
                    ch = getNextChar();
                }
                if (ch == '.') {
                    foundDot = true;
                    ch = getNextChar();
                    while (isDigit(ch)) {
                        foundDigits = true;
                        accum *= 10;
                        accum += (ch - '0');
                        divider *= 10;
                        ch = getNextChar();
                    }
                }
                if (!foundDigits) {
                    throw new IllegalArgumentException("just a . is not a valid number when parsing <"+parseString+">");
                }
                accum /= divider;
            }

            // next, did we find a separator after the number?
            // degree sign is a separator after degrees, before minutes
            if (ch == '\u00B0' || ch == 'o') {
                valid = true;
                if (degSet) {
                    throw new IllegalArgumentException("degrees sign only valid just after degrees when parsing <"+parseString+">");
                }
                if (!foundDigits) {
                    throw new IllegalArgumentException("must have number before degrees sign when parsing <"+parseString+">");
                }
                if (foundDot) {
                    throw new IllegalArgumentException("cannot have fractional degrees before degrees sign when parsing <"+parseString+">");
                }
                ch = getNextChar();
            }
            // apostrophe is a separator after minutes, before seconds
            if (ch == '\'') {
                if (minSet || !degSet || !foundDigits) {
                    throw new IllegalArgumentException("minutes sign only valid just after minutes when parsing <"+parseString+">");
                }
                if (foundDot) {
                    throw new IllegalArgumentException("cannot have fractional minutes before minutes sign when parsing <"+parseString+">");
                }
                ch = getNextChar();
            }

            // if we found some number, assign it into the next unset variable
            if (foundDigits) {
                valid = true;
                if (degSet) {
                    if (minSet) {
                        if (secSet) {
                            throw new IllegalArgumentException("extra number after full field when parsing <"+parseString+">");
                        } else {
                            seconds = accum;
                            secSet = true;
                        }
                    } else {
                        minutes = accum;
                        minSet = true;
                        // fractional minutes leave no room for seconds
                        if (foundDot) {
                            secSet = true;
                        }
                    }
                } else {
                    degrees = accum;
                    degSet = true;
                    // fractional degrees leave no room for minutes or seconds
                    if (foundDot) {
                        minSet = true;
                        secSet = true;
                    }
                }
                foundDot = false;
                foundDigits = false;
            }

            // there may be a direction (NSEW) somewhere, too
            if (isCompassDirection(ch)) {
                valid = true;
                if (dirSet) {
                    throw new IllegalArgumentException("already set direction once, cannot add direction: "+ch+" when parsing <"+parseString+">");
                }
                dirSet = true;
                // note: an explicit direction replaces any leading +/- sign
                if (ch == 'S' || ch == 'W') {
                    sign = -1;
                } else {
                    sign = 1;
                }
                if (ch == 'E' || ch == 'W') {
                    findingLongitude = true;
                } else {
                    findingLatitude = true;
                }
                ch = getNextChar();
            }

            // lastly, did we find the end-of-string?
            if (ch == END_OF_INPUT) {
                valid = true;
                if (!dirSet) {
                    if (assumeNorthSouth) {
                        findingLatitude = true;
                    } else {
                        findingLongitude = true;
                    }
                }
                if (!degSet) {
                    throw new IllegalArgumentException("end of field without any number seen when parsing <"+parseString+">");
                }
                degrees += minutes / 60.0;
                degrees += seconds / 3600.0;
                degrees *= sign;

                if (findingLatitude) {
                    if (degrees < -90.0 || degrees > 90.0) {
                        throw new IllegalArgumentException("out of range [-90,+90]: "+degrees+" when parsing <"+parseString+">");
                    }
                    latitude = degrees;
                    foundLatitude = true;
                } else if (findingLongitude) {
                    if (degrees < -180.0 || degrees > 180.0) {
                        throw new IllegalArgumentException("out of range [-180,+180]: "+degrees+" when parsing <"+parseString+">");
                    }
                    longitude = degrees;
                    foundLongitude = true;
                }
                break;
            }
            if (!valid) {
                throw new IllegalArgumentException("invalid character: "+ch+" when parsing <"+parseString+">");
            }
        } while (ch != END_OF_INPUT);
        // everything parsed OK; the checks below are defensive only
        if (foundLatitude && foundLongitude) {
            throw new IllegalArgumentException("found both latitude and longitude from: "+parseString);
        }
        if (foundLatitude || foundLongitude) {
            return;
        }
        throw new IllegalArgumentException("found neither latitude nor longitude from: "+parseString);
    }
}
+ +package com.yahoo.geo; + +/** + * Utility for holding one geographical coordinate + * + * @author arnej27959 + **/ +public class ParsedDegree { + /** + * the parsed latitude or longitude + * Degrees north or east if positive + * Degrees south or west if negative + **/ + public final double degrees; + + // one of these two flag will be true: + public final boolean isLatitude; + public final boolean isLongitude; + + public ParsedDegree(double value, boolean isLat, boolean isLon) { + this.degrees = value; + this.isLatitude = isLat; + this.isLongitude = isLon; + if (isLat && isLon) { + throw new IllegalArgumentException("value cannot be both latitude and longitude at once"); + } + if (isLat || isLon) { + return; + } + throw new IllegalArgumentException("value must be either latitude or longitude"); + } + + static public ParsedDegree fromString(String toParse, boolean assumeLatitude, boolean assumeLongitude) { + if (assumeLatitude && assumeLongitude) { + throw new IllegalArgumentException("value cannot be both latitude and longitude at once"); + } + var parser = new OneDegreeParser(assumeLatitude, toParse); + if (parser.foundLatitude) { + return new ParsedDegree(parser.latitude, true, false); + } + if (parser.foundLongitude) { + return new ParsedDegree(parser.longitude, false, true); + } + throw new IllegalArgumentException("could not parse: "+toParse); + } + + public String toString() { + if (isLatitude) { + return "Latitude: "+degrees+" degrees"; + } else { + return "Longitude: "+degrees+" degrees"; + } + } + +} diff --git a/vespajlib/src/main/java/com/yahoo/text/Utf8.java b/vespajlib/src/main/java/com/yahoo/text/Utf8.java index 6f40b590a64..cb8ca244fe2 100644 --- a/vespajlib/src/main/java/com/yahoo/text/Utf8.java +++ b/vespajlib/src/main/java/com/yahoo/text/Utf8.java @@ -13,10 +13,10 @@ import java.nio.charset.CodingErrorAction; import java.nio.charset.StandardCharsets; /** - * utility class with functions for handling UTF-8 + * Utility class with functions for 
handling UTF-8 * * @author arnej27959 - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Steinar Knutsen * @author baldersheim * */ diff --git a/vespajlib/src/test/java/com/yahoo/geo/OneDegreeParserTestCase.java b/vespajlib/src/test/java/com/yahoo/geo/OneDegreeParserTestCase.java new file mode 100644 index 00000000000..b0da6a0a131 --- /dev/null +++ b/vespajlib/src/test/java/com/yahoo/geo/OneDegreeParserTestCase.java @@ -0,0 +1,204 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.geo; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * Tests for the OneDegreeParser class. + * + * @author arnej27959 + */ +public class OneDegreeParserTestCase { + + private static final double delta = 0.000000000001; + + private OneDegreeParser parser; + + private void checkLat(boolean assumeLatitude, String toParse, double expected) { + parser = new OneDegreeParser(assumeLatitude, toParse); + assertEquals(expected, parser.latitude, delta); + assertTrue(parser.foundLatitude); + assertFalse(parser.foundLongitude); + } + private void checkLon(boolean assumeLatitude, String toParse, double expected) { + parser = new OneDegreeParser(assumeLatitude, toParse); + assertEquals(expected, parser.longitude, delta); + assertFalse(parser.foundLatitude); + assertTrue(parser.foundLongitude); + } + private void checkLat(String toParse, double expected) { + checkLat(true, toParse, expected); + checkLat(false, toParse, expected); + } + private void checkLon(String toParse, double expected) { + checkLon(true, toParse, expected); + checkLon(false, toParse, expected); + } + + private void checkZeroLat(boolean assumeLatitude, String toParse) { + checkLat(assumeLatitude, toParse, 0d); + } + + private void checkZeroLon(boolean assumeLatitude, String toParse) { + 
checkLon(assumeLatitude, toParse, 0d); + } + + /** + * Tests different inputs that should all produce 0 or -0. + */ + @Test + public void testZero() { + checkZeroLat(true, "0"); + checkZeroLat(true, "0.0"); + checkZeroLat(true, "0o0.0"); + checkZeroLat(true, "0o0'0"); + checkZeroLat(true, "0\u00B00'0"); + + checkZeroLon(false, "0"); + checkZeroLon(false, "0.0"); + checkZeroLon(false, "0o0.0"); + checkZeroLon(false, "0o0'0"); + checkZeroLon(false, "0\u00B00'0"); + + checkZeroLat(false, "N0"); + checkZeroLat(false, "N0.0"); + checkZeroLat(false, "N0\u00B00'0"); + checkZeroLat(false, "S0"); + checkZeroLat(false, "S0.0"); + checkZeroLat(false, "S0o0'0"); + checkZeroLat(false, "S0\u00B00'0"); + + checkZeroLon(true, "E0"); + checkZeroLon(true, "E0.0"); + checkZeroLon(true, "E0\u00B00'0"); + checkZeroLon(true, "W0"); + checkZeroLon(true, "W0.0"); + checkZeroLon(true, "W0o0'0"); + checkZeroLon(true, "W0\u00B00'0"); + } + + /** + * Tests inputs that are close to 0. + */ + @Test + public void testNearZero() { + checkLat("N0.0001", 0.0001); + checkLat("S0.0001", -0.0001); + checkLon("E0.0001", 0.0001); + checkLon("W0.0001", -0.0001); + + checkLat("N0.000001", 0.000001); + checkLat("S0.000001", -0.000001); + checkLon("E0.000001", 0.000001); + checkLon("W0.000001", -0.000001); + + checkLat("N0\u00B00'1", 1/3600d); + checkLat("S0\u00B00'1", -1/3600d); + checkLon("E0\u00B00'1", 1/3600d); + checkLon("W0\u00B00'1", -1/3600d); + } + + /** + * Tests inputs that are close to latitude 90/-90 degrees and longitude 180/-180 degrees. 
+ */ + @Test + public void testNearBoundary() { + checkLat("N89.9999", 89.9999); + checkLat("S89.9999", -89.9999); + checkLon("E179.9999", 179.9999); + checkLon("W179.9999", -179.9999); + + checkLat("N89.999999", 89.999999); + checkLat("S89.999999", -89.999999); + checkLon("E179.999999", 179.999999); + checkLon("W179.999999", -179.999999); + + checkLat("N89\u00B059'59", 89+59/60d+59/3600d); + checkLat("S89\u00B059'59", -(89+59/60d+59/3600d)); + checkLon("E179\u00B059'59", 179+59/60d+59/3600d); + checkLon("W179\u00B059'59", -(179+59/60d+59/3600d)); + } + + /** + * Tests inputs that are on latitude 90/-90 degrees and longitude 180/-180 degrees. + */ + @Test + public void testOnBoundary() { + checkLat("N90", 90d); + checkLat("N90\u00B00'0", 90d); + checkLat("S90", -90d); + checkLat("S90\u00B00'0", -90d); + + checkLon("E180", 180d); + checkLon("E180\u00B00'0", 180d); + checkLon("W180", -180d); + checkLon("W180\u00B00'0", -180d); + } + + private String parseException(boolean assumeLatitude, String toParse) { + String message = ""; + try { + parser = new OneDegreeParser(assumeLatitude, toParse); + assertTrue(false); + } catch (IllegalArgumentException e) { + message = e.getMessage(); + } + return message; + } + + /** + * Tests inputs that are above latitude 90/-90 degrees and longitude 180/-180 degrees. 
+ */ + @Test + public void testAboveBoundary() { + String message = parseException(false, "N90.0001"); + assertEquals("out of range [-90,+90]: 90.0001 when parsing <N90.0001>", message); + message = parseException(false, "S90.0001"); + assertEquals("out of range [-90,+90]: -90.0001 when parsing <S90.0001>", message); + message = parseException(true, "E180.0001"); + assertEquals("out of range [-180,+180]: 180.0001 when parsing <E180.0001>", message); + message = parseException(true, "W180.0001"); + assertEquals("out of range [-180,+180]: -180.0001 when parsing <W180.0001>", message); + message = parseException(false, "N90.000001"); + assertEquals("out of range [-90,+90]: 90.000001 when parsing <N90.000001>", message); + message = parseException(false, "S90.000001"); + assertEquals("out of range [-90,+90]: -90.000001 when parsing <S90.000001>", message); + message = parseException(true, "E180.000001"); + assertEquals("out of range [-180,+180]: 180.000001 when parsing <E180.000001>", message); + message = parseException(true, "W180.000001"); + assertEquals("out of range [-180,+180]: -180.000001 when parsing <W180.000001>", message); + } + + /** + * Tests various inputs that contain syntax errors. 
+ */ + @Test + public void testInputErrors() { + String message = parseException(false, "N90S90"); + assertEquals("already set direction once, cannot add direction: S when parsing <N90S90>", message); + message = parseException(false, "E120W120"); + assertEquals("already set direction once, cannot add direction: W when parsing <E120W120>", message); + message = parseException(false, "E"); + assertEquals("end of field without any number seen when parsing <E>", message); + message = parseException(false, ""); + assertEquals("end of field without any number seen when parsing <>", message); + message = parseException(false, "NW25"); + assertEquals("already set direction once, cannot add direction: W when parsing <NW25>", message); + message = parseException(false, "N16.25\u00B0"); + assertEquals("cannot have fractional degrees before degrees sign when parsing <N16.25\u00B0>", message); + message = parseException(false, "N16\u00B022.40'"); + assertEquals("cannot have fractional minutes before minutes sign when parsing <N16\u00B022.40'>", message); + message = parseException(false, ""); + assertEquals("end of field without any number seen when parsing <>", message); + message = parseException(false, "Yahoo!"); + assertEquals("invalid character: Y when parsing <Yahoo!>", message); + message = parseException(false, "N63O025.105"); + assertEquals("invalid character: O when parsing <N63O025.105>", message); + } + +} diff --git a/vsm/src/vespa/vsm/vsm/vsm-adapter.cpp b/vsm/src/vespa/vsm/vsm/vsm-adapter.cpp index 5d8c7735c0e..8307954faae 100644 --- a/vsm/src/vespa/vsm/vsm/vsm-adapter.cpp +++ b/vsm/src/vespa/vsm/vsm/vsm-adapter.cpp @@ -38,11 +38,6 @@ void GetDocsumsStateCallback::FillRankFeatures(GetDocsumsState * state, IDocsumE } } -void GetDocsumsStateCallback::ParseLocation(GetDocsumsState *state) -{ - (void) state; -} - void GetDocsumsStateCallback::FillDocumentLocations(GetDocsumsState *state, IDocsumEnvironment * env) { (void) state; diff --git 
a/vsm/src/vespa/vsm/vsm/vsm-adapter.h b/vsm/src/vespa/vsm/vsm/vsm-adapter.h index cffae318586..31e472713de 100644 --- a/vsm/src/vespa/vsm/vsm/vsm-adapter.h +++ b/vsm/src/vespa/vsm/vsm/vsm-adapter.h @@ -40,7 +40,6 @@ public: GetDocsumsStateCallback(); void FillSummaryFeatures(GetDocsumsState * state, IDocsumEnvironment * env) override; void FillRankFeatures(GetDocsumsState * state, IDocsumEnvironment * env) override; - void ParseLocation(GetDocsumsState * state) override; virtual void FillDocumentLocations(GetDocsumsState * state, IDocsumEnvironment * env); virtual std::unique_ptr<search::MatchingElements> fill_matching_elements(const search::MatchingElementsFields& fields) override; void setSummaryFeatures(const search::FeatureSet::SP & sf) { _summaryFeatures = sf; } |