aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src
diff options
context:
space:
mode:
authorArne Juul <arnej@verizonmedia.com>2020-07-10 12:00:16 +0000
committerArne Juul <arnej@verizonmedia.com>2020-07-15 15:39:21 +0000
commitcd9cb9bb171a3f9dd8b75976c57f579a4bda9c10 (patch)
tree13ae73f380c0ba6071ab65f4f0405a3bae02a7dc /searchlib/src
parent8b2950f299c94c2a3782a061a985ec307792641d (diff)
add common geo location parsing
Diffstat (limited to 'searchlib/src')
-rw-r--r--searchlib/src/tests/common/location/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/common/location/geo_location_test.cpp135
-rw-r--r--searchlib/src/vespa/searchlib/common/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/common/geo_location_spec.cpp220
-rw-r--r--searchlib/src/vespa/searchlib/common/geo_location_spec.h53
5 files changed, 418 insertions, 0 deletions
diff --git a/searchlib/src/tests/common/location/CMakeLists.txt b/searchlib/src/tests/common/location/CMakeLists.txt
index 64a894096d5..0e36db46db9 100644
--- a/searchlib/src/tests/common/location/CMakeLists.txt
+++ b/searchlib/src/tests/common/location/CMakeLists.txt
@@ -6,3 +6,12 @@ vespa_add_executable(searchlib_location_test_app TEST
searchlib
)
vespa_add_test(NAME searchlib_location_test_app COMMAND searchlib_location_test_app)
+
+vespa_add_executable(searchlib_geo_location_test_app TEST
+ SOURCES
+ geo_location_test.cpp
+ DEPENDS
+ searchlib
+ GTest::GTest
+)
+vespa_add_test(NAME searchlib_geo_location_test_app COMMAND searchlib_geo_location_test_app)
diff --git a/searchlib/src/tests/common/location/geo_location_test.cpp b/searchlib/src/tests/common/location/geo_location_test.cpp
new file mode 100644
index 00000000000..34567b430b7
--- /dev/null
+++ b/searchlib/src/tests/common/location/geo_location_test.cpp
@@ -0,0 +1,135 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <stdio.h>
+#include <vespa/searchlib/common/geo_location_spec.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using search::common::GeoLocationSpec;
+
+bool is_parseable(const char *str) {
+ GeoLocationSpec loc;
+ return loc.parse(str);
+}
+
+GeoLocationSpec parse(const char *str) {
+ GeoLocationSpec loc;
+ EXPECT_TRUE(loc.parse(str));
+ return loc;
+}
+
+TEST(GeoLocationSpec, malformed_bounding_boxes_are_not_parseable) {
+ EXPECT_TRUE(is_parseable("[2,10,20,30,40]"));
+ EXPECT_FALSE(is_parseable("[2,10,20,30,40][2,10,20,30,40]"));
+ EXPECT_FALSE(is_parseable("[1,10,20,30,40]"));
+ EXPECT_FALSE(is_parseable("[3,10,20,30,40]"));
+ EXPECT_FALSE(is_parseable("[2, 10, 20, 30, 40]"));
+ EXPECT_FALSE(is_parseable("[2,10,20,30,40"));
+ EXPECT_FALSE(is_parseable("[2,10,20,30]"));
+ EXPECT_FALSE(is_parseable("[10,20,30,40]"));
+}
+
+TEST(GeoLocationSpec, malformed_circles_are_not_parseable) {
+ EXPECT_TRUE(is_parseable("(2,10,20,5,0,0,0)"));
+ EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0)(2,10,20,5,0,0,0)"));
+ EXPECT_FALSE(is_parseable("(1,10,20,5,0,0,0)"));
+ EXPECT_FALSE(is_parseable("(3,10,20,5,0,0,0)"));
+ EXPECT_FALSE(is_parseable("(2, 10, 20, 5, 0, 0, 0)"));
+ EXPECT_FALSE(is_parseable("(2,10,20,5)"));
+ EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0"));
+ EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0,1000"));
+ EXPECT_FALSE(is_parseable("(10,20,5)"));
+}
+
+TEST(GeoLocationSpec, bounding_boxes_can_be_parsed) {
+ auto loc = parse("[2,10,20,30,40]");
+ EXPECT_EQ(false, loc.hasPoint());
+ EXPECT_EQ(true, loc.hasBoundingBox());
+ EXPECT_EQ(0u, loc.getXAspect());
+ EXPECT_EQ(0, loc.getX());
+ EXPECT_EQ(0, loc.getY());
+ EXPECT_EQ(std::numeric_limits<uint32_t>::max(), loc.getRadius());
+ EXPECT_EQ(10, loc.getMinX());
+ EXPECT_EQ(20, loc.getMinY());
+ EXPECT_EQ(30, loc.getMaxX());
+ EXPECT_EQ(40, loc.getMaxY());
+}
+
+TEST(GeoLocationSpec, circles_can_be_parsed) {
+ auto loc = parse("(2,10,20,5,0,0,0)");
+ EXPECT_EQ(true, loc.hasPoint());
+ EXPECT_EQ(true, loc.hasBoundingBox());
+ EXPECT_EQ(0u, loc.getXAspect());
+ EXPECT_EQ(10, loc.getX());
+ EXPECT_EQ(20, loc.getY());
+ EXPECT_EQ(5u, loc.getRadius());
+ EXPECT_EQ(5, loc.getMinX());
+ EXPECT_EQ(15, loc.getMinY());
+ EXPECT_EQ(15, loc.getMaxX());
+ EXPECT_EQ(25, loc.getMaxY());
+}
+
+TEST(GeoLocationSpec, circles_can_have_aspect_ratio) {
+ auto loc = parse("(2,10,20,5,0,0,0,2147483648)");
+ EXPECT_EQ(true, loc.hasPoint());
+ EXPECT_EQ(true, loc.hasBoundingBox());
+ EXPECT_EQ(2147483648u, loc.getXAspect());
+ EXPECT_EQ(10, loc.getX());
+ EXPECT_EQ(20, loc.getY());
+ EXPECT_EQ(5u, loc.getRadius());
+ EXPECT_EQ(-1, loc.getMinX());
+ EXPECT_EQ(15, loc.getMinY());
+ EXPECT_EQ(21, loc.getMaxX());
+ EXPECT_EQ(25, loc.getMaxY());
+}
+
+TEST(GeoLocationSpec, bounding_box_can_be_specified_after_circle) {
+ auto loc = parse("(2,10,20,5,0,0,0)[2,10,20,30,40]");
+ EXPECT_EQ(true, loc.hasPoint());
+ EXPECT_EQ(true, loc.hasBoundingBox());
+ EXPECT_EQ(0u, loc.getXAspect());
+ EXPECT_EQ(10, loc.getX());
+ EXPECT_EQ(20, loc.getY());
+ EXPECT_EQ(5u, loc.getRadius());
+ EXPECT_EQ(10, loc.getMinX());
+ EXPECT_EQ(20, loc.getMinY());
+ EXPECT_EQ(15, loc.getMaxX());
+ EXPECT_EQ(25, loc.getMaxY());
+}
+
+TEST(GeoLocationSpec, circles_can_be_specified_after_bounding_box) {
+ auto loc = parse("[2,10,20,30,40](2,10,20,5,0,0,0)");
+ EXPECT_EQ(true, loc.hasPoint());
+ EXPECT_EQ(true, loc.hasBoundingBox());
+ EXPECT_EQ(0u, loc.getXAspect());
+ EXPECT_EQ(10, loc.getX());
+ EXPECT_EQ(20, loc.getY());
+ EXPECT_EQ(5u, loc.getRadius());
+ EXPECT_EQ(10, loc.getMinX());
+ EXPECT_EQ(20, loc.getMinY());
+ EXPECT_EQ(15, loc.getMaxX());
+ EXPECT_EQ(25, loc.getMaxY());
+}
+
+TEST(GeoLocationSpec, santa_search_gives_non_wrapped_bounding_box) {
+ auto loc = parse("(2,122163600,89998536,290112,4,2000,0,109704)");
+ EXPECT_GE(loc.getMaxX(), loc.getMinX());
+ EXPECT_GE(loc.getMaxY(), loc.getMinY());
+}
+
+TEST(GeoLocationSpec, near_boundary_search_gives_non_wrapped_bounding_box) {
+ auto loc1 = parse("(2,2000000000,2000000000,3000000000,0,1,0)");
+ // fprintf(stderr, "positive near boundary: %s\n", loc1.getLocationString().c_str());
+ EXPECT_GE(loc1.getMaxX(), loc1.getMinX());
+ EXPECT_GE(loc1.getMaxY(), loc1.getMinY());
+ EXPECT_EQ(std::numeric_limits<int32_t>::max(), loc1.getMaxY());
+ EXPECT_EQ(std::numeric_limits<int32_t>::max(), loc1.getMaxY());
+
+ auto loc2 = parse("(2,-2000000000,-2000000000,3000000000,0,1,0)");
+ // fprintf(stderr, "negative near boundary: %s\n", loc2.getLocationString().c_str());
+ EXPECT_GE(loc2.getMaxX(), loc2.getMinX());
+ EXPECT_GE(loc2.getMaxY(), loc2.getMinY());
+ EXPECT_EQ(std::numeric_limits<int32_t>::min(), loc2.getMinX());
+ EXPECT_EQ(std::numeric_limits<int32_t>::min(), loc2.getMinY());
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/common/CMakeLists.txt b/searchlib/src/vespa/searchlib/common/CMakeLists.txt
index 5d30260a169..fd99f31aec8 100644
--- a/searchlib/src/vespa/searchlib/common/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/common/CMakeLists.txt
@@ -12,6 +12,7 @@ vespa_add_library(searchlib_common OBJECT
featureset.cpp
fileheadercontext.cpp
gatecallback.cpp
+ geo_location_spec.cpp
growablebitvector.cpp
indexmetainfo.cpp
location.cpp
diff --git a/searchlib/src/vespa/searchlib/common/geo_location_spec.cpp b/searchlib/src/vespa/searchlib/common/geo_location_spec.cpp
new file mode 100644
index 00000000000..76dffb5188e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/geo_location_spec.cpp
@@ -0,0 +1,220 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "geo_location_spec.h"
+#include <limits>
+#include <vespa/vespalib/stllike/asciistream.h>
+
+namespace {
+
+int getInt(const char * &p) {
+ int val;
+ bool isminus;
+ val = 0;
+ isminus = false;
+ if (*p == '-') {
+ isminus = true;
+ p++;
+ }
+ while (*p >= '0' && *p <= '9') {
+ val *= 10;
+ val += (*p++ - '0');
+ }
+ return isminus ? - val : val;
+}
+
+} // namespace <unnamed>
+
+namespace search::common {
+
+GeoLocationSpec::GeoLocationSpec() :
+ _valid(false),
+ _hasPoint(false),
+ _hasRadius(false),
+ _hasBoundingBox(false),
+ _x(0),
+ _y(0),
+ _x_aspect(0u),
+ _radius(std::numeric_limits<uint32_t>::max()),
+ _min_x(std::numeric_limits<int32_t>::min()),
+ _max_x(std::numeric_limits<int32_t>::max()),
+ _min_y(std::numeric_limits<int32_t>::min()),
+ _max_y(std::numeric_limits<int32_t>::max()),
+ _parseError(NULL)
+{
+}
+
+
+bool
+GeoLocationSpec::getDimensionality(const char * &p) {
+ if (*p == '2') {
+ p++;
+ if (*p != ',') {
+ _parseError = "Missing comma after 2D dimensionality";
+ return false;
+ }
+ p++;
+ return true;
+ }
+ _parseError = "Bad dimensionality spec, not 2D";
+ return false;
+}
+
+std::string
+GeoLocationSpec::getLocationString() const
+{
+ vespalib::asciistream loc;
+ if (hasPoint()) {
+ loc << "(2" // dimensionality
+ << "," << _x
+ << "," << _y
+ << "," << _radius
+ << "," << "0" // table id.
+ << "," << "1" // rank multiplier.
+ << "," << "0"; // rank only on distance.
+ if (_x_aspect != 0) {
+ loc << "," << _x_aspect; // aspect multiplier
+ }
+ loc << ")";
+ }
+ if (hasPoint()) {
+ loc << "[2," << _min_x
+ << "," << _min_y
+ << "," << _max_x
+ << "," << _max_y
+ << "]" ;
+ }
+ return loc.str();
+}
+
+bool
+GeoLocationSpec::parse(const std::string &locStr)
+{
+ bool foundBoundingBox = false;
+ bool foundLoc = false;
+ const char *p = locStr.c_str();
+ while (*p != '\0') {
+ if (*p == '[') {
+ p++;
+ if (foundBoundingBox) {
+ _parseError = "Duplicate bounding box";
+ return false;
+ }
+ foundBoundingBox = true;
+ if (!getDimensionality(p))
+ return false;
+ _min_x = getInt(p);
+ if (*p != ',') {
+ _parseError = "Missing ',' after minx";
+ return false;
+ }
+ p++;
+ _min_y = getInt(p);
+ if (*p != ',') {
+ _parseError = "Missing ',' after miny";
+ return false;
+ }
+ p++;
+ _max_x = getInt(p);
+ if (*p != ',') {
+ _parseError = "Missing ',' after maxx";
+ return false;
+ }
+ p++;
+ _max_y = getInt(p);
+ if (*p != ']') {
+ _parseError = "Missing ']' after maxy";
+ return false;
+ }
+ p++;
+ } else if (*p == '(') {
+ p++;
+ if (foundLoc) {
+ _parseError = "Duplicate location";
+ return false;
+ }
+ foundLoc = true;
+ if (!getDimensionality(p))
+ return false;
+ _x = getInt(p);
+ if (*p != ',') {
+ _parseError = "Missing ',' after x position";
+ return false;
+ }
+ p++;
+ _y = getInt(p);
+ if (*p != ',') {
+ _parseError = "Missing ',' after y position";
+ return false;
+ }
+ p++;
+ _radius = getInt(p);
+ if (*p != ',') {
+ _parseError = "Missing ',' after radius";
+ return false;
+ }
+ p++;
+ /* _tableID = */ (void) getInt(p);
+ if (*p != ',') {
+ _parseError = "Missing ',' after tableID";
+ return false;
+ }
+ p++;
+ /* _rankMultiplier = */ (void) getInt(p);
+ if (*p != ',') {
+ _parseError = "Missing ',' after rank multiplier";
+ return false;
+ }
+ p++;
+ /* _rankOnlyOnDistance = */ (void) getInt(p);
+ if (*p == ',') {
+ p++;
+ _x_aspect = getInt(p);
+ if (*p != ')') {
+ _parseError = "Missing ')' after xAspect";
+ return false;
+ }
+ } else {
+ if (*p != ')') {
+ _parseError = "Missing ')' after rankOnlyOnDistance flag";
+ return false;
+ }
+ }
+ p++;
+ } else if (*p == ' ') {
+ p++;
+ } else {
+ _parseError = "Unexpected char in location spec";
+ return false;
+ }
+ }
+
+ if (foundLoc) {
+ _hasRadius = (_radius != std::numeric_limits<uint32_t>::max());
+ uint32_t maxdx = _radius;
+ if (_x_aspect != 0) {
+ uint64_t maxdx2 = ((static_cast<uint64_t>(_radius) << 32) + 0xffffffffu) /
+ _x_aspect;
+ if (maxdx2 >= 0xffffffffu)
+ maxdx = 0xffffffffu;
+ else
+ maxdx = static_cast<uint32_t>(maxdx2);
+ }
+ int64_t implied_max_x = int64_t(_x) + int64_t(maxdx);
+ int64_t implied_min_x = int64_t(_x) - int64_t(maxdx);
+
+ int64_t implied_max_y = int64_t(_y) + int64_t(_radius);
+ int64_t implied_min_y = int64_t(_y) - int64_t(_radius);
+
+ if (implied_max_x < _max_x) _max_x = implied_max_x;
+ if (implied_min_x > _min_x) _min_x = implied_min_x;
+
+ if (implied_max_y < _max_y) _max_y = implied_max_y;
+ if (implied_min_y > _min_y) _min_y = implied_min_y;
+ }
+ _hasPoint = foundLoc;
+ _hasBoundingBox = foundBoundingBox || _hasRadius;
+ _valid = (_hasPoint || _hasBoundingBox);
+ return _valid;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/common/geo_location_spec.h b/searchlib/src/vespa/searchlib/common/geo_location_spec.h
new file mode 100644
index 00000000000..f8f4b0f5cca
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/geo_location_spec.h
@@ -0,0 +1,53 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <string>
+#include <cstdint>
+
+namespace search::common {
+
+class GeoLocationSpec
+{
+public:
+ GeoLocationSpec();
+
+ bool isValid() const { return _valid; }
+ bool hasPoint() const { return _hasPoint; }
+ bool hasBoundingBox() const { return _hasBoundingBox; }
+
+ uint32_t getXAspect() const { return _x_aspect; }
+ int32_t getX() const { return _x; }
+ int32_t getY() const { return _y; }
+ uint32_t getRadius() const { return _radius; }
+ const char * getParseError() const { return _parseError; }
+
+ int32_t getMinX() const { return _min_x; }
+ int32_t getMinY() const { return _min_y; }
+ int32_t getMaxX() const { return _max_x; }
+ int32_t getMaxY() const { return _max_y; }
+
+ bool parse(const std::string &locStr);
+
+ std::string getLocationString() const;
+
+private:
+ bool _valid;
+ bool _hasPoint;
+ bool _hasRadius;
+ bool _hasBoundingBox;
+
+ int32_t _x; /* Query X position */
+ int32_t _y; /* Query Y position */
+ uint32_t _x_aspect; /* X distance multiplier fraction */
+ uint32_t _radius; /* Radius for euclidean distance */
+ int32_t _min_x; /* Min X coordinate */
+ int32_t _max_x; /* Max X coordinate */
+ int32_t _min_y; /* Min Y coordinate */
+ int32_t _max_y; /* Max Y coordinate */
+
+ const char *_parseError;
+ bool getDimensionality(const char * &p);
+};
+
+} // namespace