diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-07-10 12:00:16 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-07-15 15:39:21 +0000 |
commit | cd9cb9bb171a3f9dd8b75976c57f579a4bda9c10 (patch) | |
tree | 13ae73f380c0ba6071ab65f4f0405a3bae02a7dc /searchlib/src | |
parent | 8b2950f299c94c2a3782a061a985ec307792641d (diff) |
add common geo location parsing
Diffstat (limited to 'searchlib/src')
5 files changed, 418 insertions, 0 deletions
diff --git a/searchlib/src/tests/common/location/CMakeLists.txt b/searchlib/src/tests/common/location/CMakeLists.txt index 64a894096d5..0e36db46db9 100644 --- a/searchlib/src/tests/common/location/CMakeLists.txt +++ b/searchlib/src/tests/common/location/CMakeLists.txt @@ -6,3 +6,12 @@ vespa_add_executable(searchlib_location_test_app TEST searchlib ) vespa_add_test(NAME searchlib_location_test_app COMMAND searchlib_location_test_app) + +vespa_add_executable(searchlib_geo_location_test_app TEST + SOURCES + geo_location_test.cpp + DEPENDS + searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_geo_location_test_app COMMAND searchlib_geo_location_test_app) diff --git a/searchlib/src/tests/common/location/geo_location_test.cpp b/searchlib/src/tests/common/location/geo_location_test.cpp new file mode 100644 index 00000000000..34567b430b7 --- /dev/null +++ b/searchlib/src/tests/common/location/geo_location_test.cpp @@ -0,0 +1,135 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <stdio.h> +#include <vespa/searchlib/common/geo_location_spec.h> +#include <vespa/vespalib/gtest/gtest.h> + +using search::common::GeoLocationSpec; + +bool is_parseable(const char *str) { + GeoLocationSpec loc; + return loc.parse(str); +} + +GeoLocationSpec parse(const char *str) { + GeoLocationSpec loc; + EXPECT_TRUE(loc.parse(str)); + return loc; +} + +TEST(GeoLocationSpec, malformed_bounding_boxes_are_not_parseable) { + EXPECT_TRUE(is_parseable("[2,10,20,30,40]")); + EXPECT_FALSE(is_parseable("[2,10,20,30,40][2,10,20,30,40]")); + EXPECT_FALSE(is_parseable("[1,10,20,30,40]")); + EXPECT_FALSE(is_parseable("[3,10,20,30,40]")); + EXPECT_FALSE(is_parseable("[2, 10, 20, 30, 40]")); + EXPECT_FALSE(is_parseable("[2,10,20,30,40")); + EXPECT_FALSE(is_parseable("[2,10,20,30]")); + EXPECT_FALSE(is_parseable("[10,20,30,40]")); +} + +TEST(GeoLocationSpec, malformed_circles_are_not_parseable) { + EXPECT_TRUE(is_parseable("(2,10,20,5,0,0,0)")); + EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0)(2,10,20,5,0,0,0)")); + EXPECT_FALSE(is_parseable("(1,10,20,5,0,0,0)")); + EXPECT_FALSE(is_parseable("(3,10,20,5,0,0,0)")); + EXPECT_FALSE(is_parseable("(2, 10, 20, 5, 0, 0, 0)")); + EXPECT_FALSE(is_parseable("(2,10,20,5)")); + EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0")); + EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0,1000")); + EXPECT_FALSE(is_parseable("(10,20,5)")); +} + +TEST(GeoLocationSpec, bounding_boxes_can_be_parsed) { + auto loc = parse("[2,10,20,30,40]"); + EXPECT_EQ(false, loc.hasPoint()); + EXPECT_EQ(true, loc.hasBoundingBox()); + EXPECT_EQ(0u, loc.getXAspect()); + EXPECT_EQ(0, loc.getX()); + EXPECT_EQ(0, loc.getY()); + EXPECT_EQ(std::numeric_limits<uint32_t>::max(), loc.getRadius()); + EXPECT_EQ(10, loc.getMinX()); + EXPECT_EQ(20, loc.getMinY()); + EXPECT_EQ(30, loc.getMaxX()); + EXPECT_EQ(40, loc.getMaxY()); +} + +TEST(GeoLocationSpec, circles_can_be_parsed) { + auto loc = parse("(2,10,20,5,0,0,0)"); + EXPECT_EQ(true, loc.hasPoint()); + EXPECT_EQ(true, loc.hasBoundingBox()); + EXPECT_EQ(0u, loc.getXAspect()); + EXPECT_EQ(10, loc.getX()); + EXPECT_EQ(20, loc.getY()); + EXPECT_EQ(5u, loc.getRadius()); + EXPECT_EQ(5, loc.getMinX()); + EXPECT_EQ(15, loc.getMinY()); + EXPECT_EQ(15, loc.getMaxX()); + EXPECT_EQ(25, loc.getMaxY()); +} + +TEST(GeoLocationSpec, circles_can_have_aspect_ratio) { + auto loc = parse("(2,10,20,5,0,0,0,2147483648)"); + EXPECT_EQ(true, loc.hasPoint()); + EXPECT_EQ(true, loc.hasBoundingBox()); + EXPECT_EQ(2147483648u, loc.getXAspect()); + EXPECT_EQ(10, loc.getX()); + EXPECT_EQ(20, loc.getY()); + EXPECT_EQ(5u, loc.getRadius()); + EXPECT_EQ(-1, loc.getMinX()); + EXPECT_EQ(15, loc.getMinY()); + EXPECT_EQ(21, loc.getMaxX()); + EXPECT_EQ(25, loc.getMaxY()); +} + +TEST(GeoLocationSpec, bounding_box_can_be_specified_after_circle) { + auto loc = parse("(2,10,20,5,0,0,0)[2,10,20,30,40]"); + EXPECT_EQ(true, loc.hasPoint()); + EXPECT_EQ(true, loc.hasBoundingBox()); + EXPECT_EQ(0u, loc.getXAspect()); + EXPECT_EQ(10, loc.getX()); + EXPECT_EQ(20, loc.getY()); + EXPECT_EQ(5u, loc.getRadius()); + EXPECT_EQ(10, loc.getMinX()); + EXPECT_EQ(20, loc.getMinY()); + EXPECT_EQ(15, loc.getMaxX()); + EXPECT_EQ(25, loc.getMaxY()); +} + +TEST(GeoLocationSpec, circles_can_be_specified_after_bounding_box) { + auto loc = parse("[2,10,20,30,40](2,10,20,5,0,0,0)"); + EXPECT_EQ(true, loc.hasPoint()); + EXPECT_EQ(true, loc.hasBoundingBox()); + EXPECT_EQ(0u, loc.getXAspect()); + EXPECT_EQ(10, loc.getX()); + EXPECT_EQ(20, loc.getY()); + EXPECT_EQ(5u, loc.getRadius()); + EXPECT_EQ(10, loc.getMinX()); + EXPECT_EQ(20, loc.getMinY()); + EXPECT_EQ(15, loc.getMaxX()); + EXPECT_EQ(25, loc.getMaxY()); +} + +TEST(GeoLocationSpec, santa_search_gives_non_wrapped_bounding_box) { + auto loc = parse("(2,122163600,89998536,290112,4,2000,0,109704)"); + EXPECT_GE(loc.getMaxX(), loc.getMinX()); + EXPECT_GE(loc.getMaxY(), loc.getMinY()); +} + +TEST(GeoLocationSpec, near_boundary_search_gives_non_wrapped_bounding_box) { + auto loc1 = parse("(2,2000000000,2000000000,3000000000,0,1,0)"); + // fprintf(stderr, "positive near boundary: %s\n", loc1.getLocationString().c_str()); + EXPECT_GE(loc1.getMaxX(), loc1.getMinX()); + EXPECT_GE(loc1.getMaxY(), loc1.getMinY()); + EXPECT_EQ(std::numeric_limits<int32_t>::max(), loc1.getMaxY()); + EXPECT_EQ(std::numeric_limits<int32_t>::max(), loc1.getMaxY()); + + auto loc2 = parse("(2,-2000000000,-2000000000,3000000000,0,1,0)"); + // fprintf(stderr, "negative near boundary: %s\n", loc2.getLocationString().c_str()); + EXPECT_GE(loc2.getMaxX(), loc2.getMinX()); + EXPECT_GE(loc2.getMaxY(), loc2.getMinY()); + EXPECT_EQ(std::numeric_limits<int32_t>::min(), loc2.getMinX()); + EXPECT_EQ(std::numeric_limits<int32_t>::min(), loc2.getMinY()); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/common/CMakeLists.txt b/searchlib/src/vespa/searchlib/common/CMakeLists.txt index 5d30260a169..fd99f31aec8 100644 --- a/searchlib/src/vespa/searchlib/common/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/common/CMakeLists.txt @@ -12,6 +12,7 @@ vespa_add_library(searchlib_common OBJECT featureset.cpp fileheadercontext.cpp gatecallback.cpp + geo_location_spec.cpp growablebitvector.cpp indexmetainfo.cpp location.cpp diff --git a/searchlib/src/vespa/searchlib/common/geo_location_spec.cpp b/searchlib/src/vespa/searchlib/common/geo_location_spec.cpp new file mode 100644 index 00000000000..76dffb5188e --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/geo_location_spec.cpp @@ -0,0 +1,220 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "geo_location_spec.h" +#include <limits> +#include <vespa/vespalib/stllike/asciistream.h> + +namespace { + +int getInt(const char * &p) { + int val; + bool isminus; + val = 0; + isminus = false; + if (*p == '-') { + isminus = true; + p++; + } + while (*p >= '0' && *p <= '9') { + val *= 10; + val += (*p++ - '0'); + } + return isminus ? - val : val; +} + +} // namespace <unnamed> + +namespace search::common { + +GeoLocationSpec::GeoLocationSpec() : + _valid(false), + _hasPoint(false), + _hasRadius(false), + _hasBoundingBox(false), + _x(0), + _y(0), + _x_aspect(0u), + _radius(std::numeric_limits<uint32_t>::max()), + _min_x(std::numeric_limits<int32_t>::min()), + _max_x(std::numeric_limits<int32_t>::max()), + _min_y(std::numeric_limits<int32_t>::min()), + _max_y(std::numeric_limits<int32_t>::max()), + _parseError(NULL) +{ +} + + +bool +GeoLocationSpec::getDimensionality(const char * &p) { + if (*p == '2') { + p++; + if (*p != ',') { + _parseError = "Missing comma after 2D dimensionality"; + return false; + } + p++; + return true; + } + _parseError = "Bad dimensionality spec, not 2D"; + return false; +} + +std::string +GeoLocationSpec::getLocationString() const +{ + vespalib::asciistream loc; + if (hasPoint()) { + loc << "(2" // dimensionality + << "," << _x + << "," << _y + << "," << _radius + << "," << "0" // table id. + << "," << "1" // rank multiplier. + << "," << "0"; // rank only on distance. + if (_x_aspect != 0) { + loc << "," << _x_aspect; // aspect multiplier + } + loc << ")"; + } + if (hasPoint()) { + loc << "[2," << _min_x + << "," << _min_y + << "," << _max_x + << "," << _max_y + << "]" ; + } + return loc.str(); +} + +bool +GeoLocationSpec::parse(const std::string &locStr) +{ + bool foundBoundingBox = false; + bool foundLoc = false; + const char *p = locStr.c_str(); + while (*p != '\0') { + if (*p == '[') { + p++; + if (foundBoundingBox) { + _parseError = "Duplicate bounding box"; + return false; + } + foundBoundingBox = true; + if (!getDimensionality(p)) + return false; + _min_x = getInt(p); + if (*p != ',') { + _parseError = "Missing ',' after minx"; + return false; + } + p++; + _min_y = getInt(p); + if (*p != ',') { + _parseError = "Missing ',' after miny"; + return false; + } + p++; + _max_x = getInt(p); + if (*p != ',') { + _parseError = "Missing ',' after maxx"; + return false; + } + p++; + _max_y = getInt(p); + if (*p != ']') { + _parseError = "Missing ']' after maxy"; + return false; + } + p++; + } else if (*p == '(') { + p++; + if (foundLoc) { + _parseError = "Duplicate location"; + return false; + } + foundLoc = true; + if (!getDimensionality(p)) + return false; + _x = getInt(p); + if (*p != ',') { + _parseError = "Missing ',' after x position"; + return false; + } + p++; + _y = getInt(p); + if (*p != ',') { + _parseError = "Missing ',' after y position"; + return false; + } + p++; + _radius = getInt(p); + if (*p != ',') { + _parseError = "Missing ',' after radius"; + return false; + } + p++; + /* _tableID = */ (void) getInt(p); + if (*p != ',') { + _parseError = "Missing ',' after tableID"; + return false; + } + p++; + /* _rankMultiplier = */ (void) getInt(p); + if (*p != ',') { + _parseError = "Missing ',' after rank multiplier"; + return false; + } + p++; + /* _rankOnlyOnDistance = */ (void) getInt(p); + if (*p == ',') { + p++; + _x_aspect = getInt(p); + if (*p != ')') { + _parseError = "Missing ')' after xAspect"; + return false; + } + } else { + if (*p != ')') { + _parseError = "Missing ')' after rankOnlyOnDistance flag"; + return false; + } + } + p++; + } else if (*p == ' ') { + p++; + } else { + _parseError = "Unexpected char in location spec"; + return false; + } + } + + if (foundLoc) { + _hasRadius = (_radius != std::numeric_limits<uint32_t>::max()); + uint32_t maxdx = _radius; + if (_x_aspect != 0) { + uint64_t maxdx2 = ((static_cast<uint64_t>(_radius) << 32) + 0xffffffffu) / + _x_aspect; + if (maxdx2 >= 0xffffffffu) + maxdx = 0xffffffffu; + else + maxdx = static_cast<uint32_t>(maxdx2); + } + int64_t implied_max_x = int64_t(_x) + int64_t(maxdx); + int64_t implied_min_x = int64_t(_x) - int64_t(maxdx); + + int64_t implied_max_y = int64_t(_y) + int64_t(_radius); + int64_t implied_min_y = int64_t(_y) - int64_t(_radius); + + if (implied_max_x < _max_x) _max_x = implied_max_x; + if (implied_min_x > _min_x) _min_x = implied_min_x; + + if (implied_max_y < _max_y) _max_y = implied_max_y; + if (implied_min_y > _min_y) _min_y = implied_min_y; + } + _hasPoint = foundLoc; + _hasBoundingBox = foundBoundingBox || _hasRadius; + _valid = (_hasPoint || _hasBoundingBox); + return _valid; +} + +} diff --git a/searchlib/src/vespa/searchlib/common/geo_location_spec.h b/searchlib/src/vespa/searchlib/common/geo_location_spec.h new file mode 100644 index 00000000000..f8f4b0f5cca --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/geo_location_spec.h @@ -0,0 +1,53 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <string> +#include <cstdint> + +namespace search::common { + +class GeoLocationSpec +{ +public: + GeoLocationSpec(); + + bool isValid() const { return _valid; } + bool hasPoint() const { return _hasPoint; } + bool hasBoundingBox() const { return _hasBoundingBox; } + + uint32_t getXAspect() const { return _x_aspect; } + int32_t getX() const { return _x; } + int32_t getY() const { return _y; } + uint32_t getRadius() const { return _radius; } + const char * getParseError() const { return _parseError; } + + int32_t getMinX() const { return _min_x; } + int32_t getMinY() const { return _min_y; } + int32_t getMaxX() const { return _max_x; } + int32_t getMaxY() const { return _max_y; } + + bool parse(const std::string &locStr); + + std::string getLocationString() const; + +private: + bool _valid; + bool _hasPoint; + bool _hasRadius; + bool _hasBoundingBox; + + int32_t _x; /* Query X position */ + int32_t _y; /* Query Y position */ + uint32_t _x_aspect; /* X distance multiplier fraction */ + uint32_t _radius; /* Radius for euclidean distance */ + int32_t _min_x; /* Min X coordinate */ + int32_t _max_x; /* Max X coordinate */ + int32_t _min_y; /* Min Y coordinate */ + int32_t _max_y; /* Max Y coordinate */ + + const char *_parseError; + bool getDimensionality(const char * &p); +}; + +} // namespace |