diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2022-02-01 18:19:12 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-02-01 18:19:12 +0100 |
commit | b754deecd3cd7856f7c407dbab479e950316a9ab (patch) | |
tree | d9de02de427778aca9c58ee756c9ce270701a71d | |
parent | bdd6e8a94ad4919bdc326b9e1a44499f06572e87 (diff) | |
parent | c0e1557f5d620287a5378ae6a7c25bae0135e748 (diff) |
Merge pull request #20998 from vespa-engine/arnej/add-km-distance-output
add extra output with distance in km
7 files changed, 323 insertions, 2 deletions
diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp index 1ba069818ba..7ebc3759813 100644 --- a/searchlib/src/tests/features/prod_features.cpp +++ b/searchlib/src/tests/features/prod_features.cpp @@ -14,6 +14,7 @@ #include <vespa/searchlib/features/attributefeature.h> #include <vespa/searchlib/features/closenessfeature.h> #include <vespa/searchlib/features/distancefeature.h> +#include <vespa/searchlib/features/great_circle_distance_feature.h> #include <vespa/searchlib/features/dotproductfeature.h> #include <vespa/searchlib/features/fieldlengthfeature.h> #include <vespa/searchlib/features/fieldmatchfeature.h> @@ -93,6 +94,7 @@ Test::Main() TEST_DO(testAttributeMatch()); TEST_FLUSH(); TEST_DO(testCloseness()); TEST_FLUSH(); TEST_DO(testMatchCount()); TEST_FLUSH(); + TEST_DO(testGreatCircleDistance()); TEST_FLUSH(); TEST_DO(testDistance()); TEST_FLUSH(); TEST_DO(testDistanceToPath()); TEST_FLUSH(); TEST_DO(testDotProduct()); TEST_FLUSH(); @@ -819,6 +821,67 @@ Test::assertFreshness(feature_t expFreshness, const vespalib::string & attr, uin ASSERT_TRUE(ft.execute(RankResult().addScore(feature, expFreshness).setEpsilon(EPS))); } +namespace { + +struct AirPort { + const char *tla; + double lat; + double lng; +}; + +std::pair<int32_t, int32_t> toXY(const AirPort &p) { + return std::make_pair((int)(p.lng * 1.0e6), + (int)(p.lat * 1.0e6)); +} + +GeoLocation toGL(const AirPort &p) { + int32_t x = (int)(p.lng * 1.0e6); + int32_t y = (int)(p.lat * 1.0e6); + GeoLocation::Point gp{x, y}; + return GeoLocation{gp}; +} + +} + +void +Test::testGreatCircleDistance() +{ + { // Test blueprint. + GreatCircleDistanceBlueprint pt; + EXPECT_TRUE(assertCreateInstance(pt, "great_circle_distance")); + StringList params, in, out; + FT_SETUP_FAIL(pt, params); + FtIndexEnvironment idx_env; + idx_env + .getBuilder() + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::INT64, "pos_zcurve"); + FT_SETUP_OK(pt, idx_env, params.add("pos"), in, + out.add("km").add("latitude").add("longitude")); + FT_DUMP_EMPTY(_factory, "great_circle_distance"); + } + { // Test executor. + FtFeatureTest ft(_factory, "great_circle_distance(pos)"); + const AirPort SFO = { "SFO", 37.618806, -122.375416 }; + const AirPort TRD = { "TRD", 63.457556, 10.924250 }; + std::vector<std::pair<int32_t,int32_t>> pos = { toXY(SFO), toXY(TRD) }; + setupForDistanceTest(ft, "pos_zcurve", pos, true); + const AirPort LHR = { "LHR", 51.477500, -0.461388 }; + const AirPort JFK = { "JFK", 40.639928, -73.778692 }; + ft.getQueryEnv().addLocation(GeoLocationSpec{"pos", toGL(LHR)}); + ft.getQueryEnv().addLocation(GeoLocationSpec{"pos", toGL(JFK)}); + ASSERT_TRUE(ft.setup()); + double exp = 1494; // according to gcmap.com + ASSERT_TRUE(ft.execute(RankResult().setEpsilon(10.0). + addScore("great_circle_distance(pos)", exp))); + ASSERT_TRUE(ft.execute(RankResult().setEpsilon(10.0). + addScore("great_circle_distance(pos).km", exp))); + ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-9). + addScore("great_circle_distance(pos).latitude", TRD.lat))); + ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-9). + addScore("great_circle_distance(pos).longitude", TRD.lng))); + } +} + void Test::testDistance() { @@ -830,7 +893,7 @@ Test::testDistance() StringList params, in, out; FT_SETUP_FAIL(pt, params); FT_SETUP_OK(pt, params.add("pos"), in, - out.add("out").add("index").add("latitude").add("longitude")); + out.add("out").add("index").add("latitude").add("longitude").add("km")); FT_DUMP_EMPTY(_factory, "distance"); } @@ -963,6 +1026,8 @@ Test::assert2DZDistance(feature_t exp, const vespalib::string & positions, ASSERT_TRUE(ft.setup()); ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-4). addScore("distance(pos)", exp))); + ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-4). + addScore("distance(pos).km", exp * 0.00011119508023))); ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-30). addScore("distance(pos).index", hit_index))); ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-9). diff --git a/searchlib/src/tests/features/prod_features.h b/searchlib/src/tests/features/prod_features.h index 58e6b4953cc..ad21d7d7ccc 100644 --- a/searchlib/src/tests/features/prod_features.h +++ b/searchlib/src/tests/features/prod_features.h @@ -19,6 +19,7 @@ public: void testAttributeMatch(); void testCloseness(); void testMatchCount(); + void testGreatCircleDistance(); void testDistance(); void testDistanceToPath(); void testDotProduct(); diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt index 9d4119a7faa..88531a46cb1 100644 --- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt @@ -12,6 +12,7 @@ vespa_add_library(searchlib_features OBJECT debug_wait.cpp dense_tensor_attribute_executor.cpp direct_tensor_attribute_executor.cpp + great_circle_distance_feature.cpp distancefeature.cpp distancetopathfeature.cpp documenttestutils.cpp diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.cpp b/searchlib/src/vespa/searchlib/features/distancefeature.cpp index 518ade2a8f5..57fa5fc7cee 100644 --- a/searchlib/src/vespa/searchlib/features/distancefeature.cpp +++ b/searchlib/src/vespa/searchlib/features/distancefeature.cpp @@ -135,10 +135,15 @@ DistanceExecutor::DistanceExecutor(GeoLocationSpecPtrs locations, void DistanceExecutor::execute(uint32_t docId) { - outputs().set_number(0, calculateDistance(docId)); + static constexpr double earth_mean_radius = 6371.0088; + static constexpr double deg_to_rad = M_PI / 180.0; + static constexpr double km_from_internal = 1.0e-6 * deg_to_rad * earth_mean_radius; + feature_t internal_d = calculateDistance(docId); + outputs().set_number(0, internal_d); outputs().set_number(1, _best_index); outputs().set_number(2, _best_y * 1.0e-6); // latitude outputs().set_number(3, _best_x * 1.0e-6); // longitude + outputs().set_number(4, internal_d * km_from_internal); // km } const feature_t DistanceExecutor::DEFAULT_DISTANCE(6400000000.0); @@ -178,6 +183,7 @@ DistanceBlueprint::setup_geopos(const IIndexEnvironment & env, describeOutput("index", "Index in array of closest point"); describeOutput("latitude", "Latitude of closest point"); describeOutput("longitude", "Longitude of closest point"); + describeOutput("km", "Distance in kilometer units"); env.hintAttributeAccess(_arg_string); return true; } diff --git a/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.cpp b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.cpp new file mode 100644 index 00000000000..eb47c88ecd0 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.cpp @@ -0,0 +1,190 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "great_circle_distance_feature.h" +#include <vespa/searchcommon/common/schema.h> +#include <vespa/searchlib/common/geo_location_spec.h> +#include <vespa/searchlib/fef/matchdata.h> +#include <vespa/document/datatype/positiondatatype.h> +#include <vespa/vespalib/geo/zcurve.h> +#include <vespa/vespalib/util/issue.h> +#include <vespa/vespalib/util/stash.h> +#include <cmath> +#include <limits> +#include "utils.h" + +#include <vespa/log/log.h> +LOG_SETUP(".features.great_circle_distance_feature"); + +using namespace search::fef; +using namespace search::index::schema; +using vespalib::Issue; + +namespace search::features { + +feature_t GCDExecutor::calculateGCD(uint32_t docId) { + feature_t dist = std::numeric_limits<feature_t>::max(); + if (_locations.empty()) { + return dist; + } + _intBuf.fill(*_pos, docId); + uint32_t numValues = _intBuf.size(); + int32_t docx = 0; + int32_t docy = 0; + for (auto loc : _locations) { + for (uint32_t i = 0; i < numValues; ++i) { + vespalib::geo::ZCurve::decode(_intBuf[i], &docx, &docy); + double lat = docy / 1.0e6; + double lng = docx / 1.0e6; + double d = loc.km_great_circle_distance(lat, lng); + if (d < dist) { + dist = d; + _best_lat = lat; + _best_lng = lng; + } + } + } + return dist; +} + +GCDExecutor::GCDExecutor(GeoLocationSpecPtrs locations, const attribute::IAttributeVector * pos) + : FeatureExecutor(), + _locations(), + _pos(pos), + _intBuf() +{ + if (_pos == nullptr) { + return; + } + _intBuf.allocate(_pos->getMaxValueCount()); + for (const auto * p : locations) { + if (p && p->location.valid()) { + double lat = p->location.point.y * 1.0e-6; + double lng = p->location.point.x * 1.0e-6; + _locations.emplace_back(search::common::GeoGcd{lat, lng}); + } + } +} + +void +GCDExecutor::execute(uint32_t docId) +{ + outputs().set_number(0, calculateGCD(docId)); + outputs().set_number(1, _best_lat); // latitude + outputs().set_number(2, _best_lng); // longitude +} + + +GreatCircleDistanceBlueprint::GreatCircleDistanceBlueprint() : + Blueprint("great_circle_distance"), + _attr_name() +{ +} + +GreatCircleDistanceBlueprint::~GreatCircleDistanceBlueprint() = default; + +void GreatCircleDistanceBlueprint::visitDumpFeatures(const IIndexEnvironment &, + IDumpFeatureVisitor &) const +{ +} + +Blueprint::UP +GreatCircleDistanceBlueprint::createInstance() const +{ + return std::make_unique<GreatCircleDistanceBlueprint>(); +} + +bool +GreatCircleDistanceBlueprint::setup_geopos(const IIndexEnvironment & env, const vespalib::string &attr) +{ + _attr_name = attr; + describeOutput("km", "The distance (in km) from the query position."); + describeOutput("latitude", "Latitude of closest point"); + describeOutput("longitude", "Longitude of closest point"); + env.hintAttributeAccess(_attr_name); + return true; +} + + +bool +GreatCircleDistanceBlueprint::setup(const IIndexEnvironment & env, + const ParameterList & params) +{ + vespalib::string arg = params[0].getValue(); + if (params.size() == 1) { + // params[0] = attribute name + } else if (params.size() == 2) { + // params[0] = "field" + // params[1] = attribute name + if (arg == "field") { + arg = params[1].getValue(); + } else { + LOG(error, "first argument must be 'field' but was '%s'", arg.c_str()); + return false; + } + } else { + LOG(error, "bad params.size() = %zd", params.size()); + return false; + } + vespalib::string z = document::PositionDataType::getZCurveFieldName(arg); + const auto *fi = env.getFieldByName(z); + if (fi != nullptr && fi->hasAttribute()) { + auto dt = fi->get_data_type(); + auto ct = fi->collection(); + if (dt == DataType::INT64) { + if (ct == CollectionType::SINGLE || ct == CollectionType::ARRAY) { + return setup_geopos(env, z); + } + } + } + if (env.getFieldByName(arg) == nullptr && fi == nullptr) { + LOG(error, "unknown field '%s' for rank feature %s\n", arg.c_str(), getName().c_str()); + } else { + LOG(error, "field '%s' must be type position and attribute for rank feature %s\n", arg.c_str(), getName().c_str()); + } + return false; +} + +FeatureExecutor & +GreatCircleDistanceBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash &stash) const +{ + // expect geo pos: + const search::attribute::IAttributeVector * pos = nullptr; + GeoLocationSpecPtrs matching_locs; + GeoLocationSpecPtrs other_locs; + + for (auto loc_ptr : env.getAllLocations()) { + if (loc_ptr && loc_ptr->location.valid()) { + if (loc_ptr->field_name == _attr_name) { + LOG(debug, "found loc from query env matching '%s'", _attr_name.c_str()); + matching_locs.push_back(loc_ptr); + } else { + LOG(debug, "found loc(%s) from query env not matching arg(%s)", + loc_ptr->field_name.c_str(), _attr_name.c_str()); + other_locs.push_back(loc_ptr); + } + } + } + if (matching_locs.empty() && other_locs.empty()) { + LOG(debug, "createExecutor: no valid locations"); + return stash.create<GCDExecutor>(matching_locs, nullptr); + } + LOG(debug, "createExecutor: valid location, attribute='%s'", _attr_name.c_str()); + pos = env.getAttributeContext().getAttribute(_attr_name); + if (pos != nullptr) { + if (!pos->isIntegerType()) { + Issue::report("distance feature: The position attribute '%s' is not an integer attribute.", + pos->getName().c_str()); + pos = nullptr; + } else if (pos->getCollectionType() == attribute::CollectionType::WSET) { + Issue::report("distance feature: The position attribute '%s' is a weighted set attribute.", + pos->getName().c_str()); + pos = nullptr; + } + } else { + Issue::report("distance feature: The position attribute '%s' was not found.", _attr_name.c_str()); + } + LOG(debug, "use '%s' locations with pos=%p", matching_locs.empty() ? "other" : "matching", pos); + return stash.create<GCDExecutor>(matching_locs.empty() ? other_locs : matching_locs, pos); +} + +} diff --git a/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.h b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.h new file mode 100644 index 00000000000..d44e4f5569b --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.h @@ -0,0 +1,56 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchlib/fef/blueprint.h> +#include <vespa/searchcommon/attribute/attributecontent.h> +#include <vespa/searchlib/common/geo_gcd.h> + +namespace search::features { + +/** Convenience typedef. */ +using GeoLocationSpecPtrs = std::vector<const search::common::GeoLocationSpec *>; + +/** + * Implements the executor for the great circle distance feature. + */ +class GCDExecutor : public fef::FeatureExecutor { +private: + std::vector<search::common::GeoGcd> _locations; + const attribute::IAttributeVector * _pos; + attribute::IntegerContent _intBuf; + feature_t _best_lat; + feature_t _best_lng; + + feature_t calculateGCD(uint32_t docId); +public: + /** + * Constructs an executor for the GCD feature. + * + * @param locations location objects associated with the query environment. + * @param pos the attribute to use for positions (expects zcurve encoding). + */ + GCDExecutor(GeoLocationSpecPtrs locations, const attribute::IAttributeVector * pos); + void execute(uint32_t docId) override; +}; + +/** + * Implements the blueprint for the GCD executor. + */ +class GreatCircleDistanceBlueprint : public fef::Blueprint { +private: + vespalib::string _attr_name; + bool setup_geopos(const fef::IIndexEnvironment & env, const vespalib::string &attr); +public: + GreatCircleDistanceBlueprint(); + ~GreatCircleDistanceBlueprint(); + void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override; + fef::Blueprint::UP createInstance() const override; + fef::ParameterDescriptions getDescriptions() const override { + return fef::ParameterDescriptions().desc().string().desc().string().string(); + } + bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override; + fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp index f2d5bd745ac..2bc8a349d1b 100644 --- a/searchlib/src/vespa/searchlib/features/setup.cpp +++ b/searchlib/src/vespa/searchlib/features/setup.cpp @@ -10,6 +10,7 @@ #include "debug_attribute_wait.h" #include "debug_wait.h" #include "distancefeature.h" +#include "great_circle_distance_feature.h" #include "distancetopathfeature.h" #include "dotproductfeature.h" #include "element_completeness_feature.h" @@ -126,6 +127,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry) registry.addPrototype(std::make_shared<GlobalSequenceBlueprint>()); registry.addPrototype(std::make_shared<OnnxBlueprint>("onnx")); registry.addPrototype(std::make_shared<OnnxBlueprint>("onnxModel")); + registry.addPrototype(std::make_shared<GreatCircleDistanceBlueprint>()); // Ranking Expression auto replacers = std::make_unique<ListExpressionReplacer>(); |