summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Arne H Juul <arnej27959@users.noreply.github.com> 2022-02-01 18:19:12 +0100
committer GitHub <noreply@github.com> 2022-02-01 18:19:12 +0100
commit b754deecd3cd7856f7c407dbab479e950316a9ab (patch)
tree d9de02de427778aca9c58ee756c9ce270701a71d
parent bdd6e8a94ad4919bdc326b9e1a44499f06572e87 (diff)
parent c0e1557f5d620287a5378ae6a7c25bae0135e748 (diff)
Merge pull request #20998 from vespa-engine/arnej/add-km-distance-output
add extra output with distance in km
-rw-r--r-- searchlib/src/tests/features/prod_features.cpp 67
-rw-r--r-- searchlib/src/tests/features/prod_features.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/features/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/features/distancefeature.cpp 8
-rw-r--r-- searchlib/src/vespa/searchlib/features/great_circle_distance_feature.cpp 190
-rw-r--r-- searchlib/src/vespa/searchlib/features/great_circle_distance_feature.h 56
-rw-r--r-- searchlib/src/vespa/searchlib/features/setup.cpp 2
7 files changed, 323 insertions, 2 deletions
diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp
index 1ba069818ba..7ebc3759813 100644
--- a/searchlib/src/tests/features/prod_features.cpp
+++ b/searchlib/src/tests/features/prod_features.cpp
@@ -14,6 +14,7 @@
#include <vespa/searchlib/features/attributefeature.h>
#include <vespa/searchlib/features/closenessfeature.h>
#include <vespa/searchlib/features/distancefeature.h>
+#include <vespa/searchlib/features/great_circle_distance_feature.h>
#include <vespa/searchlib/features/dotproductfeature.h>
#include <vespa/searchlib/features/fieldlengthfeature.h>
#include <vespa/searchlib/features/fieldmatchfeature.h>
@@ -93,6 +94,7 @@ Test::Main()
TEST_DO(testAttributeMatch()); TEST_FLUSH();
TEST_DO(testCloseness()); TEST_FLUSH();
TEST_DO(testMatchCount()); TEST_FLUSH();
+ TEST_DO(testGreatCircleDistance()); TEST_FLUSH();
TEST_DO(testDistance()); TEST_FLUSH();
TEST_DO(testDistanceToPath()); TEST_FLUSH();
TEST_DO(testDotProduct()); TEST_FLUSH();
@@ -819,6 +821,67 @@ Test::assertFreshness(feature_t expFreshness, const vespalib::string & attr, uin
ASSERT_TRUE(ft.execute(RankResult().addScore(feature, expFreshness).setEpsilon(EPS)));
}
+namespace {
+
+/** A named position in decimal degrees, used as geo test data. */
+struct AirPort {
+    const char *tla;  // short code such as "SFO"
+    double lat;       // latitude in degrees
+    double lng;       // longitude in degrees
+};
+
+/**
+ * Converts a position to integer (x, y) = (longitude, latitude), each
+ * multiplied by 1.0e6 and truncated toward zero.
+ */
+std::pair<int32_t, int32_t> toXY(const AirPort &p) {
+    const int scaled_lng = static_cast<int>(p.lng * 1.0e6);
+    const int scaled_lat = static_cast<int>(p.lat * 1.0e6);
+    return std::make_pair(scaled_lng, scaled_lat);
+}
+
+/** Wraps the same scaled point that toXY() produces in a GeoLocation. */
+GeoLocation toGL(const AirPort &p) {
+    const std::pair<int32_t, int32_t> xy = toXY(p);
+    GeoLocation::Point point{xy.first, xy.second};
+    return GeoLocation{point};
+}
+
+}
+
+void
+Test::testGreatCircleDistance()
+{ // Exercises the great_circle_distance feature: first blueprint setup, then executor outputs.
+ { // Test blueprint.
+ GreatCircleDistanceBlueprint pt;
+ EXPECT_TRUE(assertCreateInstance(pt, "great_circle_distance"));
+ StringList params, in, out;
+ FT_SETUP_FAIL(pt, params); // params is still empty here, so setup is expected to fail
+ FtIndexEnvironment idx_env;
+ idx_env
+ .getBuilder()
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::INT64, "pos_zcurve"); // presumably the name getZCurveFieldName("pos") yields in setup()
+ FT_SETUP_OK(pt, idx_env, params.add("pos"), in,
+ out.add("km").add("latitude").add("longitude")); // the three outputs, in declaration order
+ FT_DUMP_EMPTY(_factory, "great_circle_distance");
+ }
+ { // Test executor.
+ FtFeatureTest ft(_factory, "great_circle_distance(pos)");
+ const AirPort SFO = { "SFO", 37.618806, -122.375416 }; // document positions: SFO and TRD
+ const AirPort TRD = { "TRD", 63.457556, 10.924250 };
+ std::vector<std::pair<int32_t,int32_t>> pos = { toXY(SFO), toXY(TRD) };
+ setupForDistanceTest(ft, "pos_zcurve", pos, true);
+ const AirPort LHR = { "LHR", 51.477500, -0.461388 }; // query locations: LHR and JFK
+ const AirPort JFK = { "JFK", 40.639928, -73.778692 };
+ ft.getQueryEnv().addLocation(GeoLocationSpec{"pos", toGL(LHR)});
+ ft.getQueryEnv().addLocation(GeoLocationSpec{"pos", toGL(JFK)});
+ ASSERT_TRUE(ft.setup());
+ double exp = 1494; // expected shortest distance in km, according to gcmap.com
+ ASSERT_TRUE(ft.execute(RankResult().setEpsilon(10.0).
+ addScore("great_circle_distance(pos)", exp))); // the bare feature name is checked against the same value as .km
+ ASSERT_TRUE(ft.execute(RankResult().setEpsilon(10.0).
+ addScore("great_circle_distance(pos).km", exp)));
+ ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-9).
+ addScore("great_circle_distance(pos).latitude", TRD.lat))); // TRD is expected to be the closest document position
+ ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-9).
+ addScore("great_circle_distance(pos).longitude", TRD.lng)));
+ }
+}
+
void
Test::testDistance()
{
@@ -830,7 +893,7 @@ Test::testDistance()
StringList params, in, out;
FT_SETUP_FAIL(pt, params);
FT_SETUP_OK(pt, params.add("pos"), in,
- out.add("out").add("index").add("latitude").add("longitude"));
+ out.add("out").add("index").add("latitude").add("longitude").add("km"));
FT_DUMP_EMPTY(_factory, "distance");
}
@@ -963,6 +1026,8 @@ Test::assert2DZDistance(feature_t exp, const vespalib::string & positions,
ASSERT_TRUE(ft.setup());
ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-4).
addScore("distance(pos)", exp)));
+ ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-4).
+ addScore("distance(pos).km", exp * 0.00011119508023)));
ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-30).
addScore("distance(pos).index", hit_index)));
ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-9).
diff --git a/searchlib/src/tests/features/prod_features.h b/searchlib/src/tests/features/prod_features.h
index 58e6b4953cc..ad21d7d7ccc 100644
--- a/searchlib/src/tests/features/prod_features.h
+++ b/searchlib/src/tests/features/prod_features.h
@@ -19,6 +19,7 @@ public:
void testAttributeMatch();
void testCloseness();
void testMatchCount();
+ void testGreatCircleDistance();
void testDistance();
void testDistanceToPath();
void testDotProduct();
diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
index 9d4119a7faa..88531a46cb1 100644
--- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
@@ -12,6 +12,7 @@ vespa_add_library(searchlib_features OBJECT
debug_wait.cpp
dense_tensor_attribute_executor.cpp
direct_tensor_attribute_executor.cpp
+ great_circle_distance_feature.cpp
distancefeature.cpp
distancetopathfeature.cpp
documenttestutils.cpp
diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.cpp b/searchlib/src/vespa/searchlib/features/distancefeature.cpp
index 518ade2a8f5..57fa5fc7cee 100644
--- a/searchlib/src/vespa/searchlib/features/distancefeature.cpp
+++ b/searchlib/src/vespa/searchlib/features/distancefeature.cpp
@@ -135,10 +135,15 @@ DistanceExecutor::DistanceExecutor(GeoLocationSpecPtrs locations,
void
DistanceExecutor::execute(uint32_t docId)
{
- outputs().set_number(0, calculateDistance(docId));
+ static constexpr double earth_mean_radius = 6371.0088;
+ static constexpr double deg_to_rad = M_PI / 180.0;
+ static constexpr double km_from_internal = 1.0e-6 * deg_to_rad * earth_mean_radius;
+ feature_t internal_d = calculateDistance(docId);
+ outputs().set_number(0, internal_d);
outputs().set_number(1, _best_index);
outputs().set_number(2, _best_y * 1.0e-6); // latitude
outputs().set_number(3, _best_x * 1.0e-6); // longitude
+ outputs().set_number(4, internal_d * km_from_internal); // km
}
const feature_t DistanceExecutor::DEFAULT_DISTANCE(6400000000.0);
@@ -178,6 +183,7 @@ DistanceBlueprint::setup_geopos(const IIndexEnvironment & env,
describeOutput("index", "Index in array of closest point");
describeOutput("latitude", "Latitude of closest point");
describeOutput("longitude", "Longitude of closest point");
+ describeOutput("km", "Distance in kilometer units");
env.hintAttributeAccess(_arg_string);
return true;
}
diff --git a/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.cpp b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.cpp
new file mode 100644
index 00000000000..eb47c88ecd0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.cpp
@@ -0,0 +1,190 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "great_circle_distance_feature.h"
+#include <vespa/searchcommon/common/schema.h>
+#include <vespa/searchlib/common/geo_location_spec.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/document/datatype/positiondatatype.h>
+#include <vespa/vespalib/geo/zcurve.h>
+#include <vespa/vespalib/util/issue.h>
+#include <vespa/vespalib/util/stash.h>
+#include <cmath>
+#include <limits>
+#include "utils.h"
+
+#include <vespa/log/log.h>
+LOG_SETUP(".features.great_circle_distance_feature");
+
+using namespace search::fef;
+using namespace search::index::schema;
+using vespalib::Issue;
+
+namespace search::features {
+
+/**
+ * Computes the shortest great-circle distance between any query location and
+ * any position stored in the attribute for the given document.
+ *
+ * The latitude/longitude of the closest document position is recorded in
+ * _best_lat/_best_lng. Both are reset on every call, so a document without a
+ * usable position never reports coordinates left over from an earlier
+ * document (or indeterminate values on the very first call).
+ *
+ * @param docId the document to evaluate.
+ * @return the smallest km_great_circle_distance() over all pairs, or the
+ *         maximum feature_t value when there is nothing to compare.
+ */
+feature_t GCDExecutor::calculateGCD(uint32_t docId) {
+    feature_t dist = std::numeric_limits<feature_t>::max();
+    // Fallback coordinates reported together with the "max" distance.
+    // NOTE(review): 90 / -180 is a valid but arbitrary sentinel; confirm it
+    // matches what consumers of the latitude/longitude outputs expect.
+    _best_lat = 90.0;
+    _best_lng = -180.0;
+    if (_locations.empty()) {
+        // The constructor adds no locations when _pos is nullptr, so this
+        // early return also protects the dereference of _pos below.
+        return dist;
+    }
+    _intBuf.fill(*_pos, docId);
+    const uint32_t numValues = _intBuf.size();
+    int32_t docx = 0;
+    int32_t docy = 0;
+    // Iterate by reference to avoid copying each GeoGcd for every document.
+    for (auto & loc : _locations) {
+        for (uint32_t i = 0; i < numValues; ++i) {
+            vespalib::geo::ZCurve::decode(_intBuf[i], &docx, &docy);
+            // Decoded values are scaled by 1.0e6; y is latitude, x is longitude.
+            const double lat = docy / 1.0e6;
+            const double lng = docx / 1.0e6;
+            const double d = loc.km_great_circle_distance(lat, lng);
+            if (d < dist) {
+                dist = d;
+                _best_lat = lat;
+                _best_lng = lng;
+            }
+        }
+    }
+    return dist;
+}
+
+/**
+ * Builds the executor from the query locations and the position attribute.
+ *
+ * Every non-null, valid location is converted from its integer point (scaled
+ * by 1.0e6) to a GeoGcd in degrees. When pos is nullptr no locations are
+ * stored at all, which makes calculateGCD() return before touching _pos.
+ *
+ * @param locations location objects associated with the query environment.
+ * @param pos the attribute to read document positions from; may be nullptr.
+ */
+GCDExecutor::GCDExecutor(GeoLocationSpecPtrs locations, const attribute::IAttributeVector * pos)
+    : FeatureExecutor(),
+      _locations(),
+      _pos(pos),
+      _intBuf(),
+      // Give the latitude/longitude outputs a defined value even before any
+      // position has been matched.
+      _best_lat(90.0),
+      _best_lng(-180.0)
+{
+    if (_pos == nullptr) {
+        return;
+    }
+    _intBuf.allocate(_pos->getMaxValueCount());
+    _locations.reserve(locations.size());
+    for (const auto * p : locations) {
+        if (p && p->location.valid()) {
+            const double lat = p->location.point.y * 1.0e-6;
+            const double lng = p->location.point.x * 1.0e-6;
+            _locations.emplace_back(search::common::GeoGcd{lat, lng});
+        }
+    }
+}
+
+/**
+ * Fills the three outputs for one document. The distance is computed first
+ * because calculateGCD() is what updates _best_lat and _best_lng.
+ */
+void
+GCDExecutor::execute(uint32_t docId)
+{
+    const feature_t shortest = calculateGCD(docId);
+    outputs().set_number(0, shortest);
+    outputs().set_number(1, _best_lat); // latitude
+    outputs().set_number(2, _best_lng); // longitude
+}
+
+
+/** Creates the blueprint under the feature name "great_circle_distance". */
+GreatCircleDistanceBlueprint::GreatCircleDistanceBlueprint()
+    : Blueprint("great_circle_distance"),
+      _attr_name()
+{
+}
+
+GreatCircleDistanceBlueprint::~GreatCircleDistanceBlueprint() = default;
+
+/** Intentionally empty: nothing is reported to the dump feature visitor. */
+void
+GreatCircleDistanceBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const
+{
+}
+
+/** Returns a new default-constructed blueprint of this type. */
+Blueprint::UP
+GreatCircleDistanceBlueprint::createInstance() const
+{
+    return std::make_unique<GreatCircleDistanceBlueprint>();
+}
+
+/**
+ * Completes setup once a usable position attribute has been identified:
+ * remembers its name, hints attribute access for it and declares the three
+ * outputs (km, latitude, longitude) in that order.
+ *
+ * @param env the index environment that receives the attribute access hint.
+ * @param attr the name of the attribute holding the positions.
+ * @return always true.
+ */
+bool
+GreatCircleDistanceBlueprint::setup_geopos(const IIndexEnvironment & env, const vespalib::string &attr)
+{
+    _attr_name = attr;
+    env.hintAttributeAccess(_attr_name);
+    describeOutput("km", "The distance (in km) from the query position.");
+    describeOutput("latitude", "Latitude of closest point");
+    describeOutput("longitude", "Longitude of closest point");
+    return true;
+}
+
+
+/**
+ * Validates the feature parameters and resolves the position attribute.
+ *
+ * Accepted forms are (name) and ("field", name). The z-curve field name
+ * derived from name must exist as an INT64 attribute with single or array
+ * collection type; otherwise an error is logged and setup fails.
+ *
+ * @param env the index environment used for field lookup.
+ * @param params the parameters given to the feature.
+ * @return true when the outputs were set up, false on any error.
+ */
+bool
+GreatCircleDistanceBlueprint::setup(const IIndexEnvironment & env,
+                                    const ParameterList & params)
+{
+    // Parameters are only indexed after the count has been checked, so an
+    // unexpected (including empty) list is rejected instead of read.
+    vespalib::string arg;
+    if (params.size() == 1) {
+        // params[0] = attribute name
+        arg = params[0].getValue();
+    } else if (params.size() == 2) {
+        // params[0] = "field"
+        // params[1] = attribute name
+        vespalib::string first = params[0].getValue();
+        if (first == "field") {
+            arg = params[1].getValue();
+        } else {
+            LOG(error, "first argument must be 'field' but was '%s'", first.c_str());
+            return false;
+        }
+    } else {
+        // %zu is the conversion for an unsigned size value.
+        LOG(error, "bad params.size() = %zu", params.size());
+        return false;
+    }
+    vespalib::string z = document::PositionDataType::getZCurveFieldName(arg);
+    const auto *fi = env.getFieldByName(z);
+    if (fi != nullptr && fi->hasAttribute()) {
+        auto dt = fi->get_data_type();
+        auto ct = fi->collection();
+        if (dt == DataType::INT64) {
+            if (ct == CollectionType::SINGLE || ct == CollectionType::ARRAY) {
+                return setup_geopos(env, z);
+            }
+        }
+    }
+    // Distinguish "no such field at all" from "field exists but is unusable".
+    if (env.getFieldByName(arg) == nullptr && fi == nullptr) {
+        LOG(error, "unknown field '%s' for rank feature %s\n", arg.c_str(), getName().c_str());
+    } else {
+        LOG(error, "field '%s' must be type position and attribute for rank feature %s\n", arg.c_str(), getName().c_str());
+    }
+    return false;
+}
+
+/**
+ * Creates the executor for one query.
+ *
+ * Valid query locations are split into those whose field name equals
+ * _attr_name and all others; the matching ones are preferred and the others
+ * are used only when none match. The position attribute is looked up by
+ * _attr_name and rejected (with a reported issue) unless it is an integer
+ * attribute that is not a weighted set. The executor is still created with
+ * a nullptr attribute in that case.
+ *
+ * @param env the query environment supplying locations and attributes.
+ * @param stash the stash that owns the created executor.
+ * @return a reference to the executor allocated in the stash.
+ */
+FeatureExecutor &
+GreatCircleDistanceBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash &stash) const
+{
+    // expect geo pos:
+    const search::attribute::IAttributeVector * pos = nullptr;
+    GeoLocationSpecPtrs matching_locs;
+    GeoLocationSpecPtrs other_locs;
+
+    for (auto loc_ptr : env.getAllLocations()) {
+        if (loc_ptr && loc_ptr->location.valid()) {
+            if (loc_ptr->field_name == _attr_name) {
+                LOG(debug, "found loc from query env matching '%s'", _attr_name.c_str());
+                matching_locs.push_back(loc_ptr);
+            } else {
+                LOG(debug, "found loc(%s) from query env not matching arg(%s)",
+                    loc_ptr->field_name.c_str(), _attr_name.c_str());
+                other_locs.push_back(loc_ptr);
+            }
+        }
+    }
+    if (matching_locs.empty() && other_locs.empty()) {
+        LOG(debug, "createExecutor: no valid locations");
+        return stash.create<GCDExecutor>(matching_locs, nullptr);
+    }
+    LOG(debug, "createExecutor: valid location, attribute='%s'", _attr_name.c_str());
+    pos = env.getAttributeContext().getAttribute(_attr_name);
+    // The issue texts name this feature so a report is not mistaken for one
+    // coming from the separate "distance" feature.
+    if (pos != nullptr) {
+        if (!pos->isIntegerType()) {
+            Issue::report("great_circle_distance feature: The position attribute '%s' is not an integer attribute.",
+                          pos->getName().c_str());
+            pos = nullptr;
+        } else if (pos->getCollectionType() == attribute::CollectionType::WSET) {
+            Issue::report("great_circle_distance feature: The position attribute '%s' is a weighted set attribute.",
+                          pos->getName().c_str());
+            pos = nullptr;
+        }
+    } else {
+        Issue::report("great_circle_distance feature: The position attribute '%s' was not found.", _attr_name.c_str());
+    }
+    // %p expects a pointer to void, hence the explicit cast.
+    LOG(debug, "use '%s' locations with pos=%p", matching_locs.empty() ? "other" : "matching",
+        static_cast<const void *>(pos));
+    return stash.create<GCDExecutor>(matching_locs.empty() ? other_locs : matching_locs, pos);
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.h b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.h
new file mode 100644
index 00000000000..d44e4f5569b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.h
@@ -0,0 +1,56 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchcommon/attribute/attributecontent.h>
+#include <vespa/searchlib/common/geo_gcd.h>
+
+namespace search::features {
+
+/** Convenience typedef. */
+using GeoLocationSpecPtrs = std::vector<const search::common::GeoLocationSpec *>;
+
+/**
+ * Implements the executor for the great circle distance feature: per document it outputs the shortest distance between the query locations and the document's positions, plus the latitude and longitude of the closest document position.
+ */
+class GCDExecutor : public fef::FeatureExecutor {
+private:
+ std::vector<search::common::GeoGcd> _locations; // one entry per valid query location
+ const attribute::IAttributeVector * _pos; // position attribute; may be nullptr
+ attribute::IntegerContent _intBuf; // buffer filled with the attribute values of the current document
+ feature_t _best_lat; // latitude of the closest position, updated by calculateGCD()
+ feature_t _best_lng; // longitude of the closest position, updated by calculateGCD()
+
+ feature_t calculateGCD(uint32_t docId); // returns the shortest distance for docId and updates _best_lat/_best_lng
+public:
+ /**
+ * Constructs an executor for the GCD feature.
+ *
+ * @param locations location objects associated with the query environment.
+ * @param pos the attribute to use for positions (expects zcurve encoding).
+ */
+ GCDExecutor(GeoLocationSpecPtrs locations, const attribute::IAttributeVector * pos);
+ void execute(uint32_t docId) override; // sets outputs 0..2: distance, latitude, longitude
+};
+
+/**
+ * Implements the blueprint for the GCD executor; registered under the feature name "great_circle_distance".
+ */
+class GreatCircleDistanceBlueprint : public fef::Blueprint {
+private:
+ vespalib::string _attr_name; // attribute name stored by setup_geopos() and looked up in createExecutor()
+ bool setup_geopos(const fef::IIndexEnvironment & env, const vespalib::string &attr); // stores attr and declares the outputs
+public:
+ GreatCircleDistanceBlueprint();
+ ~GreatCircleDistanceBlueprint();
+ void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override;
+ fef::Blueprint::UP createInstance() const override;
+ fef::ParameterDescriptions getDescriptions() const override {
+ return fef::ParameterDescriptions().desc().string().desc().string().string(); // presumably two alternatives: one string, or two strings (setup() handles 1 or 2 params)
+ }
+ bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override; // accepts (name) or ("field", name)
+ fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp
index f2d5bd745ac..2bc8a349d1b 100644
--- a/searchlib/src/vespa/searchlib/features/setup.cpp
+++ b/searchlib/src/vespa/searchlib/features/setup.cpp
@@ -10,6 +10,7 @@
#include "debug_attribute_wait.h"
#include "debug_wait.h"
#include "distancefeature.h"
+#include "great_circle_distance_feature.h"
#include "distancetopathfeature.h"
#include "dotproductfeature.h"
#include "element_completeness_feature.h"
@@ -126,6 +127,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry)
registry.addPrototype(std::make_shared<GlobalSequenceBlueprint>());
registry.addPrototype(std::make_shared<OnnxBlueprint>("onnx"));
registry.addPrototype(std::make_shared<OnnxBlueprint>("onnxModel"));
+ registry.addPrototype(std::make_shared<GreatCircleDistanceBlueprint>());
// Ranking Expression
auto replacers = std::make_unique<ListExpressionReplacer>();