summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArne H Juul <arnej@yahooinc.com>2022-01-31 15:27:49 +0000
committerArne H Juul <arnej@yahooinc.com>2022-01-31 16:41:21 +0000
commit7fd99169b3196a81670daa2866b0e7653c5e4c68 (patch)
treea5741630ced09bf2c84a0c7b1c7d6e0d0c4f2036
parent72c41aa96f322d1ea06888b4ad4bf48078874409 (diff)
add great_circle_distance feature
-rw-r--r--searchlib/src/vespa/searchlib/features/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/features/great_circle_distance_feature.cpp190
-rw-r--r--searchlib/src/vespa/searchlib/features/great_circle_distance_feature.h56
-rw-r--r--searchlib/src/vespa/searchlib/features/setup.cpp2
4 files changed, 249 insertions, 0 deletions
diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
index 9d4119a7faa..88531a46cb1 100644
--- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
@@ -12,6 +12,7 @@ vespa_add_library(searchlib_features OBJECT
debug_wait.cpp
dense_tensor_attribute_executor.cpp
direct_tensor_attribute_executor.cpp
+ great_circle_distance_feature.cpp
distancefeature.cpp
distancetopathfeature.cpp
documenttestutils.cpp
diff --git a/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.cpp b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.cpp
new file mode 100644
index 00000000000..eb47c88ecd0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.cpp
@@ -0,0 +1,190 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "great_circle_distance_feature.h"
+#include <vespa/searchcommon/common/schema.h>
+#include <vespa/searchlib/common/geo_location_spec.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/document/datatype/positiondatatype.h>
+#include <vespa/vespalib/geo/zcurve.h>
+#include <vespa/vespalib/util/issue.h>
+#include <vespa/vespalib/util/stash.h>
+#include <cmath>
+#include <limits>
+#include "utils.h"
+
+#include <vespa/log/log.h>
+LOG_SETUP(".features.great_circle_distance_feature");
+
+using namespace search::fef;
+using namespace search::index::schema;
+using vespalib::Issue;
+
+namespace search::features {
+
+/**
+ * Finds the smallest great-circle distance (in km) between any query
+ * location and any position stored for the given document.
+ *
+ * As a side effect the latitude/longitude of the winning document position
+ * are stored in _best_lat / _best_lng. Both are reset on every call so that
+ * a document without positions never reports coordinates that were found
+ * for a previously evaluated document.
+ *
+ * @param docId document to read positions for.
+ * @return the shortest distance found, or the largest finite feature_t value
+ *         when there are no query locations or the document has no positions.
+ */
+feature_t GCDExecutor::calculateGCD(uint32_t docId) {
+    feature_t dist = std::numeric_limits<feature_t>::max();
+    // Start from a defined state; only overwritten when a position is found.
+    _best_lat = 0.0;
+    _best_lng = 0.0;
+    // The constructor leaves _locations empty whenever _pos is null, so this
+    // guard also protects the dereference of _pos below.
+    if (_locations.empty()) {
+        return dist;
+    }
+    _intBuf.fill(*_pos, docId);
+    const uint32_t numValues = _intBuf.size();
+    int32_t docx = 0;
+    int32_t docy = 0;
+    // Bind by reference: no need to copy each query location per iteration.
+    for (auto & loc : _locations) {
+        for (uint32_t i = 0; i < numValues; ++i) {
+            vespalib::geo::ZCurve::decode(_intBuf[i], &docx, &docy);
+            // Decoded coordinates are scaled by 1e6 (y -> latitude, x -> longitude).
+            const double lat = docy / 1.0e6;
+            const double lng = docx / 1.0e6;
+            const double d = loc.km_great_circle_distance(lat, lng);
+            if (d < dist) {
+                dist = d;
+                _best_lat = lat;
+                _best_lng = lng;
+            }
+        }
+    }
+    return dist;
+}
+
+/**
+ * Builds the executor from the query locations and the position attribute.
+ *
+ * When no attribute is given the executor is left without any locations,
+ * which makes calculateGCD() return its "no distance" value without ever
+ * touching the (null) attribute.
+ *
+ * @param locations location specs from the query; null or invalid entries
+ *                  are skipped.
+ * @param pos       attribute holding the document positions, may be null.
+ */
+GCDExecutor::GCDExecutor(GeoLocationSpecPtrs locations, const attribute::IAttributeVector * pos)
+    : FeatureExecutor(),
+      _locations(),
+      _pos(pos),
+      _intBuf(),
+      _best_lat(0.0),
+      _best_lng(0.0)
+{
+    if (_pos == nullptr) {
+        return;
+    }
+    _intBuf.allocate(_pos->getMaxValueCount());
+    _locations.reserve(locations.size());
+    for (const auto * p : locations) {
+        if (p && p->location.valid()) {
+            // Query points are scaled by 1e6 (y -> latitude, x -> longitude).
+            const double lat = p->location.point.y * 1.0e-6;
+            const double lng = p->location.point.x * 1.0e-6;
+            _locations.emplace_back(search::common::GeoGcd{lat, lng});
+        }
+    }
+}
+
+/**
+ * Produces the three outputs for one document: the distance in km (output 0)
+ * followed by the latitude (output 1) and longitude (output 2) recorded for
+ * the closest position.
+ *
+ * @param docId document to evaluate.
+ */
+void
+GCDExecutor::execute(uint32_t docId)
+{
+    // Run the calculation first: it is what updates _best_lat / _best_lng.
+    const feature_t km = calculateGCD(docId);
+    outputs().set_number(0, km);
+    outputs().set_number(1, _best_lat); // latitude
+    outputs().set_number(2, _best_lng); // longitude
+}
+
+
+/** Creates the blueprint with the base name "great_circle_distance" and no attribute selected yet. */
+GreatCircleDistanceBlueprint::GreatCircleDistanceBlueprint()
+    : Blueprint("great_circle_distance"),
+      _attr_name()
+{
+}
+
+/** Destructor; defined here rather than in the header, with compiler-generated behavior. */
+GreatCircleDistanceBlueprint::~GreatCircleDistanceBlueprint() = default;
+
+/** Does nothing: both arguments are ignored and no dump features are visited. */
+void
+GreatCircleDistanceBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const
+{
+    // nothing to visit
+}
+
+/** Returns a new, default-constructed blueprint of this type. */
+Blueprint::UP GreatCircleDistanceBlueprint::createInstance() const {
+    return std::make_unique<GreatCircleDistanceBlueprint>();
+}
+
+/**
+ * Completes setup for a position attribute: declares the three outputs
+ * (km, latitude, longitude), remembers the attribute name and hints that the
+ * attribute will be accessed.
+ *
+ * @param env  index environment that receives the attribute access hint.
+ * @param attr name of the attribute holding the positions.
+ * @return always true.
+ */
+bool
+GreatCircleDistanceBlueprint::setup_geopos(const IIndexEnvironment & env, const vespalib::string &attr)
+{
+    // Keep this order: presumably it has to line up with the output
+    // indexes 0, 1 and 2 written by GCDExecutor::execute().
+    describeOutput("km", "The distance (in km) from the query position.");
+    describeOutput("latitude", "Latitude of closest point");
+    describeOutput("longitude", "Longitude of closest point");
+
+    _attr_name = attr;
+    env.hintAttributeAccess(_attr_name);
+    return true;
+}
+
+
+/**
+ * Validates the feature parameters and resolves the position attribute.
+ *
+ * Accepted parameter forms:
+ *   - (name)          : name of the position field
+ *   - ("field", name) : same, with an explicit "field" keyword first
+ *
+ * The field name is translated to its z-curve field name, and setup succeeds
+ * only if that field exists, has an attribute, holds INT64 values and is
+ * either a single value or an array.
+ *
+ * @param env    index environment used to look up fields.
+ * @param params parameters given to the rank feature.
+ * @return true when the blueprint was set up, false (after logging an error)
+ *         otherwise.
+ */
+bool
+GreatCircleDistanceBlueprint::setup(const IIndexEnvironment & env,
+                                    const ParameterList & params)
+{
+    // Only touch params[...] after the parameter count has been checked.
+    vespalib::string arg;
+    if (params.size() == 1) {
+        // params[0] = attribute name
+        arg = params[0].getValue();
+    } else if (params.size() == 2) {
+        // params[0] = "field"
+        // params[1] = attribute name
+        arg = params[0].getValue();
+        if (arg == "field") {
+            arg = params[1].getValue();
+        } else {
+            LOG(error, "first argument must be 'field' but was '%s'", arg.c_str());
+            return false;
+        }
+    } else {
+        // %zu is the conversion matching the unsigned size type.
+        LOG(error, "bad params.size() = %zu", params.size());
+        return false;
+    }
+    const vespalib::string z = document::PositionDataType::getZCurveFieldName(arg);
+    const auto *fi = env.getFieldByName(z);
+    if (fi != nullptr && fi->hasAttribute()) {
+        const auto dt = fi->get_data_type();
+        const auto ct = fi->collection();
+        const bool single_or_array = (ct == CollectionType::SINGLE || ct == CollectionType::ARRAY);
+        if (dt == DataType::INT64 && single_or_array) {
+            return setup_geopos(env, z);
+        }
+    }
+    // Tell "no such field at all" apart from "field exists but is unusable".
+    if (env.getFieldByName(arg) == nullptr && fi == nullptr) {
+        LOG(error, "unknown field '%s' for rank feature %s", arg.c_str(), getName().c_str());
+    } else {
+        LOG(error, "field '%s' must be type position and attribute for rank feature %s", arg.c_str(), getName().c_str());
+    }
+    return false;
+}
+
+/**
+ * Creates the executor for one query.
+ *
+ * Valid query locations are split into those whose field name equals the
+ * attribute selected in setup() and all the others. Locations for the
+ * matching field are preferred; the others are used only when no location
+ * matches. If the query has no valid location, or the attribute is missing
+ * or of an unusable kind, the executor is created without an attribute
+ * (a problem with the attribute is also reported as an issue).
+ *
+ * @param env   query environment providing locations and attributes.
+ * @param stash stash in which the executor is created.
+ * @return the executor created in the stash.
+ */
+FeatureExecutor &
+GreatCircleDistanceBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash &stash) const
+{
+    GeoLocationSpecPtrs matching_locs;
+    GeoLocationSpecPtrs other_locs;
+
+    for (auto loc_ptr : env.getAllLocations()) {
+        if (loc_ptr && loc_ptr->location.valid()) {
+            if (loc_ptr->field_name == _attr_name) {
+                LOG(debug, "found loc from query env matching '%s'", _attr_name.c_str());
+                matching_locs.push_back(loc_ptr);
+            } else {
+                LOG(debug, "found loc(%s) from query env not matching arg(%s)",
+                    loc_ptr->field_name.c_str(), _attr_name.c_str());
+                other_locs.push_back(loc_ptr);
+            }
+        }
+    }
+    if (matching_locs.empty() && other_locs.empty()) {
+        LOG(debug, "createExecutor: no valid locations");
+        return stash.create<GCDExecutor>(matching_locs, nullptr);
+    }
+    LOG(debug, "createExecutor: valid location, attribute='%s'", _attr_name.c_str());
+    // expect geo pos:
+    const search::attribute::IAttributeVector * pos = env.getAttributeContext().getAttribute(_attr_name);
+    if (pos != nullptr) {
+        if (!pos->isIntegerType()) {
+            Issue::report("great_circle_distance feature: The position attribute '%s' is not an integer attribute.",
+                          pos->getName().c_str());
+            pos = nullptr;
+        } else if (pos->getCollectionType() == attribute::CollectionType::WSET) {
+            Issue::report("great_circle_distance feature: The position attribute '%s' is a weighted set attribute.",
+                          pos->getName().c_str());
+            pos = nullptr;
+        }
+    } else {
+        Issue::report("great_circle_distance feature: The position attribute '%s' was not found.", _attr_name.c_str());
+    }
+    const bool use_matching = !matching_locs.empty();
+    // %p requires a pointer to void, hence the explicit cast.
+    LOG(debug, "use '%s' locations with pos=%p", use_matching ? "matching" : "other",
+        static_cast<const void *>(pos));
+    return stash.create<GCDExecutor>(use_matching ? matching_locs : other_locs, pos);
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.h b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.h
new file mode 100644
index 00000000000..d44e4f5569b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.h
@@ -0,0 +1,56 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchcommon/attribute/attributecontent.h>
+#include <vespa/searchlib/common/geo_gcd.h>
+
+namespace search::features {
+
+/** Convenience alias: a list of pointers to query location specs. */
+using GeoLocationSpecPtrs = std::vector<const search::common::GeoLocationSpec *>;
+
+/**
+ * Implements the executor for the great circle distance feature.
+ *
+ * For each document it outputs the shortest distance in km between the query
+ * locations and the document positions, plus the latitude and longitude of
+ * the document position that gave that distance.
+ */
+class GCDExecutor : public fef::FeatureExecutor {
+private:
+    std::vector<search::common::GeoGcd> _locations;
+    const attribute::IAttributeVector * _pos;
+    attribute::IntegerContent _intBuf;
+    // Coordinates of the closest document position found by calculateGCD().
+    // Default-initialized so execute() never publishes indeterminate values.
+    feature_t _best_lat = 0.0;
+    feature_t _best_lng = 0.0;
+
+    /**
+     * Calculates the shortest distance for a document and updates
+     * _best_lat / _best_lng when a closer position is found.
+     */
+    feature_t calculateGCD(uint32_t docId);
+public:
+    /**
+     * Constructs an executor for the GCD feature.
+     *
+     * @param locations location objects associated with the query environment.
+     * @param pos the attribute to use for positions (expects zcurve encoding).
+     */
+    GCDExecutor(GeoLocationSpecPtrs locations, const attribute::IAttributeVector * pos);
+    void execute(uint32_t docId) override;
+};
+
+/**
+ * Implements the blueprint for the GCD executor.
+ */
+class GreatCircleDistanceBlueprint : public fef::Blueprint {
+private:
+    // Name of the attribute holding the positions; assigned in setup_geopos().
+    vespalib::string _attr_name;
+    bool setup_geopos(const fef::IIndexEnvironment & env, const vespalib::string &attr);
+public:
+    GreatCircleDistanceBlueprint();
+    ~GreatCircleDistanceBlueprint() override;
+    void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override;
+    fef::Blueprint::UP createInstance() const override;
+    // Presumably declares two accepted signatures, (string) and
+    // (string, string); setup() handles exactly those two parameter counts.
+    fef::ParameterDescriptions getDescriptions() const override {
+        return fef::ParameterDescriptions().desc().string().desc().string().string();
+    }
+    bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override;
+    fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp
index f2d5bd745ac..2bc8a349d1b 100644
--- a/searchlib/src/vespa/searchlib/features/setup.cpp
+++ b/searchlib/src/vespa/searchlib/features/setup.cpp
@@ -10,6 +10,7 @@
#include "debug_attribute_wait.h"
#include "debug_wait.h"
#include "distancefeature.h"
+#include "great_circle_distance_feature.h"
#include "distancetopathfeature.h"
#include "dotproductfeature.h"
#include "element_completeness_feature.h"
@@ -126,6 +127,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry)
registry.addPrototype(std::make_shared<GlobalSequenceBlueprint>());
registry.addPrototype(std::make_shared<OnnxBlueprint>("onnx"));
registry.addPrototype(std::make_shared<OnnxBlueprint>("onnxModel"));
+ registry.addPrototype(std::make_shared<GreatCircleDistanceBlueprint>());
// Ranking Expression
auto replacers = std::make_unique<ListExpressionReplacer>();