diff options
Diffstat (limited to 'searchlib')
4 files changed, 276 insertions, 0 deletions
diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
index 9d4119a7faa..88531a46cb1 100644
--- a/searchlib/src/vespa/searchlib/features/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt
@@ -12,6 +12,7 @@ vespa_add_library(searchlib_features OBJECT
     debug_wait.cpp
     dense_tensor_attribute_executor.cpp
     direct_tensor_attribute_executor.cpp
+    great_circle_distance_feature.cpp
     distancefeature.cpp
     distancetopathfeature.cpp
     documenttestutils.cpp
diff --git a/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.cpp b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.cpp
new file mode 100644
index 00000000000..eb47c88ecd0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.cpp
@@ -0,0 +1,216 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "great_circle_distance_feature.h"
+#include <vespa/searchcommon/common/schema.h>
+#include <vespa/searchlib/common/geo_location_spec.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/document/datatype/positiondatatype.h>
+#include <vespa/vespalib/geo/zcurve.h>
+#include <vespa/vespalib/util/issue.h>
+#include <vespa/vespalib/util/stash.h>
+#include <cmath>
+#include <limits>
+#include "utils.h"
+
+#include <vespa/log/log.h>
+LOG_SETUP(".features.great_circle_distance_feature");
+
+using namespace search::fef;
+using namespace search::index::schema;
+using vespalib::Issue;
+
+namespace search::features {
+
+/**
+ * Finds the smallest great circle distance between any query location
+ * and any position stored for the document.
+ *
+ * _best_lat and _best_lng are reset on every call, so a document without
+ * a usable position reports 0.0 for both instead of whatever an earlier
+ * document (or uninitialized memory) left behind.
+ *
+ * @param docId the document to evaluate.
+ * @return the smallest distance in km, or the largest feature_t value
+ *         when there is nothing to compare.
+ */
+feature_t GCDExecutor::calculateGCD(uint32_t docId) {
+    feature_t dist = std::numeric_limits<feature_t>::max();
+    _best_lat = 0.0;
+    _best_lng = 0.0;
+    if (_locations.empty()) {
+        // also the case when _pos is nullptr (see the constructor), which
+        // is what makes dereferencing _pos below safe.
+        return dist;
+    }
+    _intBuf.fill(*_pos, docId);
+    uint32_t numValues = _intBuf.size();
+    int32_t docx = 0;
+    int32_t docy = 0;
+    for (auto loc : _locations) {
+        for (uint32_t i = 0; i < numValues; ++i) {
+            vespalib::geo::ZCurve::decode(_intBuf[i], &docx, &docy);
+            // decoded x is the longitude and y the latitude, both scaled by 1e6
+            double lat = docy / 1.0e6;
+            double lng = docx / 1.0e6;
+            double d = loc.km_great_circle_distance(lat, lng);
+            if (d < dist) {
+                dist = d;
+                _best_lat = lat;
+                _best_lng = lng;
+            }
+        }
+    }
+    return dist;
+}
+
+GCDExecutor::GCDExecutor(GeoLocationSpecPtrs locations, const attribute::IAttributeVector * pos)
+    : FeatureExecutor(),
+      _locations(),
+      _pos(pos),
+      _intBuf()
+{
+    if (_pos == nullptr) {
+        // keep _locations empty; calculateGCD() relies on this
+        return;
+    }
+    _intBuf.allocate(_pos->getMaxValueCount());
+    for (const auto * p : locations) {
+        if (p && p->location.valid()) {
+            // query points use the same 1e6 scaling as the decoded positions
+            double lat = p->location.point.y * 1.0e-6;
+            double lng = p->location.point.x * 1.0e-6;
+            _locations.emplace_back(search::common::GeoGcd{lat, lng});
+        }
+    }
+}
+
+void
+GCDExecutor::execute(uint32_t docId)
+{
+    // calculateGCD() runs first since it also updates _best_lat/_best_lng;
+    // outputs follow the order described in setup_geopos().
+    outputs().set_number(0, calculateGCD(docId));
+    outputs().set_number(1, _best_lat); // latitude
+    outputs().set_number(2, _best_lng); // longitude
+}
+
+
+GreatCircleDistanceBlueprint::GreatCircleDistanceBlueprint() :
+    Blueprint("great_circle_distance"),
+    _attr_name()
+{
+}
+
+GreatCircleDistanceBlueprint::~GreatCircleDistanceBlueprint() = default;
+
+void GreatCircleDistanceBlueprint::visitDumpFeatures(const IIndexEnvironment &,
+                                                     IDumpFeatureVisitor &) const
+{
+}
+
+Blueprint::UP
+GreatCircleDistanceBlueprint::createInstance() const
+{
+    return std::make_unique<GreatCircleDistanceBlueprint>();
+}
+
+bool
+GreatCircleDistanceBlueprint::setup_geopos(const IIndexEnvironment & env, const vespalib::string &attr)
+{
+    _attr_name = attr;
+    describeOutput("km", "The distance (in km) from the query position.");
+    describeOutput("latitude", "Latitude of closest point");
+    describeOutput("longitude", "Longitude of closest point");
+    env.hintAttributeAccess(_attr_name);
+    return true;
+}
+
+
+/**
+ * Accepts either (attribute name) or ("field", attribute name) and
+ * resolves the name to its zcurve position attribute.
+ */
+bool
+GreatCircleDistanceBlueprint::setup(const IIndexEnvironment & env,
+                                    const ParameterList & params)
+{
+    vespalib::string arg = params[0].getValue();
+    if (params.size() == 1) {
+        // params[0] = attribute name
+    } else if (params.size() == 2) {
+        // params[0] = "field"
+        // params[1] = attribute name
+        if (arg == "field") {
+            arg = params[1].getValue();
+        } else {
+            LOG(error, "first argument must be 'field' but was '%s'", arg.c_str());
+            return false;
+        }
+    } else {
+        LOG(error, "bad params.size() = %zu", params.size());
+        return false;
+    }
+    vespalib::string z = document::PositionDataType::getZCurveFieldName(arg);
+    const auto *fi = env.getFieldByName(z);
+    if (fi != nullptr && fi->hasAttribute()) {
+        auto dt = fi->get_data_type();
+        auto ct = fi->collection();
+        if (dt == DataType::INT64) {
+            if (ct == CollectionType::SINGLE || ct == CollectionType::ARRAY) {
+                return setup_geopos(env, z);
+            }
+        }
+    }
+    if (env.getFieldByName(arg) == nullptr && fi == nullptr) {
+        LOG(error, "unknown field '%s' for rank feature %s\n", arg.c_str(), getName().c_str());
+    } else {
+        LOG(error, "field '%s' must be type position and attribute for rank feature %s\n", arg.c_str(), getName().c_str());
+    }
+    return false;
+}
+
+FeatureExecutor &
+GreatCircleDistanceBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash &stash) const
+{
+    // expect geo pos:
+    const search::attribute::IAttributeVector * pos = nullptr;
+    GeoLocationSpecPtrs matching_locs;
+    GeoLocationSpecPtrs other_locs;
+
+    for (auto loc_ptr : env.getAllLocations()) {
+        if (loc_ptr && loc_ptr->location.valid()) {
+            if (loc_ptr->field_name == _attr_name) {
+                LOG(debug, "found loc from query env matching '%s'", _attr_name.c_str());
+                matching_locs.push_back(loc_ptr);
+            } else {
+                LOG(debug, "found loc(%s) from query env not matching arg(%s)",
+                    loc_ptr->field_name.c_str(), _attr_name.c_str());
+                other_locs.push_back(loc_ptr);
+            }
+        }
+    }
+    if (matching_locs.empty() && other_locs.empty()) {
+        LOG(debug, "createExecutor: no valid locations");
+        return stash.create<GCDExecutor>(matching_locs, nullptr);
+    }
+    LOG(debug, "createExecutor: valid location, attribute='%s'", _attr_name.c_str());
+    pos = env.getAttributeContext().getAttribute(_attr_name);
+    if (pos != nullptr) {
+        if (!pos->isIntegerType()) {
+            Issue::report("great_circle_distance feature: The position attribute '%s' is not an integer attribute.",
+                          pos->getName().c_str());
+            pos = nullptr;
+        } else if (pos->getCollectionType() == attribute::CollectionType::WSET) {
+            Issue::report("great_circle_distance feature: The position attribute '%s' is a weighted set attribute.",
+                          pos->getName().c_str());
+            pos = nullptr;
+        }
+    } else {
+        Issue::report("great_circle_distance feature: The position attribute '%s' was not found.", _attr_name.c_str());
+    }
+    // prefer locations given for this attribute, else fall back to the others
+    LOG(debug, "use '%s' locations with pos=%p", matching_locs.empty() ? "other" : "matching", pos);
+    return stash.create<GCDExecutor>(matching_locs.empty() ? other_locs : matching_locs, pos);
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.h b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.h
new file mode 100644
index 00000000000..d44e4f5569b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.h
@@ -0,0 +1,57 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/fef/blueprint.h>
+#include <vespa/searchcommon/attribute/attributecontent.h>
+#include <vespa/searchlib/common/geo_gcd.h>
+
+namespace search::features {
+
+/** Convenience typedef. */
+using GeoLocationSpecPtrs = std::vector<const search::common::GeoLocationSpec *>;
+
+/**
+ * Implements the executor for the great circle distance feature.
+ */
+class GCDExecutor : public fef::FeatureExecutor {
+private:
+    std::vector<search::common::GeoGcd> _locations;
+    const attribute::IAttributeVector * _pos;
+    attribute::IntegerContent _intBuf;
+    // closest point found by the latest calculateGCD() call; 0.0 when none
+    feature_t _best_lat = 0.0;
+    feature_t _best_lng = 0.0;
+
+    feature_t calculateGCD(uint32_t docId);
+public:
+    /**
+     * Constructs an executor for the GCD feature.
+     *
+     * @param locations location objects associated with the query environment.
+     * @param pos the attribute to use for positions (expects zcurve encoding).
+     */
+    GCDExecutor(GeoLocationSpecPtrs locations, const attribute::IAttributeVector * pos);
+    void execute(uint32_t docId) override;
+};
+
+/**
+ * Implements the blueprint for the GCD executor.
+ */
+class GreatCircleDistanceBlueprint : public fef::Blueprint {
+private:
+    vespalib::string _attr_name;
+    bool setup_geopos(const fef::IIndexEnvironment & env, const vespalib::string &attr);
+public:
+    GreatCircleDistanceBlueprint();
+    ~GreatCircleDistanceBlueprint();
+    void visitDumpFeatures(const fef::IIndexEnvironment & env, fef::IDumpFeatureVisitor & visitor) const override;
+    fef::Blueprint::UP createInstance() const override;
+    fef::ParameterDescriptions getDescriptions() const override {
+        return fef::ParameterDescriptions().desc().string().desc().string().string();
+    }
+    bool setup(const fef::IIndexEnvironment & env, const fef::ParameterList & params) override;
+    fef::FeatureExecutor &createExecutor(const fef::IQueryEnvironment &env, vespalib::Stash &stash) const override;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp
index f2d5bd745ac..2bc8a349d1b 100644
--- a/searchlib/src/vespa/searchlib/features/setup.cpp
+++ b/searchlib/src/vespa/searchlib/features/setup.cpp
@@ -10,6 +10,7 @@
 #include "debug_attribute_wait.h"
 #include "debug_wait.h"
 #include "distancefeature.h"
+#include "great_circle_distance_feature.h"
 #include "distancetopathfeature.h"
 #include "dotproductfeature.h"
 #include "element_completeness_feature.h"
@@ -126,6 +127,7 @@ void setup_search_features(fef::IBlueprintRegistry & registry)
     registry.addPrototype(std::make_shared<GlobalSequenceBlueprint>());
     registry.addPrototype(std::make_shared<OnnxBlueprint>("onnx"));
     registry.addPrototype(std::make_shared<OnnxBlueprint>("onnxModel"));
+    registry.addPrototype(std::make_shared<GreatCircleDistanceBlueprint>());
 
     // Ranking Expression
     auto replacers = std::make_unique<ListExpressionReplacer>();