From ee681536a92362033703a379b200ba333ed5b42b Mon Sep 17 00:00:00 2001 From: Arne H Juul Date: Thu, 3 Feb 2022 20:09:50 +0000 Subject: geo distance for streaming * try to make the distance rank features work also in streaming mode * don't check the index environment too closely (it has wrong information) * expect matching locations from the query to have the field name (without _zcurve suffix) as their "attribute" --- .../vespa/searchlib/features/distancefeature.cpp | 26 ++++++++++-------- .../src/vespa/searchlib/features/distancefeature.h | 1 + .../features/great_circle_distance_feature.cpp | 32 ++++++++++++++-------- .../features/great_circle_distance_feature.h | 1 + 4 files changed, 38 insertions(+), 22 deletions(-) diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.cpp b/searchlib/src/vespa/searchlib/features/distancefeature.cpp index 57fa5fc7cee..7429f32cf4f 100644 --- a/searchlib/src/vespa/searchlib/features/distancefeature.cpp +++ b/searchlib/src/vespa/searchlib/features/distancefeature.cpp @@ -151,6 +151,7 @@ const feature_t DistanceExecutor::DEFAULT_DISTANCE(6400000000.0); DistanceBlueprint::DistanceBlueprint() : Blueprint("distance"), + _field_name(), _arg_string(), _attr_id(search::index::Schema::UNKNOWN_FIELD_ID), _use_geo_pos(false), @@ -208,7 +209,7 @@ DistanceBlueprint::setup(const IIndexEnvironment & env, bool allow_bad_field = true; if (params.size() == 2) { // params[0] = field / label - // params[0] = attribute name / label value + // params[1] = attribute name / label value if (arg == "label") { _arg_string = params[1].getValue(); _use_item_label = true; @@ -218,12 +219,18 @@ DistanceBlueprint::setup(const IIndexEnvironment & env, arg = params[1].getValue(); allow_bad_field = false; } else { - LOG(error, "first argument must be 'field' or 'label', but was '%s'", - arg.c_str()); + LOG(error, "first argument must be 'field' or 'label', but was '%s'", arg.c_str()); return false; } } - const FieldInfo *fi = env.getFieldByName(arg); + _field_name = arg; + vespalib::string z = document::PositionDataType::getZCurveFieldName(arg); + const FieldInfo *fi = env.getFieldByName(z); + if (fi != nullptr && fi->hasAttribute()) { + // can't check anything here because streaming has wrong information + return setup_geopos(env, z); + } + fi = env.getFieldByName(arg); if (fi != nullptr && fi->hasAttribute()) { auto dt = fi->get_data_type(); auto ct = fi->collection(); @@ -236,17 +243,12 @@ DistanceBlueprint::setup(const IIndexEnvironment & env, return setup_geopos(env, arg); } } - vespalib::string z = document::PositionDataType::getZCurveFieldName(arg); - fi = env.getFieldByName(z); - if (fi != nullptr && fi->hasAttribute()) { - return setup_geopos(env, z); - } if (allow_bad_field) { // TODO remove on Vespa 8 // backwards compatibility fallback: return setup_geopos(env, arg); } - if (env.getFieldByName(arg) == nullptr && fi == nullptr) { + if (env.getFieldByName(arg) == nullptr) { LOG(error, "unknown field '%s' for rank feature %s\n", arg.c_str(), getName().c_str()); } else { LOG(error, "field '%s' must be an attribute for rank feature %s\n", arg.c_str(), getName().c_str()); @@ -270,7 +272,9 @@ DistanceBlueprint::createExecutor(const IQueryEnvironment &env, vespalib::Stash for (auto loc_ptr : env.getAllLocations()) { if (_use_geo_pos && loc_ptr && loc_ptr->location.valid()) { - if (loc_ptr->field_name == _arg_string) { + if (loc_ptr->field_name == _arg_string || + loc_ptr->field_name == _field_name) + { LOG(debug, "found loc from query env matching '%s'", _arg_string.c_str()); matching_locs.push_back(loc_ptr); } else { diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.h b/searchlib/src/vespa/searchlib/features/distancefeature.h index b60072cf872..6eff0380c3a 100644 --- a/searchlib/src/vespa/searchlib/features/distancefeature.h +++ b/searchlib/src/vespa/searchlib/features/distancefeature.h @@ -44,6 +44,7 @@ public: */ class DistanceBlueprint : public fef::Blueprint { private: + vespalib::string _field_name; vespalib::string _arg_string; uint32_t _attr_id; bool _use_geo_pos; diff --git a/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.cpp b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.cpp index 0dfbfedf877..39a82ca8237 100644 --- a/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.cpp @@ -111,37 +111,45 @@ bool GreatCircleDistanceBlueprint::setup(const IIndexEnvironment & env, const ParameterList & params) { - vespalib::string arg = params[0].getValue(); if (params.size() == 1) { - // params[0] = attribute name + _field_name = params[0].getValue(); } else if (params.size() == 2) { // params[0] = "field" // params[1] = attribute name - if (arg == "field") { - arg = params[1].getValue(); + if (params[0].getValue() == "field") { + _field_name = params[1].getValue(); } else { - LOG(error, "first argument must be 'field' but was '%s'", arg.c_str()); + LOG(error, "first argument must be 'field' but was '%s'", params[0].getValue().c_str()); return false; } } else { - LOG(error, "bad params.size() = %zd", params.size()); + LOG(error, "Wants 2 parameters, but got %zd", params.size()); return false; } - vespalib::string z = document::PositionDataType::getZCurveFieldName(arg); + vespalib::string z = document::PositionDataType::getZCurveFieldName(_field_name); const auto *fi = env.getFieldByName(z); if (fi != nullptr && fi->hasAttribute()) { auto dt = fi->get_data_type(); auto ct = fi->collection(); + LOG(spam, "index env has attribute for field '%s' which is: %s%s", + z.c_str(), + (ct == CollectionType::SINGLE ? "" : + (ct == CollectionType::ARRAY ? "array of " : "collection of ")), + (dt == DataType::INT64 ? "int64" : + (dt == DataType::DOUBLE ? "double" : "something"))); + /* we can't check these because streaming has wrong information if (dt == DataType::INT64) { if (ct == CollectionType::SINGLE || ct == CollectionType::ARRAY) { return setup_geopos(env, z); } } + */ + return setup_geopos(env, z); } - if (env.getFieldByName(arg) == nullptr && fi == nullptr) { - LOG(error, "unknown field '%s' for rank feature %s\n", arg.c_str(), getName().c_str()); + if (env.getFieldByName(_field_name) == nullptr && fi == nullptr) { + LOG(error, "unknown field '%s' for rank feature %s\n", _field_name.c_str(), getName().c_str()); } else { - LOG(error, "field '%s' must be type position and attribute for rank feature %s\n", arg.c_str(), getName().c_str()); + LOG(error, "field '%s' must be type position and attribute for rank feature %s\n", _field_name.c_str(), getName().c_str()); } return false; } @@ -156,7 +164,9 @@ GreatCircleDistanceBlueprint::createExecutor(const IQueryEnvironment &env, vespa for (auto loc_ptr : env.getAllLocations()) { if (loc_ptr && loc_ptr->location.valid()) { - if (loc_ptr->field_name == _attr_name) { + if (loc_ptr->field_name == _attr_name || + loc_ptr->field_name == _field_name) + { LOG(debug, "found loc from query env matching '%s'", _attr_name.c_str()); matching_locs.push_back(loc_ptr); } else { diff --git a/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.h b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.h index d44e4f5569b..22a464ed5fa 100644 --- a/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.h +++ b/searchlib/src/vespa/searchlib/features/great_circle_distance_feature.h @@ -39,6 +39,7 @@ public: */ class GreatCircleDistanceBlueprint : public fef::Blueprint { private: + vespalib::string _field_name; vespalib::string _attr_name; bool setup_geopos(const fef::IIndexEnvironment & env, const vespalib::string &attr); public: -- cgit v1.2.3