diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2020-08-18 10:00:03 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-08-18 10:00:03 +0200 |
commit | ef6227ebc92c625081f703a806e0b0c2c02a5a1c (patch) | |
tree | 9af42a5c95c9dc78fc739567b74f5b5b85ee1e42 | |
parent | 1ec11fea21acbb3a0f6a8d04da9c6aea54d33b63 (diff) | |
parent | 71387f83fbcfe1353f328c66024924d03fba3ca3 (diff) |
Merge pull request #14056 from vespa-engine/arnej/set-rawscore-for-locations
Arnej/set rawscore for locations
3 files changed, 126 insertions, 3 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 032caaacba8..b9e4bf565ef 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -263,8 +263,18 @@ public: const common::Location &location() const { return _location; } SearchIterator::UP - createLeafSearch(const TermFieldMatchDataArray &, bool strict) const override + createLeafSearch(const TermFieldMatchDataArray &tfmda, bool strict) const override { + if (tfmda.size() == 1) { + // search in exactly one field + fef::TermFieldMatchData &tfmd = *tfmda[0]; + return search::common::create_location_iterator(tfmd, + _attribute.getNumDocs(), + strict, + _location); + } else { + LOG(debug, "wrong size tfmda: %zu (fallback to old location iterator)\n", tfmda.size()); + } return FastS_AllocLocationIterator(_attribute.getNumDocs(), strict, _location); } }; @@ -273,7 +283,8 @@ public: Blueprint::UP make_location_blueprint(const FieldSpec &field, const IAttributeVector &attribute, const Location &loc) { - LOG(debug, "make_location_blueprint(p[%d,%d], r[%u], aspect[%u], bb[[%d,%d],[%d,%d]])", + LOG(debug, "make_location_blueprint(fieldId[%u], p[%d,%d], r[%u], aspect[%u], bb[[%d,%d],[%d,%d]])", + field.getFieldId(), loc.point.x, loc.point.y, loc.radius, loc.x_aspect.multiplier, loc.bounding_box.x.low, loc.bounding_box.x.high, diff --git a/searchlib/src/vespa/searchlib/common/locationiterators.cpp b/searchlib/src/vespa/searchlib/common/locationiterators.cpp index d90ed3b41f3..413930522c6 100644 --- a/searchlib/src/vespa/searchlib/common/locationiterators.cpp +++ b/searchlib/src/vespa/searchlib/common/locationiterators.cpp @@ -7,6 +7,108 @@ #include <vespa/log/log.h> LOG_SETUP(".searchlib.common.locationiterators"); +namespace search::common { + +class LocationIterator : public search::queryeval::SearchIterator +{ +private: + static constexpr double pi = 3.14159265358979323846; + // microdegrees -> degrees -> radians -> km (using Earth mean radius) + static constexpr double udeg_to_km = 1.0e-6 * (pi / 180.0) * 6371.0088; + search::fef::TermFieldMatchData & _tfmd; + const unsigned int _numDocs; + const bool _strict; + const Location & _location; + uint32_t _num_values; + std::vector<search::AttributeVector::largeint_t> _pos; + + void doSeek(uint32_t docId) override; + void doUnpack(uint32_t docId) override; +public: + LocationIterator(search::fef::TermFieldMatchData &tfmd, + unsigned int numDocs, + bool strict, + const Location & location); + ~LocationIterator() override; +}; + +LocationIterator::LocationIterator(search::fef::TermFieldMatchData &tfmd, + unsigned int numDocs, + bool strict, + const Location & location) + : SearchIterator(), + _tfmd(tfmd), + _numDocs(numDocs), + _strict(strict), + _location(location), + _num_values(0), + _pos() +{ + _pos.resize(1); //Need at least 1 entry as the singlevalue attributes does not honour given size. + LOG(debug, "created LocationIterator(numDocs=%u)\n", numDocs); +}; + + +LocationIterator::~LocationIterator() = default; + +void +LocationIterator::doSeek(uint32_t docId) +{ + while (__builtin_expect(docId < getEndId(), true)) { + if (__builtin_expect(docId >= _numDocs, false)) { + break; + } + _num_values = _location.getVec()->get(docId, &_pos[0], _pos.size()); + while (_num_values > _pos.size()) { + _pos.resize(_num_values); + _num_values = _location.getVec()->get(docId, &_pos[0], _pos.size()); + } + for (uint32_t i = 0; i < _num_values; i++) { + int64_t docxy(_pos[i]); + if (_location.inside_limit(docxy)) { + setDocId(docId); + return; + } + } + if (!_strict) { + return; + } + ++docId; + } + setAtEnd(); +} + +void +LocationIterator::doUnpack(uint32_t docId) +{ + uint64_t sqabsdist = std::numeric_limits<uint64_t>::max(); + int32_t docx = 0; + int32_t docy = 0; + // use _num_values from _pos fetched in doSeek() + for (uint32_t i = 0; i < _num_values; i++) { + int64_t docxy(_pos[i]); + vespalib::geo::ZCurve::decode(docxy, &docx, &docy); + uint64_t sqdist = _location.sq_distance_to({docx, docy}); + if (sqdist < sqabsdist) { + sqabsdist = sqdist; + } + } + double dist = std::sqrt(double(sqabsdist)); + double score = 1.0 / (1.0 + (udeg_to_km * dist)); + LOG(debug, "unpack LI(%u) score %f\n", docId, score); + LOG(debug, "distance: %f micro-degrees ~= %f km", dist, udeg_to_km * dist); + _tfmd.setRawScore(docId, score); +} + +std::unique_ptr<search::queryeval::SearchIterator> +create_location_iterator(search::fef::TermFieldMatchData &tfmd, unsigned int numDocs, + bool strict, const Location & location) +{ + return std::make_unique<LocationIterator>(tfmd, numDocs, strict, location); +} + +} // namespace + using namespace search::common; class FastS_2DZLocationIterator : public search::queryeval::SearchIterator diff --git a/searchlib/src/vespa/searchlib/common/locationiterators.h b/searchlib/src/vespa/searchlib/common/locationiterators.h index e345bcae4fe..d963ac2e479 100644 --- a/searchlib/src/vespa/searchlib/common/locationiterators.h +++ b/searchlib/src/vespa/searchlib/common/locationiterators.h @@ -4,9 +4,19 @@ #include <vespa/searchlib/queryeval/searchiterator.h> #include <vespa/searchlib/common/location.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> + +namespace search::common { + +std::unique_ptr<search::queryeval::SearchIterator> +create_location_iterator(search::fef::TermFieldMatchData &tfmd, + unsigned int numDocs, + bool strict, + const Location & location); + +} // namespace std::unique_ptr<search::queryeval::SearchIterator> FastS_AllocLocationIterator(unsigned int numDocs, bool strict, const search::common::Location & location); - |