From e7670d3dd2790574083195068c637d130386c5b6 Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Fri, 21 Apr 2023 15:16:39 +0000 Subject: Integrate nearest neighbor field searcher in streaming search. --- .../src/vespa/searchvisitor/indexenvironment.cpp | 5 ++- .../src/vespa/searchvisitor/indexenvironment.h | 4 ++- .../src/vespa/searchvisitor/rankmanager.cpp | 13 +++++++- .../src/vespa/searchvisitor/rankprocessor.cpp | 36 ++++++++++++---------- .../src/vespa/vsm/vsm/fieldsearchspec.cpp | 19 +++++++----- 5 files changed, 51 insertions(+), 26 deletions(-) diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp index 538f3efe44a..81df2b5492f 100644 --- a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp @@ -22,13 +22,16 @@ IndexEnvironment::IndexEnvironment(IndexEnvironment &&) noexcept = default; IndexEnvironment::~IndexEnvironment() = default; bool -IndexEnvironment::addField(const vespalib::string & name, bool isAttribute) +IndexEnvironment::addField(const vespalib::string& name, + bool isAttribute, + search::fef::FieldInfo::DataType data_type) { if (getFieldByName(name) != nullptr) { return false; } FieldInfo info(isAttribute ? FieldType::ATTRIBUTE : FieldType::INDEX, FieldInfo::CollectionType::SINGLE, name, _fields.size()); + info.set_data_type(data_type); info.addAttribute(); // we are able to produce needed attributes at query time _fields.push_back(info); _fieldNames[info.name()] = info.id(); diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h index af037d87076..ef679cacdf0 100644 --- a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h +++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h @@ -83,7 +83,9 @@ public: return nullptr; } - bool addField(const vespalib::string & name, bool isAttribute); + bool addField(const vespalib::string& name, + bool isAttribute, + search::fef::FieldInfo::DataType data_type); search::fef::Properties & getProperties() { return _properties; } diff --git a/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp index 706325a0f7a..81a2a48fb4d 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp @@ -2,6 +2,7 @@ #include "rankmanager.h" #include +#include #include #include #include @@ -40,6 +41,16 @@ RankManager::Snapshot::addProperties(const vespa::config::search::RankProfilesCo } } +FieldInfo::DataType +to_data_type(VsmfieldsConfig::Fieldspec::Searchmethod search_method) +{ + if (search_method == VsmfieldsConfig::Fieldspec::Searchmethod::NEAREST_NEIGHBOR) { + return FieldInfo::DataType::TENSOR; + } + // This is the default FieldInfo data type if not specified. + return FieldInfo::DataType::DOUBLE; +} + void RankManager::Snapshot::detectFields(const VsmfieldsHandle & fields) { @@ -49,7 +60,7 @@ RankManager::Snapshot::detectFields(const VsmfieldsHandle & fields) LOG(debug, "Adding field of type '%s' and name '%s' with id '%u' the index environment.", isAttribute ? "ATTRIBUTE" : "INDEX", fs.name.c_str(), i); // This id must match the vsm specific field id - _protoEnv.addField(fs.name, isAttribute); + _protoEnv.addField(fs.name, isAttribute, to_data_type(fs.searchmethod)); } } diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp index b41eb041c57..ba97a708cc5 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp @@ -56,24 +56,28 @@ RankProcessor::initQueryEnvironment() { QueryWrapper::TermList & terms = _query.getTermList(); - for (uint32_t i = 0; i < terms.size(); ++i) { - if (terms[i].isGeoPosTerm()) { - const vespalib::string & fieldName = terms[i].getTerm()->index(); - const vespalib::string & locStr = terms[i].getTerm()->getTermString(); + for (auto& term : terms) { + if (term.isGeoPosTerm()) { + const vespalib::string & fieldName = term.getTerm()->index(); + const vespalib::string & locStr = term.getTerm()->getTermString(); _queryEnv.addGeoLocation(fieldName, locStr); } - if (!terms[i].isPhraseTerm() || terms[i].isFirstPhraseTerm()) { // register 1 term data per phrase - QueryTermData & qtd = dynamic_cast(terms[i].getTerm()->getQueryItem()); + if (!term.isPhraseTerm() || term.isFirstPhraseTerm()) { // register 1 term data per phrase + QueryTermData & qtd = dynamic_cast(term.getTerm()->getQueryItem()); - qtd.getTermData().setWeight(terms[i].getTerm()->weight()); - qtd.getTermData().setUniqueId(terms[i].getTerm()->uniqueId()); - if (terms[i].isFirstPhraseTerm()) { - qtd.getTermData().setPhraseLength(terms[i].getParent()->width()); + qtd.getTermData().setWeight(term.getTerm()->weight()); + qtd.getTermData().setUniqueId(term.getTerm()->uniqueId()); + if (term.isFirstPhraseTerm()) { + qtd.getTermData().setPhraseLength(term.getParent()->width()); } else { qtd.getTermData().setPhraseLength(1); } + auto* nn_term = term.getTerm()->as_nearest_neighbor_query_node(); + if (nn_term != nullptr) { + qtd.getTermData().set_query_tensor_name(nn_term->get_query_tensor_name()); + } - vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(terms[i].getTerm()->index()); + vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(term.getTerm()->index()); const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName); if (view != nullptr) { RankManager::View::const_iterator iter = view->begin(); @@ -83,17 +87,17 @@ RankProcessor::initQueryEnvironment() } } else { LOG(warning, "Could not find a view for index '%s'. Ranking no fields.", - getIndexName(terms[i].getTerm()->index(), expandedIndexName).c_str()); + getIndexName(term.getTerm()->index(), expandedIndexName).c_str()); } LOG(debug, "Setup query term '%s:%s' (%s)", - getIndexName(terms[i].getTerm()->index(), expandedIndexName).c_str(), - terms[i].getTerm()->getTerm(), - terms[i].isFirstPhraseTerm() ? "phrase" : "term"); + getIndexName(term.getTerm()->index(), expandedIndexName).c_str(), + term.getTerm()->getTerm(), + term.isFirstPhraseTerm() ? "phrase" : "term"); _queryEnv.addTerm(&qtd.getTermData()); } else { LOG(debug, "Ignore query term '%s:%s' (part of phrase)", - terms[i].getTerm()->index().c_str(), terms[i].getTerm()->getTerm()); + term.getTerm()->index().c_str(), term.getTerm()->getTerm()); } } _rankSetup.prepareSharedState(_queryEnv, _queryEnv.getObjectStore()); diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp index 7043e63ec87..98ed8a26938 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp +++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp @@ -1,17 +1,18 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "fieldsearchspec.h" +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include #include -#include -#include -#include -#include -#include -#include -#include #include #include @@ -109,6 +110,10 @@ FieldSearchSpec::FieldSearchSpec(const FieldIdT & fid, const vespalib::string & case VsmfieldsConfig::Fieldspec::Searchmethod::GEOPOS: _searcher = std::make_unique(fid); break; + case VsmfieldsConfig::Fieldspec::Searchmethod::NEAREST_NEIGHBOR: + auto dm = NearestNeighborFieldSearcher::distance_metric_from_string(arg1); + _searcher = std::make_unique(fid, dm); + break; } if (_searcher) { setMatchType(_searcher, arg1); -- cgit v1.2.3