diff options
author | Geir Storli <geirst@yahooinc.com> | 2023-04-21 15:16:39 +0000 |
---|---|---|
committer | Geir Storli <geirst@yahooinc.com> | 2023-04-21 15:16:39 +0000 |
commit | e7670d3dd2790574083195068c637d130386c5b6 (patch) | |
tree | 4d7f4fe1ed9bac14e612f60773664f2d8907a58a /streamingvisitors | |
parent | 6795d0352a225559efdfd68260a578cba22a5da4 (diff) |
Integrate nearest neighbor field searcher in streaming search.
Diffstat (limited to 'streamingvisitors')
5 files changed, 51 insertions, 26 deletions
diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp index 538f3efe44a..81df2b5492f 100644 --- a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp @@ -22,13 +22,16 @@ IndexEnvironment::IndexEnvironment(IndexEnvironment &&) noexcept = default; IndexEnvironment::~IndexEnvironment() = default; bool -IndexEnvironment::addField(const vespalib::string & name, bool isAttribute) +IndexEnvironment::addField(const vespalib::string& name, + bool isAttribute, + search::fef::FieldInfo::DataType data_type) { if (getFieldByName(name) != nullptr) { return false; } FieldInfo info(isAttribute ? FieldType::ATTRIBUTE : FieldType::INDEX, FieldInfo::CollectionType::SINGLE, name, _fields.size()); + info.set_data_type(data_type); info.addAttribute(); // we are able to produce needed attributes at query time _fields.push_back(info); _fieldNames[info.name()] = info.id(); diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h index af037d87076..ef679cacdf0 100644 --- a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h +++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h @@ -83,7 +83,9 @@ public: return nullptr; } - bool addField(const vespalib::string & name, bool isAttribute); + bool addField(const vespalib::string& name, + bool isAttribute, + search::fef::FieldInfo::DataType data_type); search::fef::Properties & getProperties() { return _properties; } diff --git a/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp index 706325a0f7a..81a2a48fb4d 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp @@ -2,6 +2,7 @@ #include "rankmanager.h" #include <vespa/searchlib/features/setup.h> +#include <vespa/searchlib/fef/fieldinfo.h> #include <vespa/searchlib/fef/functiontablefactory.h> #include <vespa/vespalib/util/stringfmt.h> #include <vespa/vespalib/util/exception.h> @@ -40,6 +41,16 @@ RankManager::Snapshot::addProperties(const vespa::config::search::RankProfilesCo } } +FieldInfo::DataType +to_data_type(VsmfieldsConfig::Fieldspec::Searchmethod search_method) +{ + if (search_method == VsmfieldsConfig::Fieldspec::Searchmethod::NEAREST_NEIGHBOR) { + return FieldInfo::DataType::TENSOR; + } + // This is the default FieldInfo data type if not specified. + return FieldInfo::DataType::DOUBLE; +} + void RankManager::Snapshot::detectFields(const VsmfieldsHandle & fields) { @@ -49,7 +60,7 @@ RankManager::Snapshot::detectFields(const VsmfieldsHandle & fields) LOG(debug, "Adding field of type '%s' and name '%s' with id '%u' the index environment.", isAttribute ? "ATTRIBUTE" : "INDEX", fs.name.c_str(), i); // This id must match the vsm specific field id - _protoEnv.addField(fs.name, isAttribute); + _protoEnv.addField(fs.name, isAttribute, to_data_type(fs.searchmethod)); } } diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp index b41eb041c57..ba97a708cc5 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp @@ -56,24 +56,28 @@ RankProcessor::initQueryEnvironment() { QueryWrapper::TermList & terms = _query.getTermList(); - for (uint32_t i = 0; i < terms.size(); ++i) { - if (terms[i].isGeoPosTerm()) { - const vespalib::string & fieldName = terms[i].getTerm()->index(); - const vespalib::string & locStr = terms[i].getTerm()->getTermString(); + for (auto& term : terms) { + if (term.isGeoPosTerm()) { + const vespalib::string & fieldName = term.getTerm()->index(); + const vespalib::string & locStr = term.getTerm()->getTermString(); _queryEnv.addGeoLocation(fieldName, locStr); } - if (!terms[i].isPhraseTerm() || terms[i].isFirstPhraseTerm()) { // register 1 term data per phrase - QueryTermData & qtd = dynamic_cast<QueryTermData &>(terms[i].getTerm()->getQueryItem()); + if (!term.isPhraseTerm() || term.isFirstPhraseTerm()) { // register 1 term data per phrase + QueryTermData & qtd = dynamic_cast<QueryTermData &>(term.getTerm()->getQueryItem()); - qtd.getTermData().setWeight(terms[i].getTerm()->weight()); - qtd.getTermData().setUniqueId(terms[i].getTerm()->uniqueId()); - if (terms[i].isFirstPhraseTerm()) { - qtd.getTermData().setPhraseLength(terms[i].getParent()->width()); + qtd.getTermData().setWeight(term.getTerm()->weight()); + qtd.getTermData().setUniqueId(term.getTerm()->uniqueId()); + if (term.isFirstPhraseTerm()) { + qtd.getTermData().setPhraseLength(term.getParent()->width()); } else { qtd.getTermData().setPhraseLength(1); } + auto* nn_term = term.getTerm()->as_nearest_neighbor_query_node(); + if (nn_term != nullptr) { + qtd.getTermData().set_query_tensor_name(nn_term->get_query_tensor_name()); + } - vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(terms[i].getTerm()->index()); + vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(term.getTerm()->index()); const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName); if (view != nullptr) { RankManager::View::const_iterator iter = view->begin(); @@ -83,17 +87,17 @@ RankProcessor::initQueryEnvironment() } } else { LOG(warning, "Could not find a view for index '%s'. Ranking no fields.", - getIndexName(terms[i].getTerm()->index(), expandedIndexName).c_str()); + getIndexName(term.getTerm()->index(), expandedIndexName).c_str()); } LOG(debug, "Setup query term '%s:%s' (%s)", - getIndexName(terms[i].getTerm()->index(), expandedIndexName).c_str(), - terms[i].getTerm()->getTerm(), - terms[i].isFirstPhraseTerm() ? "phrase" : "term"); + getIndexName(term.getTerm()->index(), expandedIndexName).c_str(), + term.getTerm()->getTerm(), + term.isFirstPhraseTerm() ? "phrase" : "term"); _queryEnv.addTerm(&qtd.getTermData()); } else { LOG(debug, "Ignore query term '%s:%s' (part of phrase)", - terms[i].getTerm()->index().c_str(), terms[i].getTerm()->getTerm()); + term.getTerm()->index().c_str(), term.getTerm()->getTerm()); } } _rankSetup.prepareSharedState(_queryEnv, _queryEnv.getObjectStore()); diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp index 7043e63ec87..98ed8a26938 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp +++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp @@ -1,17 +1,18 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "fieldsearchspec.h" +#include <vespa/vespalib/stllike/asciistream.h> +#include <vespa/vsm/searcher/boolfieldsearcher.h> +#include <vespa/vsm/searcher/floatfieldsearcher.h> +#include <vespa/vsm/searcher/futf8strchrfieldsearcher.h> +#include <vespa/vsm/searcher/geo_pos_field_searcher.h> +#include <vespa/vsm/searcher/intfieldsearcher.h> +#include <vespa/vsm/searcher/nearest_neighbor_field_searcher.h> +#include <vespa/vsm/searcher/utf8exactstringfieldsearcher.h> #include <vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h> #include <vespa/vsm/searcher/utf8strchrfieldsearcher.h> #include <vespa/vsm/searcher/utf8substringsearcher.h> #include <vespa/vsm/searcher/utf8suffixstringfieldsearcher.h> -#include <vespa/vsm/searcher/utf8exactstringfieldsearcher.h> -#include <vespa/vsm/searcher/futf8strchrfieldsearcher.h> -#include <vespa/vsm/searcher/intfieldsearcher.h> -#include <vespa/vsm/searcher/boolfieldsearcher.h> -#include <vespa/vsm/searcher/floatfieldsearcher.h> -#include <vespa/vsm/searcher/geo_pos_field_searcher.h> -#include <vespa/vespalib/stllike/asciistream.h> #include <regex> #include <vespa/log/log.h> @@ -109,6 +110,10 @@ FieldSearchSpec::FieldSearchSpec(const FieldIdT & fid, const vespalib::string & case VsmfieldsConfig::Fieldspec::Searchmethod::GEOPOS: _searcher = std::make_unique<GeoPosFieldSearcher>(fid); break; + case VsmfieldsConfig::Fieldspec::Searchmethod::NEAREST_NEIGHBOR: + auto dm = NearestNeighborFieldSearcher::distance_metric_from_string(arg1); + _searcher = std::make_unique<NearestNeighborFieldSearcher>(fid, dm); + break; } if (_searcher) { setMatchType(_searcher, arg1); |