about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2023-04-22 11:38:49 +0200
committer GitHub <noreply@github.com> 2023-04-22 11:38:49 +0200
commit e2122d6ad5b018ec11dd9eb35c39a057aa0ed540 (patch)
tree 906c1b9e380e55fa5a8b82e1db945f48765e6275
parent 864778ea5f78cedf0c09282ab6d200889771f633 (diff)
parent e7670d3dd2790574083195068c637d130386c5b6 (diff)
Merge pull request #26816 from vespa-engine/geirst/nearest-neighbor-in-streaming-search (v8.156.36)
Integrate nearest neighbor field searcher in streaming search.
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp 5
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/indexenvironment.h 4
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp 13
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp 36
-rw-r--r-- streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp 19
5 files changed, 51 insertions, 26 deletions
diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp
index 538f3efe44a..81df2b5492f 100644
--- a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.cpp
@@ -22,13 +22,16 @@ IndexEnvironment::IndexEnvironment(IndexEnvironment &&) noexcept = default;
IndexEnvironment::~IndexEnvironment() = default;
bool
-IndexEnvironment::addField(const vespalib::string & name, bool isAttribute)
+IndexEnvironment::addField(const vespalib::string& name,
+ bool isAttribute,
+ search::fef::FieldInfo::DataType data_type)
{
if (getFieldByName(name) != nullptr) {
return false;
}
FieldInfo info(isAttribute ? FieldType::ATTRIBUTE : FieldType::INDEX,
FieldInfo::CollectionType::SINGLE, name, _fields.size());
+ info.set_data_type(data_type);
info.addAttribute(); // we are able to produce needed attributes at query time
_fields.push_back(info);
_fieldNames[info.name()] = info.id();
diff --git a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h
index af037d87076..ef679cacdf0 100644
--- a/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h
+++ b/streamingvisitors/src/vespa/searchvisitor/indexenvironment.h
@@ -83,7 +83,9 @@ public:
return nullptr;
}
- bool addField(const vespalib::string & name, bool isAttribute);
+ bool addField(const vespalib::string& name,
+ bool isAttribute,
+ search::fef::FieldInfo::DataType data_type);
search::fef::Properties & getProperties() { return _properties; }
diff --git a/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp
index 706325a0f7a..81a2a48fb4d 100644
--- a/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/rankmanager.cpp
@@ -2,6 +2,7 @@
#include "rankmanager.h"
#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/fef/fieldinfo.h>
#include <vespa/searchlib/fef/functiontablefactory.h>
#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/vespalib/util/exception.h>
@@ -40,6 +41,16 @@ RankManager::Snapshot::addProperties(const vespa::config::search::RankProfilesCo
}
}
+FieldInfo::DataType
+to_data_type(VsmfieldsConfig::Fieldspec::Searchmethod search_method)
+{
+ if (search_method == VsmfieldsConfig::Fieldspec::Searchmethod::NEAREST_NEIGHBOR) {
+ return FieldInfo::DataType::TENSOR;
+ }
+ // This is the default FieldInfo data type if not specified.
+ return FieldInfo::DataType::DOUBLE;
+}
+
void
RankManager::Snapshot::detectFields(const VsmfieldsHandle & fields)
{
@@ -49,7 +60,7 @@ RankManager::Snapshot::detectFields(const VsmfieldsHandle & fields)
LOG(debug, "Adding field of type '%s' and name '%s' with id '%u' the index environment.",
isAttribute ? "ATTRIBUTE" : "INDEX", fs.name.c_str(), i);
// This id must match the vsm specific field id
- _protoEnv.addField(fs.name, isAttribute);
+ _protoEnv.addField(fs.name, isAttribute, to_data_type(fs.searchmethod));
}
}
diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
index b41eb041c57..ba97a708cc5 100644
--- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
@@ -56,24 +56,28 @@ RankProcessor::initQueryEnvironment()
{
QueryWrapper::TermList & terms = _query.getTermList();
- for (uint32_t i = 0; i < terms.size(); ++i) {
- if (terms[i].isGeoPosTerm()) {
- const vespalib::string & fieldName = terms[i].getTerm()->index();
- const vespalib::string & locStr = terms[i].getTerm()->getTermString();
+ for (auto& term : terms) {
+ if (term.isGeoPosTerm()) {
+ const vespalib::string & fieldName = term.getTerm()->index();
+ const vespalib::string & locStr = term.getTerm()->getTermString();
_queryEnv.addGeoLocation(fieldName, locStr);
}
- if (!terms[i].isPhraseTerm() || terms[i].isFirstPhraseTerm()) { // register 1 term data per phrase
- QueryTermData & qtd = dynamic_cast<QueryTermData &>(terms[i].getTerm()->getQueryItem());
+ if (!term.isPhraseTerm() || term.isFirstPhraseTerm()) { // register 1 term data per phrase
+ QueryTermData & qtd = dynamic_cast<QueryTermData &>(term.getTerm()->getQueryItem());
- qtd.getTermData().setWeight(terms[i].getTerm()->weight());
- qtd.getTermData().setUniqueId(terms[i].getTerm()->uniqueId());
- if (terms[i].isFirstPhraseTerm()) {
- qtd.getTermData().setPhraseLength(terms[i].getParent()->width());
+ qtd.getTermData().setWeight(term.getTerm()->weight());
+ qtd.getTermData().setUniqueId(term.getTerm()->uniqueId());
+ if (term.isFirstPhraseTerm()) {
+ qtd.getTermData().setPhraseLength(term.getParent()->width());
} else {
qtd.getTermData().setPhraseLength(1);
}
+ auto* nn_term = term.getTerm()->as_nearest_neighbor_query_node();
+ if (nn_term != nullptr) {
+ qtd.getTermData().set_query_tensor_name(nn_term->get_query_tensor_name());
+ }
- vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(terms[i].getTerm()->index());
+ vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(term.getTerm()->index());
const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName);
if (view != nullptr) {
RankManager::View::const_iterator iter = view->begin();
@@ -83,17 +87,17 @@ RankProcessor::initQueryEnvironment()
}
} else {
LOG(warning, "Could not find a view for index '%s'. Ranking no fields.",
- getIndexName(terms[i].getTerm()->index(), expandedIndexName).c_str());
+ getIndexName(term.getTerm()->index(), expandedIndexName).c_str());
}
LOG(debug, "Setup query term '%s:%s' (%s)",
- getIndexName(terms[i].getTerm()->index(), expandedIndexName).c_str(),
- terms[i].getTerm()->getTerm(),
- terms[i].isFirstPhraseTerm() ? "phrase" : "term");
+ getIndexName(term.getTerm()->index(), expandedIndexName).c_str(),
+ term.getTerm()->getTerm(),
+ term.isFirstPhraseTerm() ? "phrase" : "term");
_queryEnv.addTerm(&qtd.getTermData());
} else {
LOG(debug, "Ignore query term '%s:%s' (part of phrase)",
- terms[i].getTerm()->index().c_str(), terms[i].getTerm()->getTerm());
+ term.getTerm()->index().c_str(), term.getTerm()->getTerm());
}
}
_rankSetup.prepareSharedState(_queryEnv, _queryEnv.getObjectStore());
diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
index 7043e63ec87..98ed8a26938 100644
--- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
+++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
@@ -1,17 +1,18 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "fieldsearchspec.h"
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vsm/searcher/boolfieldsearcher.h>
+#include <vespa/vsm/searcher/floatfieldsearcher.h>
+#include <vespa/vsm/searcher/futf8strchrfieldsearcher.h>
+#include <vespa/vsm/searcher/geo_pos_field_searcher.h>
+#include <vespa/vsm/searcher/intfieldsearcher.h>
+#include <vespa/vsm/searcher/nearest_neighbor_field_searcher.h>
+#include <vespa/vsm/searcher/utf8exactstringfieldsearcher.h>
#include <vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h>
#include <vespa/vsm/searcher/utf8strchrfieldsearcher.h>
#include <vespa/vsm/searcher/utf8substringsearcher.h>
#include <vespa/vsm/searcher/utf8suffixstringfieldsearcher.h>
-#include <vespa/vsm/searcher/utf8exactstringfieldsearcher.h>
-#include <vespa/vsm/searcher/futf8strchrfieldsearcher.h>
-#include <vespa/vsm/searcher/intfieldsearcher.h>
-#include <vespa/vsm/searcher/boolfieldsearcher.h>
-#include <vespa/vsm/searcher/floatfieldsearcher.h>
-#include <vespa/vsm/searcher/geo_pos_field_searcher.h>
-#include <vespa/vespalib/stllike/asciistream.h>
#include <regex>
#include <vespa/log/log.h>
@@ -109,6 +110,10 @@ FieldSearchSpec::FieldSearchSpec(const FieldIdT & fid, const vespalib::string &
case VsmfieldsConfig::Fieldspec::Searchmethod::GEOPOS:
_searcher = std::make_unique<GeoPosFieldSearcher>(fid);
break;
+ case VsmfieldsConfig::Fieldspec::Searchmethod::NEAREST_NEIGHBOR:
+ auto dm = NearestNeighborFieldSearcher::distance_metric_from_string(arg1);
+ _searcher = std::make_unique<NearestNeighborFieldSearcher>(fid, dm);
+ break;
}
if (_searcher) {
setMatchType(_searcher, arg1);